# Load the built-in iris data set into the workspace
data("iris")
# Adjusted p-values ----
# Draw a single value from the standard normal distribution
# (the line below is example output; the value differs on every run)
rnorm(1)
## [1] 0.9629
# Simulate one two-sample t-test in which both samples come from the same
# distribution.
#
# Two independent samples of size `n` are drawn from a normal distribution
# with mean 0 and standard deviation 1, then compared with `t.test()` using
# its default settings.
#
# Arguments:
#   n  Number of observations per sample (single number, at least 2 so that
#      `t.test()` has enough observations). Defaults to 7.
#
# Returns:
#   The p-value of the t-test (a single numeric value).
testeLeandro <- function(n = 7) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 2)
  # The first sample is drawn before the second so the sequence of random
  # draws is the same for a given seed.
  amostra1 <- rnorm(n = n, mean = 0, sd = 1)
  amostra2 <- rnorm(n = n, mean = 0, sd = 1)
  resultado <- t.test(amostra1, amostra2)
  resultado$p.value
}
# Run the simulated t-test 50 times and collect the p-values.
valoresP <- replicate(n = 50, expr = testeLeandro())
# Sort ascending so the curves below read from smallest to largest p-value.
valoresP_sorted <- sort(valoresP, decreasing = FALSE)
# Fix the y-axis to [0, 1]: the adjusted p-values added further down can
# reach 1 and would otherwise fall outside the range taken from the raw values.
plot(valoresP_sorted, type = "l", lwd = 2, ylim = c(0, 1))
# Dashed reference line at the 5% significance level.
abline(h = 0.05, lty = 2)
# How many unadjusted p-values fall below 0.05?
# (example output from one run; counts vary between runs unless a seed is set)
table(valoresP < 0.05)
##
## FALSE TRUE
## 47 3
# Adjust the p-values for multiple testing: Bonferroni and
# Benjamini-Hochberg ("BH").
valoresP_sorted_bonf <- p.adjust(valoresP_sorted, method = "bonferroni")
valoresP_sorted_BH <- p.adjust(valoresP_sorted, method = "BH")
# Overlay the adjusted curves: red = Bonferroni, blue = BH.
lines(valoresP_sorted_bonf, col = "red", lwd = 2)
lines(valoresP_sorted_BH, col = "blue", lwd = 2)
# ANOVA ----
# ANOVA - assumption of normally distributed data (Shapiro-Wilk test)
# H0: our data are normally distributed
# H1: our data are not normally distributed
# NOTE(review): this tests the pooled Sepal.Width values; the ANOVA normality
# assumption is usually checked on the model residuals (or within each
# group) - confirm this is the intended check.
normal.test.Sepal.Width <- shapiro.test(iris$Sepal.Width)
# Are the data normally distributed? (TRUE = H0 is not rejected at the 5% level)
normal.test.Sepal.Width$p.value > 0.05
## [1] TRUE
# One-way ANOVA: Sepal.Width is the response and Species the grouping
# factor, both taken from the iris data frame
ANOVA.Sepal.Width <- aov(Sepal.Width ~ Species, data = iris)
# Is there a difference among the groups?
# Pay attention to the Pr(>F) column
summary(ANOVA.Sepal.Width)
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 11.3 5.67 49.2 <2e-16 ***
## Residuals 147 17.0 0.12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Which pairs of groups differ significantly?
# Tukey's HSD compares every pair of Species levels
TukeyHSD(ANOVA.Sepal.Width)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Sepal.Width ~ Species, data = iris)
##
## $Species
## diff lwr upr p adj
## versicolor-setosa -0.658 -0.81886 -0.4971 0.0000
## virginica-setosa -0.454 -0.61486 -0.2931 0.0000
## virginica-versicolor 0.204 0.04314 0.3649 0.0088
# Normality check (Shapiro-Wilk) for the remaining measurements
# H0: the data are normally distributed
# H1: the data are not normally distributed
# Each comparison prints TRUE when H0 is not rejected at the 5% level.
normal.test.Sepal.Length <- shapiro.test(iris$Sepal.Length)
normal.test.Sepal.Length[["p.value"]] > 0.05
## [1] FALSE
normal.test.Petal.Length <- shapiro.test(iris$Petal.Length)
normal.test.Petal.Length[["p.value"]] > 0.05
## [1] FALSE
normal.test.Petal.Width <- shapiro.test(iris$Petal.Width)
normal.test.Petal.Width[["p.value"]] > 0.05
## [1] FALSE
# When the data are not normally distributed, a non-parametric
# alternative to the analysis of variance is the better choice.
# One such test is the Kruskal-Wallis rank sum test, which works on the
# ranks of the values.
# Each comparison prints TRUE when the groups differ at the 5% level.
kruskal.test.result.Sepal.Length <- kruskal.test(Sepal.Length ~ Species, data = iris)
kruskal.test.result.Sepal.Length[["p.value"]] < 0.05
## [1] TRUE
kruskal.test.result.Petal.Length <- kruskal.test(Petal.Length ~ Species, data = iris)
kruskal.test.result.Petal.Length[["p.value"]] < 0.05
## [1] TRUE
kruskal.test.result.Petal.Width <- kruskal.test(Petal.Width ~ Species, data = iris)
kruskal.test.result.Petal.Width[["p.value"]] < 0.05
## [1] TRUE
