» Multiple test correction and ANOVA GeneBio – Genética e Bioinformática

# Load the built-in iris data set into the workspace

data("iris")

# Adjusted p-value

# Draw a single value from the standard normal distribution
rnorm(n = 1, mean = 0, sd = 1)

## [1] 0.9629

#creating a function
# Simulate one two-sample t-test where both samples come from the same
# normal distribution, i.e. the null hypothesis of equal means is true.
#
# Arguments:
#   n    - number of observations drawn for each sample (default 7)
#   mean - mean of the normal distribution used for both samples (default 0)
#   sd   - standard deviation of that distribution (default 1)
#
# Returns:
#   The p-value (a single number) of t.test() comparing the two samples.
#
# Calling testeLeandro() with no arguments reproduces the original
# fixed setup of two samples of 7 standard-normal values.
testeLeandro <- function(n = 7, mean = 0, sd = 1) {
  # The two samples are drawn one after the other so the random number
  # stream is consumed in a fixed order (sample 1, then sample 2).
  amostra1 <- rnorm(n = n, mean = mean, sd = sd)
  amostra2 <- rnorm(n = n, mean = mean, sd = sd)
  resultado <- t.test(amostra1, amostra2)
  resultado$p.value
}

# Run the null simulation 50 times and collect the resulting p-values
valoresP <- replicate(n = 50, expr = testeLeandro())
# Sort ascending (FALSE spelled out: the shorthand F can be reassigned)
valoresP_sorted <- sort(valoresP, decreasing = FALSE)
# p-values lie in [0, 1]; fixing the y-axis to that range keeps any curve
# added to this plot afterwards (e.g. adjusted p-values, which can reach 1)
# from being clipped by an axis scaled only to the raw values.
plot(valoresP_sorted, type = "l", lwd = 2, ylim = c(0, 1))
# Dashed reference line at the 5% significance threshold
abline(h = 0.05, lty = 2)
# Count how many of the 50 tests fall below 0.05 although H0 is true
table(valoresP < 0.05)

## 
## FALSE  TRUE 
##    47     3

# Bonferroni-adjusted p-values, added to the current plot in red
valoresP_sorted_bonf <- p.adjust(p = valoresP_sorted, method = "bonferroni")
lines(x = valoresP_sorted_bonf, col = "red", lwd = 2)
# Benjamini-Hochberg-adjusted p-values, added to the current plot in blue
valoresP_sorted_BH <- p.adjust(p = valoresP_sorted, method = "BH")
lines(x = valoresP_sorted_BH, col = "blue", lwd = 2)

ANOVA

# ANOVA - assumption of normally distributed data
# H0: our data are normally distributed
# H1: our data are not normally distributed
# NOTE(review): this tests the pooled measurements; ANOVA's normality
# assumption is usually stated for the within-group residuals - confirm
# that this quick screen is what is intended.
normal.test.Sepal.Width <- shapiro.test(iris$Sepal.Width)

# Are the data normally distributed? TRUE means the Shapiro-Wilk test
# does not reject normality at the 5% level.
normal.test.Sepal.Width[["p.value"]] > 0.05

## [1] TRUE

# Fit a one-way ANOVA using the columns Species (groups) and
# Sepal.Width (values) of the iris data frame
ANOVA.Sepal.Width <- aov(formula = Sepal.Width ~ Species, data = iris)

# Is there a difference among the groups?
# Pay attention to the Pr(>F) column
summary(object = ANOVA.Sepal.Width)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## Species       2   11.3    5.67    49.2 <2e-16 ***
## Residuals   147   17.0    0.12                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Which pairs of groups differ significantly from each other?
# Tukey's HSD compares every pair of species at the 95% family-wise level.
TukeyHSD(x = ANOVA.Sepal.Width, conf.level = 0.95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Sepal.Width ~ Species, data = iris)
## 
## $Species
##                        diff      lwr     upr  p adj
## versicolor-setosa    -0.658 -0.81886 -0.4971 0.0000
## virginica-setosa     -0.454 -0.61486 -0.2931 0.0000
## virginica-versicolor  0.204  0.04314  0.3649 0.0088


# ANOVA - assumption of normally distributed data
# H0: our data are normally distributed
# H1: our data are not normally distributed
# Each remaining measurement is tested and checked in turn; FALSE means
# the Shapiro-Wilk test rejects normality at the 5% level.
normal.test.Sepal.Length <- shapiro.test(iris$Sepal.Length)
normal.test.Sepal.Length[["p.value"]] > 0.05

## [1] FALSE

normal.test.Petal.Length <- shapiro.test(iris$Petal.Length)
normal.test.Petal.Length[["p.value"]] > 0.05

## [1] FALSE

normal.test.Petal.Width <- shapiro.test(iris$Petal.Width)
normal.test.Petal.Width[["p.value"]] > 0.05

## [1] FALSE

# If your data are not normally distributed,
# you had better use a non-parametric analysis of variance.
# One non-parametric test is the Kruskal-Wallis rank sum test,
# which is based on the ranked values.
# Each measurement is tested across species and checked in turn;
# TRUE means the groups differ at the 5% level.
kruskal.test.result.Sepal.Length <- kruskal.test(Sepal.Length ~ Species, data = iris)
kruskal.test.result.Sepal.Length[["p.value"]] < 0.05

## [1] TRUE

kruskal.test.result.Petal.Length <- kruskal.test(Petal.Length ~ Species, data = iris)
kruskal.test.result.Petal.Length[["p.value"]] < 0.05

## [1] TRUE

kruskal.test.result.Petal.Width <- kruskal.test(Petal.Width ~ Species, data = iris)
kruskal.test.result.Petal.Width[["p.value"]] < 0.05

## [1] TRUE

M	T	W	T	F	S	S

1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30

GeneBio – Genética e Bioinformática

Multiple test correction and ANOVA

Post a Comment Cancel reply