Analise de expressao
#diretorio de trabalho
#setwd("C:\\Users\\aluno\\Downloads")
#lista o conteudo do diretorio
ls()
# carregando arquivo com os dados
dataMA <- read.csv("C:\\Users\\aluno\\Downloads\\expressionData1.csv", sep = ",", row.names = 1)
#checando se os dados carregaram corretamente
# imprime na tela as 6 primeiras linhas
head(dataMA)
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R1
## 1007_s_at 950.83
## 1053_at 1066.47
## 117_at 105.79
## 121_at 1400.54
## 1255_g_at 44.41
## 1294_at 210.70
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R2
## 1007_s_at 1146.52
## 1053_at 1254.76
## 117_at 155.25
## 121_at 1408.94
## 1255_g_at 44.59
## 1294_at 192.60
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R3
## 1007_s_at 890.40
## 1053_at 792.94
## 117_at 111.40
## 121_at 1245.49
## 1255_g_at 46.01
## 1294_at 178.02
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R1
## 1007_s_at 929.22
## 1053_at 721.11
## 117_at 186.08
## 121_at 1126.09
## 1255_g_at 43.18
## 1294_at 207.37
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R2
## 1007_s_at 986.09
## 1053_at 852.71
## 117_at 82.21
## 121_at 1186.81
## 1255_g_at 33.95
## 1294_at 272.09
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R3
## 1007_s_at 1009.84
## 1053_at 1040.57
## 117_at 94.31
## 121_at 1127.97
## 1255_g_at 46.60
## 1294_at 238.87
# verifica classe da variavel
class(dataMA)
## [1] "data.frame"
# tranasforma meus dados para matriz
dataMA <- as.matrix(dataMA)
# verifica classe da variavel
class(dataMA)
## [1] "matrix"
# dataMA dimensao da matriz
dim(dataMA)
[1] 22300 6
# media da expressao para o gene 1 siatuacao 1 (colunas de 1 a 3)
mean(dataMA[1, 1:3])
[1] 995.9
# mediada expressaopara o gene 1 siatuacao 2 (colunas de 4 a 6) mean(dataMA[1, 4:6])
[1] 975.1
# desvioda expressaopadrao para o gene 1 siatuacao 1 (colunas de 1 a 3) sd(dataMA[1, 1:3])
[1] 133.9
# desvioda expressaopadrao para o gene 1 siatuacao 2 (colunas de 3 a 6) sd(dataMA[1, 4:6])
[1] 41.43
# teste t para diferenca de expressao do gene 1
t.test(dataMA[1, 1:3], dataMA[1, 4:6])
##
## Welch Two Sample t-test
##
## data: dataMA[1, 1:3] and dataMA[1, 4:6]
## t = 0.2579, df = 2.379, p-value = 0.8171
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -279.1 320.9
## sample estimates:
## mean of x mean of y
## 995.9 975.1
mediaA1 <- rowMeans(dataMA[, 1:3])
mediaA2 <- rowMeans(dataMA[, 4:6])
mediaA1[1:6]
## 1007_s_at 1053_at 117_at 121_at 1255_g_at 1294_at
## 995.9 1038.1 124.1 1351.7 45.0 193.8
mediaA2[1:6]
## 1007_s_at 1053_at 117_at 121_at 1255_g_at 1294_at
## 975.05 871.46 120.87 1146.96 41.24 239.44
# grafico scatterplot das medias brutas dos resultados de MicroArray
plot(mediaA1, mediaA2)
# adiciona uma linha vermelha na diagonal do grafico
abline(1, 1, col = "red", lwd = 2)
# histograma dos dados da amostra 1 com os dados divididos em 50 classes
hist(dataMA[, 1:3], nclass = 50, col = "lightblue")
# histograma dos dados da amostra 1 com os dados divididos em 50 classes
hist(dataMA[, 4:6], nclass = 50, col = "lightblue")
# media da expressao
A <- (mediaA1 + mediaA2) * 1/2
# fold change
M <- mediaA1/mediaA2
# scatterplot da media vs fold change, fold change não transformada
plot(A, M, main = "Grafico de fold change", xlab = "A", ylab = "M")
# exemplo de dados não transformados
A1 <- c(1:50)
A2 <- c(50:1)
A1
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50
A2
## [1] 50 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28
## [24] 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5
## [47] 4 3 2 1
A_teste <- c(1:50)
M_teste <- (A1/A2)
# comportamento da curva de fold change antes do tratamento log2
plot(A_teste, M_teste, main = "Grafico de teste\ndo fold change", xlab = "A",
ylab = "M")
# comportamento da curva de fold change depois do tratamento log2
plot(A_teste, log2(M_teste), main = "Gr?fico de teste\ndo fold change", xlab = "A",
ylab = "M")
hist(log2(dataMA[, 1:3]), nclass = 50, col = "lightblue")
hist(log2(dataMA[, 4:6]), nclass = 50, col = "lightblue")
mediaA1 <- rowMeans(log2(dataMA[, 1:3]))
mediaA2 <- rowMeans(log2(dataMA[, 4:6]))
# grafico scatterplot das medias log2 dos resultados de MicroArray
plot(mediaA1, mediaA2)
# adiciona uma linha vermelha na diagonal do grafico
abline(1, 1, col = "red", lwd = 2)
# scatterplot da media vs log2(fold change)
plot(log2(A), log2(M), main = "Grafico de fold change", xlab = "A", ylab = "M")
abline(h = 0, col = "red", lwd = 3, lty = 2)
abline(h = 1, col = "red", lwd = 3, lty = 2)
abline(h = -1, col = "red", lwd = 3, lty = 2)
dataMAlog2 <- log2(dataMA)
head(dataMAlog2)
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R1
## 1007_s_at 9.893
## 1053_at 10.059
## 117_at 6.725
## 121_at 10.452
## 1255_g_at 5.473
## 1294_at 7.719
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R2
## 1007_s_at 10.163
## 1053_at 10.293
## 117_at 7.278
## 121_at 10.460
## 1255_g_at 5.479
## 1294_at 7.589
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R3
## 1007_s_at 9.798
## 1053_at 9.631
## 117_at 6.800
## 121_at 10.283
## 1255_g_at 5.524
## 1294_at 7.476
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R1
## 1007_s_at 9.860
## 1053_at 9.494
## 117_at 7.540
## 121_at 10.137
## 1255_g_at 5.432
## 1294_at 7.696
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R2
## 1007_s_at 9.946
## 1053_at 9.736
## 117_at 6.361
## 121_at 10.213
## 1255_g_at 5.085
## 1294_at 8.088
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R3
## 1007_s_at 9.980
## 1053_at 10.023
## 117_at 6.559
## 121_at 10.140
## 1255_g_at 5.542
## 1294_at 7.900
#funcao para realizar o test-t
#comparacao das amostras 1, 2 e 3 contra 4, 5 e 6
ttest_matrix <- function(matriz, linha) {
resultado <- t.test(matriz[linha, 1:3], matriz[linha, 4:6])
return(resultado$p.value)
}
valorP <- NULL
for (i in 1:22300) {
valorP <- c(valorP, ttest_matrix(dataMAlog2, i))
}
head(valorP)
## [1] 0.8569 0.3831 0.7964 0.0398 0.4206 0.1007
# volcano plot, comapra??o do valor p vs o fold change
plot(log2(M), -log2(valorP))
abline(v = 1, col = "red", lwd = 3)
abline(v = -1, col = "red", lwd = 3)
abline(h = 4.32, col = "blue", lwd = 3)
# abline(h=1.0, col = 'magenta', lwd=3) abline(h=13.0, col = 'darkgreen',
# lwd=3)
-log2(1e-04)
## [1] 13.29
head(valorP)
## [1] 0.8569 0.3831 0.7964 0.0398 0.4206 0.1007
head(dataMAlog2)
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R1
## 1007_s_at 9.893
## 1053_at 10.059
## 117_at 6.725
## 121_at 10.452
## 1255_g_at 5.473
## 1294_at 7.719
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R2
## 1007_s_at 10.163
## 1053_at 10.293
## 117_at 7.278
## 121_at 10.460
## 1255_g_at 5.479
## 1294_at 7.589
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R3
## 1007_s_at 9.798
## 1053_at 9.631
## 117_at 6.800
## 121_at 10.283
## 1255_g_at 5.524
## 1294_at 7.476
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R1
## 1007_s_at 9.860
## 1053_at 9.494
## 117_at 7.540
## 121_at 10.137
## 1255_g_at 5.432
## 1294_at 7.696
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R2
## 1007_s_at 9.946
## 1053_at 9.736
## 117_at 6.361
## 121_at 10.213
## 1255_g_at 5.085
## 1294_at 8.088
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R3
## 1007_s_at 9.980
## 1053_at 10.023
## 117_at 6.559
## 121_at 10.140
## 1255_g_at 5.542
## 1294_at 7.900
dim(dataMAlog2)
## [1] 22300 6
length(valorP)
## [1] 22300
dataMAlog2_pv <- cbind(dataMAlog2, valorP)
head(dataMAlog2_pv)
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R1
## 1007_s_at 9.893
## 1053_at 10.059
## 117_at 6.725
## 121_at 10.452
## 1255_g_at 5.473
## 1294_at 7.719
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R2
## 1007_s_at 10.163
## 1053_at 10.293
## 117_at 7.278
## 121_at 10.460
## 1255_g_at 5.479
## 1294_at 7.589
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R3
## 1007_s_at 9.798
## 1053_at 9.631
## 117_at 6.800
## 121_at 10.283
## 1255_g_at 5.524
## 1294_at 7.476
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R1
## 1007_s_at 9.860
## 1053_at 9.494
## 117_at 7.540
## 121_at 10.137
## 1255_g_at 5.432
## 1294_at 7.696
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R2
## 1007_s_at 9.946
## 1053_at 9.736
## 117_at 6.361
## 121_at 10.213
## 1255_g_at 5.085
## 1294_at 8.088
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R3 valorP
## 1007_s_at 9.980 0.8569
## 1053_at 10.023 0.3831
## 117_at 6.559 0.7964
## 121_at 10.140 0.0398
## 1255_g_at 5.542 0.4206
## 1294_at 7.900 0.1007
valorP_sig <- p.adjust(dataMAlog2_pv[, 7], method = "holm") <= 0.05 #bonferroni, hochberg
head(valorP_sig)
## 1007_s_at 1053_at 117_at 121_at 1255_g_at 1294_at
## FALSE FALSE FALSE FALSE FALSE FALSE
dataMAlog2_pv_sig <- dataMAlog2_pv[valorP_sig, ]
dim(dataMAlog2_pv_sig)
## [1] 0 7
dataMAlog2_pv_sig
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R1
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R2
## X12_13_02_U133A_Mer_Latin_Square_Expt6_R3
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R1
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R2
## X12_13_02_U133A_Mer_Latin_Square_Expt7_R3 valorP