
### Sampaio et al 2017 ###
  ## Biogeosciences ##

# This script requires the prior creation of .csv files 
# for every variable present in the Dataset

# Packages required

library(MASS)      # provides stepAIC(), used for the GLM model selection below
library(nlme)
library(lme4)
library(ggplot2)
library(lmerTest)  # attached after lme4 so lmerTest::lmer() and step() take precedence

#### FULTON'S K ####
# Import data.
# stringsAsFactors = TRUE keeps the categorical columns (Temp, pH, Cont) as
# factors on R >= 4.0 as well, which the residual-vs-covariate plots further
# down rely on.
Rfulton <- read.csv("Rfulton.csv", stringsAsFactors = TRUE)

str(Rfulton)
# Tank is used below as a grouping variable (boxplot groups, random
# intercept), so store it as a factor rather than a number.
Rfulton$Tank <- factor(Rfulton$Tank)
summary(Rfulton)

# DATA EXPLORATION
# Checklist followed for every response variable in this script:
#   A. Outliers in the response
#   B. Outliers in the X
#   C. Collinearity
#   D. Conditional boxplot
#   E. Interactions
#   F. Zero inflation
#   G. Balance

# A and B. Outliers: boxplot and Cleveland dotplot of Fulton's K, side by side
par(mfrow = c(1, 2))
boxplot(Rfulton[["Fulton"]], main = "Fulton")
dotchart(
  Rfulton[["Fulton"]],
  ylab = "Order of the data",
  xlab = "Range of data"
)
# seems okay

# C. Collinearity (not really important here)

# D. Conditional boxplots: spread of Fulton's K within each tank
par(mfrow = c(1, 1))
boxplot(
  Fulton ~ factor(Tank),
  data = Rfulton,
  xlab = "Tanks",
  ylab = "Fulton Index",
  cex.lab = 1.5
)

# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
# We won't have it here

# G. Balance: number of observations per level of each design factor
for (design_var in c("Temp", "pH", "Cont", "Tank")) {
  print(table(Rfulton[[design_var]]))
}
# it is okay

#MODELLING
# Candidate models for Fulton's K:
#   modelfulton  - Gaussian GLM, full Temp x pH x Cont factorial
#   modelfulton1 - Gaussian GLM, contamination (Cont) only
#   modf1, modf2 - the same two fixed-effect structures plus a random
#                  intercept per Tank, fitted with REML=FALSE (maximum
#                  likelihood) and compared with the GLMs by AIC below
modelfulton<-glm(Fulton~Temp*pH*Cont,data=Rfulton)
summary(modelfulton)
# Stepwise AIC selection starting from the full model
# (stepAIC() is provided by the MASS package)
stepAIC(modelfulton)
modelfulton1<-glm(Fulton~Cont, data=Rfulton)
summary(modelfulton1)

modf1<- lmer(Fulton~Temp*pH*Cont  + (1|Tank), data = Rfulton, REML=FALSE)
summary(modf1)
# Share of variance attributed to the Tank random intercept, using the values
# printed by summary(modf1) (kept commented out):
#VarExplained<-(0.0000/(0.0000+0.03805))*100
#VarExplained #Random effect only explains 0% of residual variance (Not significant)
step(modf1)
modf2<-lmer(Fulton~Cont + (1|Tank), data=Rfulton, REML=FALSE)
summary(modf2)
AIC(modelfulton,modelfulton1,modf1,modf2)
# Output recorded from an earlier run:
#            df       AIC
#modelfulton   9 -6.567653
#modelfulton1  7 -7.956932
#modf1        10 -4.567653
#modf2         8 -5.956932
# NOTE(review): a Cont-only Gaussian GLM has df = 3 (and modf2 df = 4), so the
# df recorded for modelfulton1/modf2 look like they come from an earlier model
# specification - re-run and confirm before quoting these numbers.

#modelfulton1 is the best model! (lowest AIC)

summary(modelfulton1)
# Output recorded from an earlier run:
#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)  1.60172    0.04097  39.097   <2e-16 ***
#ContM       -0.07229    0.05744  -1.259    0.213    
#
#Not significant

#MODEL VALIDATION
# Residual diagnostics for the selected Fulton's K model (Fulton ~ Cont):
#1. Homogeneity
#2. Independence
#3. Influential observations
#4. Normality (see the Q-Q panel produced by plot(M1))

# Refit the selected model under the short name used by the diagnostics below
M1 <- glm(Fulton ~ Cont, data = Rfulton) #AIC: -8.5685
par(mfrow = c(2, 2))
plot(M1)

#1. Homogeneity
# Standardized residuals are used throughout; the raw resid() values that were
# previously assigned to E1 were overwritten immediately and never used.
E1 <- rstandard(M1)
F1 <- fitted(M1)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
# Zero-residual reference line. The former vertical lines at fitted value 0
# lie outside any meaningful fitted range and were dropped.
abline(h = 0, lty = 2)
# Okay

# 2. Dependence due to model misfit
# Plot residuals versus covariates, including those left out of the model
plot(x = Rfulton$Temp,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Rfulton$pH,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Rfulton$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Tank is taken care of, the rest seems okay

# 3. Look at influential observations
# Cook's distance per observation; the red line marks the threshold of 1
par(mfrow = c(1, 1))
plot(cooks.distance(M1), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)
# Perfect

########################################################################

#### HgT quantification ####

# Import data.
# stringsAsFactors = TRUE keeps the categorical columns (Temp, pH, Cont,
# tissue) as factors on R >= 4.0 as well; relevel() and the
# residual-vs-covariate plots further down require factors.
mercury <- read.csv("mercury.csv", stringsAsFactors = TRUE)

str(mercury)
# Tank is used as a grouping variable (boxplot groups, random intercept)
mercury$Tank <- factor(mercury$Tank)
summary(mercury)

# DATA EXPLORATION
# A. Outliers
# B. Outliers in the X
# C. Collinearity
# D. Conditional boxplot
# E. Interactions
# F. Zero inflation
# G. Balance

# A and B. Outliers: boxplot and Cleveland dotplot of the HgT response
par(mfrow = c(1, 2))
boxplot(mercury$Merc,
        main = "Merc")
dotchart(mercury$Merc,
         xlab = "Range of data",
         ylab = "Order of the data")
#seems okay, but obviously controls are much lower

# C. Collinearity (not really important here)

# D. Conditional boxplots
# Spread of HgT within each tank. This previously plotted SOD from the Enz
# data set, which belongs to a later section and is not loaded at this point.
par(mfrow = c(1, 1))
boxplot(Merc ~ factor(Tank),
        data = mercury,
        ylab = "Merc concentration",
        xlab = "Tanks",
        cex.lab = 1.5)

# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
#We won't have it here

# G. Balance: number of observations per level of each design factor
table(mercury$Temp)
table(mercury$pH)
table(mercury$Cont) #perfect

# Only the contaminated treatments are informative about preferential MeHg
# accumulation and about pH / Temp effects within each organ, so the rest of
# this section works on that subset.
# Subsets
merc_M <- mercury[which(mercury$Cont == "M"), , drop = FALSE]

# Outliers: boxplot and Cleveland dotplot of HgT in the contaminated subset
par(mfrow = c(1, 2))
boxplot(merc_M[["Merc"]], main = "Merc")
dotchart(
  merc_M[["Merc"]],
  ylab = "Order of the data",
  xlab = "Range of data"
)
# seems okay

# Conditional boxplots of HgT against each grouping variable, on a 2 x 2 grid.
# Each row gives: grouping column, x-axis label, y-axis label.
merc_panels <- list(
  c("tissue", "Organs",      "Merc concentration"),
  c("Temp",   "Temperature", "Merc concentration"),
  c("pH",     "pH",          "Merc concentration"),
  c("Tank",   "Tanks",       "Merc  concentration")
)
par(mfrow = c(2, 2))
for (panel in merc_panels) {
  boxplot(
    reformulate(sprintf("factor(%s)", panel[1]), response = "Merc"),
    data = merc_M,
    xlab = panel[2],
    ylab = panel[3],
    cex.lab = 1.5
  )
}

# G. Balance: observations per level within the contaminated subset
for (design_var in c("Temp", "pH", "tissue")) {
  print(table(merc_M[[design_var]]))
}
# perfect


#MODELLING

# We just need the difference between non-contamination and contamination for reference
mC <- glm(Merc ~ Cont, data = mercury)
summary(mC)

# Recorded from an earlier run:
# t value = 9.079
# p value = 6.81e-14 ***

# Back to the analysis of the subsets: full Temp x pH x tissue factorial on
# the contaminated treatments only
M1 <- glm(Merc ~ Temp*pH*tissue, data = merc_M)
summary(M1) #AIC: 266.27
stepAIC(M1)

#Including Tank as random effect (ML fit, compared with the GLM by AIC)
mod1 <- lmer(Merc ~ Temp*pH*tissue + (1|Tank), data = merc_M, REML = FALSE)
summary(mod1)
# Share of the total (Tank + residual) variance attributed to the Tank random
# intercept, computed from the fitted variance components instead of numbers
# copied by hand from summary(mod1).
varcomp_mod1 <- as.data.frame(VarCorr(mod1))
VarExplained <- 100 * varcomp_mod1$vcov[varcomp_mod1$grp == "Tank"] /
  sum(varcomp_mod1$vcov)
VarExplained #Recorded earlier: 0.1496/(0.1496+3.8077) = 3.78% (Not significant)

AIC(M1, mod1)
# Recorded from an earlier run:
#     df      AIC
#M1   13 266.2662
#mod1 14 267.6227

#M1 is the best model! (lowest AIC)

#MODEL VALIDATION
# First for the Gaussian model, then for the Gamma model that replaces it.
#1. Homogeneity
#2. Independence
#3. Influential observations
#4. Normality (see the Q-Q panel produced by plot(M1))

M1 <- glm(Merc ~ Temp*pH*tissue, data = merc_M)
par(mfrow = c(2, 2))
plot(M1)

#1. Homogeneity
# Standardized residuals are used; the raw resid() values that were previously
# assigned to E1 were overwritten immediately and never used.
E1 <- rstandard(M1)
F1 <- fitted(M1)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
# Zero-residual reference line (the former vertical lines at fitted value 0
# carried no information and were dropped)
abline(h = 0, lty = 2)
# NOT GOOD! We have to go for Gamma

# MODELLING
# Same factorial structure, Gamma errors with a log link. From here on M1
# refers to this Gamma model.
M1 <- glm(Merc ~ tissue*Temp*pH, family = Gamma(link = log), data = merc_M)
summary(M1) #AIC 218.61
par(mfrow = c(2, 2))
plot(M1)

#MODEL VALIDATION
#1. Homogeneity
#2. Independence
#3. Influential observations

#1. Homogeneity
E1 <- rstandard(M1)
F1 <- fitted(M1)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
abline(h = 0, lty = 2)
#Much better!

# 2. Dependence due to model misfit
#Plot residuals versus covariates (Tank is not in the model)
plot(x = merc_M$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Tank is taken care of

# 3. Look at influential observations
# Cook's distance per observation; the red line marks the threshold of 1
par(mfrow = c(1, 1))
plot(cooks.distance(M1), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)
# Perfect!!

#However, we will break the model, in order to better explain the triple interaction:
# - 1 (M1a) model for preferential HgT accumulation in organ tissue
# - 3 (M1b, M1c and M1d) models (1 for each organ) describing
#   pH and Temp effects within organs

# HgT accumulation in the organs (Gamma GLM, log link), first with the default
# reference tissue...
M1a <- glm(Merc ~ tissue, family = Gamma(link = log), data = merc_M)
summary(M1a)

# ...then with gills as the reference level, to read off the remaining
# pairwise contrast. factor() makes relevel() work when tissue was read as a
# character column (the read.csv() default since R 4.0); it changes nothing
# when tissue is already a factor with all levels in use.
merc_M$tissue <- relevel(factor(merc_M$tissue), "gills")
M1a <- glm(Merc ~ tissue, family = Gamma(link = log), data = merc_M)
summary(M1a) #AIC: 270.26

# M1a. model HgT per tissue (pairwise contrasts collected by hand from fits
# with different reference levels, recorded from an earlier run):
# Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)  1.57632    0.08249  19.109  < 2e-16 ***
#muscle-gills  0.46956    0.12810   3.665 0.000565 ***
#muscle-liver  0.66031    0.13043   5.063 5.13e-06 ***   
#gills-liver  0.19075    0.14076   1.355 0.181006    


# Overall F test for the tissue effect
anova(M1a, test = "F")
#F value = 14.015 
#p value = 1.252e-05

#Further subsetting:
# One data set per organ, taken from the contaminated treatments (merc_M).
# NOTE(review): the tissue codes "musc", "gills" and "liver" must match the
# spelling in mercury.csv exactly; a mismatch would silently give an empty
# subset - confirm against the file.
merc_musc<-subset(merc_M,tissue=="musc")
merc_gill<-subset(merc_M,tissue=="gills")
merc_liver<-subset(merc_M,tissue=="liver")

# Each organ gets the same model: Gamma GLM with log link and a Temp x pH
# interaction. The coefficient tables below were recorded from an earlier run.

# HgT in the muscle
M1b<- glm(Merc~ Temp*pH, family=Gamma(link=log), data = merc_musc)
summary(M1b) #AIC: 59.754

#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)    1.52002    0.06252  24.311 2.53e-16 ***
#Temp23C       -0.04818    0.08842  -0.545   0.5919    
#pHpH8         -0.20051    0.08842  -2.268   0.0346 *  
#Temp23C:pHpH8  0.62740    0.12505   5.017 6.61e-05 ***

# HgT in the gills
M1c<- glm(Merc~ Temp*pH, family=Gamma(link=log), data = merc_gill)
summary(M1c) #AIC: 73.298

#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)     2.0959     0.1252  16.736 3.55e-10 ***
#Temp23C        -0.9167     0.1913  -4.792 0.000352 ***
#pHpH8          -0.1569     0.1617  -0.970 0.349595    
#Temp23C:pHpH8   1.4523     0.2505   5.799 6.19e-05 ***

# HgT in the liver
M1d<- glm(Merc~ Temp*pH, family=Gamma(link=log), data = merc_liver)
summary(M1d) #AIC: 81.98

#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)     2.2869     0.1507  15.178  3.4e-09 ***
#Temp23C        -0.7943     0.2610  -3.043 0.010208 *  
#pHpH8          -0.2945     0.1945  -1.514 0.155920    
#Temp23C:pHpH8   1.4675     0.3255   4.508 0.000716 ***


# PLOT THE MODELS

# Figure 1a: mean HgT (+/- SE) per tissue, non-contaminated vs contaminated

# Axis / legend labels shared by the Figure 1 panels. The micro sign is
# written as a Unicode escape so it survives re-encoding of this file
# (it had been turned into "?").
pH8 <- expression("400 \u00b5atm")
pH7.5 <- expression("1500 \u00b5atm")
M <- expression("Contaminated")
C <- expression("Non-contaminated")

# Mean and standard error per Cont x tissue cell. The SE uses the actual
# number of observations in each cell rather than a hard-coded n of 16.
# aggregate() names the grouping columns Group.1 (Cont) and Group.2 (tissue)
# and the value column x.
merc_cells <- list(mercury$Cont, mercury$tissue)
meanMerc <- aggregate(mercury$Merc, merc_cells, mean)
meanMerc$se <- aggregate(mercury$Merc, merc_cells,
                         function(x) sd(x) / sqrt(length(x)))[, 3]

ylab <- expression("HgT concentration (" ~ mg ~ kg^{-1} ~ ")")
musc <- expression("Muscle")
gills <- expression("Gills")
liver <- expression("Liver")

# Legend entries are matched to tissue codes explicitly. Without 'breaks' the
# labels are assigned to the factor levels in sorted order, which does not
# follow the musc / gills / liver order of the label vector.
# Assumes the tissue codes are "musc", "gills" and "liver", as used when
# subsetting by organ - confirm against mercury.csv.
tissue_breaks <- c("musc", "gills", "liver")
tissue_labels <- c(musc, gills, liver)

theme_set(theme_classic(base_size = 22))
jpeg("Merc.jpg", width = 20, height = 20, units = "cm", res = 500)
gp_p <- ggplot(meanMerc,
               aes(x = as.factor(Group.1), y = x, group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab("MeHg") + ylab(ylab) + labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(breaks = tissue_breaks, labels = tissue_labels) +
  scale_shape_discrete(breaks = tissue_breaks, labels = tissue_labels) +
  scale_x_discrete(breaks = c("C", "M"),
                   labels = c("Non-contaminated", "Contaminated")) +
  scale_y_continuous(limits = c(0, 15))
# Explicit print(): a bare object name is not drawn when the script is run via
# source(), which would leave the JPEG empty.
print(gp_p)
dev.off()

# Figure 1b (muscle): mean HgT (+/- SE) per pCO2 level, one line per temperature

# Put pH8 first so it is drawn first on the x axis. factor() makes relevel()
# work when pH was read as a character column (read.csv() default since R 4.0).
merc_musc$pH <- relevel(factor(merc_musc$pH), "pH8")

# Mean and standard error per pH x Temp cell (Group.1 = pH, Group.2 = Temp).
# The SE uses the actual cell size rather than a hard-coded n of 6.
musc_cells <- list(merc_musc$pH, merc_musc$Temp)
meanMerc <- aggregate(merc_musc$Merc, musc_cells, mean)
meanMerc$se <- aggregate(merc_musc$Merc, musc_cells,
                         function(x) sd(x) / sqrt(length(x)))[, 3]

ylab <- expression("HgT concentration (" ~ mg ~ kg^{-1} ~ ")")
xlab <- expression("pCO"[2])
# Degree sign written as a Unicode escape (it had been mis-encoded as "?")
temp_labels <- c("19 \u00b0C", "23 \u00b0C")

jpeg("Merc Musc.jpg", width = 20, height = 20, units = "cm", res = 500)
gp_p1 <- ggplot(meanMerc,
                aes(x = as.factor(Group.1), y = x, group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) + ylab(ylab) + labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = temp_labels) +
  scale_shape_discrete(labels = temp_labels) +
  scale_y_continuous(limits = c(0, 15)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the JPEG is also written when the script is source()d
print(gp_p1)
dev.off()

# Figure 1c (gills): mean HgT (+/- SE) per pCO2 level, one line per temperature

# Put pH8 first so it is drawn first on the x axis. factor() makes relevel()
# work when pH was read as a character column (read.csv() default since R 4.0).
merc_gill$pH <- relevel(factor(merc_gill$pH), "pH8")

# Mean and standard error per pH x Temp cell (Group.1 = pH, Group.2 = Temp).
# The SE uses the actual cell size rather than a hard-coded n of 4.
gill_cells <- list(merc_gill$pH, merc_gill$Temp)
meanMerc <- aggregate(merc_gill$Merc, gill_cells, mean)
meanMerc$se <- aggregate(merc_gill$Merc, gill_cells,
                         function(x) sd(x) / sqrt(length(x)))[, 3]

ylab <- expression("HgT concentration (" ~ mg ~ kg^{-1} ~ ")")
xlab <- expression("pCO"[2])
# Degree sign written as a Unicode escape (it had been mis-encoded as "?")
temp_labels <- c("19 \u00b0C", "23 \u00b0C")

jpeg("Merc Gills.jpg", width = 20, height = 20, units = "cm", res = 500)
gp_p2 <- ggplot(meanMerc,
                aes(x = as.factor(Group.1), y = x, group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) + ylab(ylab) + labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = temp_labels) +
  scale_shape_discrete(labels = temp_labels) +
  scale_y_continuous(limits = c(0, 18)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the JPEG is also written when the script is source()d
print(gp_p2)
dev.off()

# Figure 1d (liver): mean HgT (+/- SE) per pCO2 level, one line per temperature

# Put pH8 first so it is drawn first on the x axis. factor() makes relevel()
# work when pH was read as a character column (read.csv() default since R 4.0).
merc_liver$pH <- relevel(factor(merc_liver$pH), "pH8")

# Mean and standard error per pH x Temp cell (Group.1 = pH, Group.2 = Temp).
# The SE uses the actual cell size rather than a hard-coded n of 4.
liver_cells <- list(merc_liver$pH, merc_liver$Temp)
meanMerc <- aggregate(merc_liver$Merc, liver_cells, mean)
meanMerc$se <- aggregate(merc_liver$Merc, liver_cells,
                         function(x) sd(x) / sqrt(length(x)))[, 3]

ylab <- expression("HgT concentration (" ~ mg ~ kg^{-1} ~ ")")
xlab <- expression("pCO"[2])
# Degree sign written as a Unicode escape (it had been mis-encoded as "?")
temp_labels <- c("19 \u00b0C", "23 \u00b0C")

jpeg("Merc Liver.jpg", width = 20, height = 20, units = "cm", res = 500)
gp_p3 <- ggplot(meanMerc,
                aes(x = as.factor(Group.1), y = x, group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) + ylab(ylab) + labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = temp_labels) +
  scale_shape_discrete(labels = temp_labels) +
  scale_y_continuous(limits = c(0, 18)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the JPEG is also written when the script is source()d
print(gp_p3)
dev.off()

#JOIN INTO FIGURE 1


# Draw several plots on one page using a grid layout.
#
# Arguments:
#   ...       plot objects whose print() method accepts a 'vp' (viewport)
#             argument, e.g. ggplot objects
#   plotlist  optional list of further plots, appended after those in '...'
#   file      unused; kept so existing calls that pass it keep working
#   cols      number of columns when 'layout' is not supplied
#   layout    optional integer matrix; the cell(s) holding value i are where
#             plot i is drawn. When NULL, a matrix with 'cols' columns is
#             built and filled column by column, so with cols = 2 and four
#             plots, plot 2 goes bottom-left and plot 3 top-right.
#
# Called for its side effect of drawing on the current graphics device.
# Returns invisible NULL without drawing when no plots are supplied.
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: stop here instead of failing on an empty layout
  if (num_plots == 0L) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    num_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * num_rows), ncol = cols, nrow = num_rows)
  }

  if (num_plots == 1L) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Make each plot, in the correct location
    for (i in seq_len(num_plots)) {
      # Row/column positions of the layout cells that contain this subplot
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = grid::viewport(layout.pos.row = cell$row,
                                            layout.pos.col = cell$col))
    }
  }
}

# Figure 1: the four panels on one 2 x 2 page. multiplot() fills its layout
# column by column, so this order puts gp_p top-left, gp_p1 top-right,
# gp_p2 bottom-left and gp_p3 bottom-right.
jpeg(filename = "Merc Aggregate.jpg", units = "cm", width = 40, height = 40,
     res = 500)
multiplot(plotlist = list(gp_p, gp_p2, gp_p1, gp_p3), cols = 2)
dev.off()

#########################################################################

#### MDA ####

# Import data (semicolon-separated file).
# stringsAsFactors = TRUE keeps the categorical columns (Temp, pH, Cont) as
# factors on R >= 4.0 as well; the residual-vs-covariate plots and relevel()
# calls further down require factors.
Enz <- read.csv(file = "enzimas.csv",
                header = TRUE, sep = ";", stringsAsFactors = TRUE)

# DATA EXPLORATION
# A. Outliers
# B. Outliers in the X
# C. Collinearity
# D. Conditional boxplot
# E. Interactions
# F. Zero inflation
# G. Balance
str(Enz)
# Tank is used as a grouping variable (boxplot groups, random intercept)
Enz$Tank <- factor(Enz$Tank)
summary(Enz)

# The four biomarker columns of Enz, explored together in this section
enz_vars <- c("MDA", "GST", "CAT", "SOD")

# A. Outliers: a boxplot followed by a Cleveland dotplot for each biomarker,
# drawn on 2 x 2 pages
par(mfrow = c(2, 2))
for (enz_var in enz_vars) {
  boxplot(Enz[[enz_var]], main = enz_var)
  dotchart(
    Enz[[enz_var]],
    ylab = "Order of the data",
    xlab = "Range of data"
  )
}

# No clear outliers

# B. Outliers in the X: the four dotplots on a single page
par(mfrow = c(2, 2), mar = c(4, 3, 3, 2))
for (enz_var in enz_vars) {
  dotchart(Enz[[enz_var]], main = enz_var)
}
# No clear outliers

# C. Collinearity (not really important here)

# D. Conditional boxplots: spread of each biomarker within each tank
par(mfrow = c(2, 2), mar = c(4, 3, 3, 2))
for (enz_var in enz_vars) {
  boxplot(
    reformulate("factor(Tank)", response = enz_var),
    data = Enz,
    xlab = "Tanks",
    ylab = paste(enz_var, "concentration"),
    cex.lab = 1.5
  )
}
# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
# We won't have it here

# G. Balance: number of observations per level of each design factor
for (design_var in c("Temp", "pH", "Cont")) {
  print(table(Enz[[design_var]]))
}
# perfect

# MODELLING


# Frequentist approach to MDA Model
# Full Temp x pH x Cont factorial, then the Temp x Cont model kept after
# stepwise AIC selection
M1 <- glm(MDA ~ Temp*pH*Cont, data = Enz)
summary(M1) #AIC: -273.31
stepAIC(M1)
M1a <- glm(MDA ~ Temp*Cont, data = Enz)
summary(M1a) #AIC: -277.22

# Mixed modelling with tank as random effect #
# (ML fits, compared with the GLMs by AIC)

mod1 <- lmer(MDA ~ Temp*pH*Cont + (1|Tank), data = Enz, REML = FALSE)
summary(mod1)
# Share of the total (Tank + residual) variance attributed to the Tank random
# intercept, computed from the fitted variance components instead of numbers
# copied by hand from summary(mod1).
varcomp_mod1 <- as.data.frame(VarCorr(mod1))
VarExplained <- 100 * varcomp_mod1$vcov[varcomp_mod1$grp == "Tank"] /
  sum(varcomp_mod1$vcov)
VarExplained #Recorded earlier: 0/(0+0.0001259) = 0% of variance
mod1a <- lmer(MDA ~ Temp*Cont + (1|Tank), data = Enz, REML = FALSE)
summary(mod1a)
AIC(M1, M1a, mod1, mod1a)
# Recorded from an earlier run:
#       df       AIC
# M1     9 -273.3124
# M1a    5 -277.2175
# mod1  10 -271.3124
# mod1a  6 -275.2175

#M1a is the best model! (lowest AIC)

summary(M1a)
# Recorded from an earlier run:
#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)    0.025951   0.003222   8.055 4.69e-10 ***
#Temp23C       -0.009855   0.004556  -2.163   0.0363 *  
#ContM         -0.004348   0.004556  -0.954   0.3454    
#Temp23C:ContM  0.014353   0.006603   2.174   0.0354 *    

#Model validation
# Residual diagnostics for the selected MDA model, M1a (MDA ~ Temp * Cont):
#1. Homogeneity
#2. Independence
#3. Influential observations

par(mfrow = c(2, 2))
plot(M1a)

#1. Homogeneity
# Standardized residuals against fitted values
E1 <- rstandard(M1a)
F1 <- fitted(M1a)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
# Zero-residual reference line (the former vertical line at fitted value 0
# carried no information and was dropped)
abline(h = 0, lty = 2)
# A bit heterogenic, but okay

#2. Dependence due to model misfit
#Plot residuals versus covariates, including those left out of the model
plot(x = Enz$Temp,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$pH,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Cont,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Possible dependence on Tank, but mixed effects model ruled that out

#3. Look at influential observations
# Cook's distance of the selected model M1a. This previously used the full
# model M1, i.e. a different model from the one validated above.
par(mfrow = c(1, 1))
plot(cooks.distance(M1a), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)

#Or:
plot(M1a, which = c(4))
# Perfect, no overly influential values

# PLOT THE MODEL
# Mean MDA (+/- SE) for non-contaminated vs contaminated fish, one line per
# temperature. The result (gp_p) is reused as a panel of Figure 3.

# Shared axis / legend labels. The micro sign is written as a Unicode escape
# (the character had been lost, leaving "400 atm").
pH8 <- expression("400 \u00b5atm")
pH7.5 <- expression("1500 \u00b5atm")
M <- expression("Contaminated")
C <- expression("Non-contaminated")
musc <- expression("Muscle")
gills <- expression("Gills")
liver <- expression("Liver")

#Calculate mean and se
# Per Cont x Temp cell (Group.1 = Cont, Group.2 = Temp). The SE uses the
# actual number of observations in each cell rather than a hard-coded n of 6.
mda_cells <- list(Enz$Cont, Enz$Temp)
meanMDA <- aggregate(Enz$MDA, mda_cells, mean)
meanMDA$se <- aggregate(Enz$MDA, mda_cells,
                        function(x) sd(x) / sqrt(length(x)))[, 3]

ylab <- expression("MDA concentration (" ~ nmol ~ mg^{-1} ~ protein ~ ")")
xlab <- expression("pCO"[2])
# Degree sign written as a Unicode escape
temp_labels <- c("19 \u00b0C", "23 \u00b0C")

theme_set(theme_classic(base_size = 22))
jpeg("MDA.jpg", width = 20, height = 20, units = "cm", res = 500)
gp_p <- ggplot(meanMDA,
               aes(x = as.factor(Group.1), y = x, group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab("MeHg") + ylab(ylab) + labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = temp_labels) +
  scale_shape_discrete(labels = temp_labels) +
  scale_x_discrete(breaks = c("C", "M"),
                   labels = c("Non-contaminated", "Contaminated")) +
  scale_y_continuous(limits = c(0, 0.04))
# Explicit print() so the JPEG is also written when the script is source()d
print(gp_p)
dev.off()

#######################################################################################

#### CAT ####

# Import data (semicolon-separated file).
# stringsAsFactors = TRUE keeps the categorical columns (Temp, pH, Cont) as
# factors on R >= 4.0 as well; pairs(), relevel() and the
# residual-vs-covariate plots in this section require factors.
Enz <- read.csv(file = "enzimas.csv",
                header = TRUE, sep = ";", stringsAsFactors = TRUE)
# Re-reading the file discards the earlier conversion, so make Tank a factor
# again (as the MDA, SOD and GST sections do).
Enz$Tank <- factor(Enz$Tank)

# DATA EXPLORATION
# A. Outliers
# B. Outliers in the X
# C. Collinearity
# D. Conditional boxplot
# E. Interactions
# F. Zero inflation
# G. Balance

# A and B. Outliers: boxplot and Cleveland dotplot of CAT
par(mfrow = c(1, 2))
boxplot(Enz$CAT,
        main = "CAT")
dotchart(Enz$CAT,
         xlab = "Range of data",
         ylab = "Order of the data")
#seems okay

# C. Collinearity (not really important here)
# Pairwise scatterplot matrix of the three design factors
MyVar <- c("Temp", "pH", "Cont")
pairs(Enz[, MyVar])

# D. Conditional boxplots: spread of CAT within each tank
# (the y-axis label previously read "GST concentration")
par(mfrow = c(1, 1))
boxplot(CAT ~ factor(Tank),
        data = Enz,
        ylab = "CAT concentration",
        xlab = "Tanks",
        cex.lab = 1.5)
# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
#We won't have it here

# G. Balance: number of observations per level of each design factor
table(Enz$Temp)
table(Enz$pH)
table(Enz$Cont) #perfect

# MODELLING

# Frequentist approach to CAT Model
# Full Temp x pH x Cont factorial, then the pH x Cont model kept after
# stepwise AIC selection. Note that M1 is overwritten by the reduced model.
M1 <- glm(CAT ~ Temp*pH*Cont, data = Enz)
summary(M1) #AIC: 168.32
stepAIC(M1)
M1 <- glm(CAT ~ pH*Cont, data = Enz)
summary(M1) #AIC: 166.15

# Same fixed effects with a random intercept per Tank (ML fits)
mod1 <- lmer(CAT ~ Temp*pH*Cont + (1|Tank), data = Enz, REML = FALSE)
summary(mod1)
# Share of the total (Tank + residual) variance attributed to the Tank random
# intercept, computed from the fitted variance components instead of numbers
# copied by hand from summary(mod1).
varcomp_mod1 <- as.data.frame(VarCorr(mod1))
VarExplained <- 100 * varcomp_mod1$vcov[varcomp_mod1$grp == "Tank"] /
  sum(varcomp_mod1$vcov)
VarExplained #Recorded earlier: 0/(0+1.537) = 0% of variance
step(mod1) #all the same
mod2 <- lmer(CAT ~ pH*Cont + (1|Tank), data = Enz, REML = FALSE)

# M1 here is the reduced pH x Cont GLM
AIC(M1, mod1, mod2)
# Recorded from an earlier run:
#     df      AIC
#M1    9 166.1517
#mod1 10 170.3142
#mod2  6 168.1517
# NOTE(review): df = 9 for M1 matches the full factorial GLM, not the reduced
# model (expected df = 5) - the table looks partly stale; re-run to confirm.
#M1 is the best model! 
summary(M1)

# Recorded from an earlier run:
#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)   4.3747     0.3991  10.962 6.74e-14 ***
#pHpH8        -0.4536     0.5644  -0.804    0.426    
#ContM         1.4818     0.5644   2.625    0.012 *  
#pHpH8:ContM   1.3130     0.8179   1.605    0.116    

#MODEL VALIDATION
# Residual diagnostics for the selected CAT model (CAT ~ pH * Cont):
#1. Homogeneity
#2. Independence
#3. Influential observations
#4. Normality (see the Q-Q panel produced by plot(M1))
#5. Does it all make sense?

M1 <- glm(CAT ~ pH*Cont, data = Enz)
summary(M1)
par(mfrow = c(2, 2))
plot(M1)

#1. Homogeneity
# Standardized residuals are used; the raw resid() values that were previously
# assigned to E1 were overwritten immediately and never used.
E1 <- rstandard(M1)
F1 <- fitted(M1)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
# Zero-residual reference line (the former vertical lines at fitted value 0
# carried no information and were dropped)
abline(h = 0, lty = 2)
# Good, no non-linear patterns

# 2. Dependence due to model misfit
#Plot residuals versus covariates, including those left out of the model
plot(x = Enz$Temp,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$pH,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Cont,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Tank is taken care of

# 3. Look at influential observations
# Cook's distance per observation; the red line marks the threshold of 1
par(mfrow = c(1, 1))
plot(cooks.distance(M1), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)

# PLOT THE MODEL
# Boxplot of CAT activity for non-contaminated vs contaminated fish, then
# Figure 3 (this panel next to the MDA panel gp_p built in the MDA section).

# Put pH8 first for later plots. factor() makes relevel() work when pH was
# read as a character column (read.csv() default since R 4.0).
Enz$pH <- relevel(factor(Enz$pH), "pH8")

# Shared axis / legend labels. The micro sign is written as a Unicode escape
# (the character had been lost, leaving "400 atm").
pH8 <- expression("400 \u00b5atm")
pH7.5 <- expression("1500 \u00b5atm")
M <- expression("Contaminated")
C <- expression("Non-contaminated")
musc <- expression("Muscle")
gills <- expression("Gills")
liver <- expression("Liver")

# "inhibition" was misspelled "inhibtion" in the axis label
ylab <- expression("CAT activity (" ~ inhibition ~ mg^{-1} ~ protein ~ ")")
xlab <- expression("pCO"[2])

jpeg("CAT.jpg", width = 20, height = 20, units = "cm", res = 500)
# No shape aesthetic is mapped in this plot, so the shape scale that used to
# be added here had no effect and was removed.
gp_p1 <- ggplot(Enz, aes(x = Cont, y = CAT)) +
  geom_boxplot() +
  xlab("MeHg") + ylab(ylab) +
  scale_x_discrete(breaks = c("C", "M"),
                   labels = c("Non-contaminated", "Contaminated")) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )
# Explicit print() so the JPEG is also written when the script is source()d
print(gp_p1)
dev.off()

# Figure 3: CAT boxplot (left) and MDA plot (right) side by side
jpeg("Figure 3.jpg", width = 40, height = 20, units = "cm", res = 500)
multiplot(gp_p1, gp_p, cols = 2)
dev.off()

####################################################################################

#### SOD ####

# Import data (semicolon-separated file).
# stringsAsFactors = TRUE keeps the categorical columns (Temp, pH, Cont) as
# factors on R >= 4.0 as well; pairs(), relevel() and the
# residual-vs-covariate plots in this section require factors.
Enz <- read.csv(file = "enzimas.csv",
                header = TRUE, sep = ";", stringsAsFactors = TRUE)

str(Enz)
# Tank is used as a grouping variable (boxplot groups, random intercept)
Enz$Tank <- factor(Enz$Tank)
summary(Enz)

# DATA EXPLORATION
# A. Outliers
# B. Outliers in the X
# C. Collinearity
# D. Conditional boxplot
# E. Interactions
# F. Zero inflation
# G. Balance

# A and B. Outliers: boxplot and Cleveland dotplot of SOD, side by side
par(mfrow = c(1, 2))
boxplot(Enz[["SOD"]], main = "SOD")
dotchart(
  Enz[["SOD"]],
  ylab = "Order of the data",
  xlab = "Range of data"
)
# seems okay

# C. Collinearity (not really important here)
# Scatterplot matrix of the three design factors
MyVar <- c("Temp", "pH", "Cont")
pairs(Enz[MyVar])

# D. Conditional boxplots: spread of SOD within each tank
par(mfrow = c(1, 1))
boxplot(
  SOD ~ factor(Tank),
  data = Enz,
  xlab = "Tanks",
  ylab = "SOD concentration",
  cex.lab = 1.5
)

# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
# We won't have it here

# G. Balance: number of observations per level of each design factor
for (design_var in MyVar) {
  print(table(Enz[[design_var]]))
}
# perfect

# MODELLING
# Candidate models for SOD:
#   M1   - Gaussian GLM, full Temp x pH x Cont factorial
#   M2   - Gaussian GLM with the Temp:pH and pH:Cont interactions only
#   mod1, mod2 - the same two fixed-effect structures plus a random intercept
#                per Tank, fitted with REML=FALSE and compared by AIC below

#Without random effects
M1<- glm(SOD~ Temp*pH*Cont ,data = Enz)
summary(M1) #AIC: 239.12
# Stepwise AIC selection from the full model (stepAIC() is provided by MASS)
stepAIC(M1)
M2<- glm(SOD~ Temp*pH+pH*Cont ,data = Enz)
summary(M2) #237.05

#Including Tank as random effect
mod1<- lmer(SOD ~ Temp*pH*Cont + (1|Tank), data = Enz, REML=FALSE)
summary(mod1)
mod2<- lmer(SOD ~ Temp*pH+pH*Cont + (1|Tank), data = Enz, REML=FALSE)
summary(mod2)
AIC(M1,M2,mod1,mod2)
# Output recorded from an earlier run:
#     df      AIC
#M1    9 240.7132
#M2    7 237.3317
#mod1 10 242.7132
#mod2  8 239.3317
# NOTE(review): these AIC values differ slightly from the ones noted next to
# the summary() calls above (239.12 / 237.05) - the recorded numbers appear to
# come from different runs; re-run to confirm.

#M2 is the best model (lowest AIC)
summary(M2)

# Output recorded from an earlier run:
#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)      9.496      1.040   9.135 2.45e-11 ***
#Temp23C         -3.346      1.200  -2.787 0.008091 ** 
#pHpH8           -1.264      1.484  -0.852 0.399148    
#ContM            1.614      1.200   1.344 0.186440    
#Temp23C:pHpH8    6.319      1.744   3.623 0.000812 ***
#pHpH8:ContM     -3.399      1.744  -1.949 0.058312 .  

#MODEL VALIDATION
# Residual diagnostics for the selected SOD model, M2
# (SOD ~ Temp*pH + pH*Cont):
#1. Homogeneity
#2. Independence
#3. Influential observations
#4. Normality (see the Q-Q panel produced by plot(M2))

M2 <- glm(SOD ~ Temp*pH + pH*Cont, data = Enz)
par(mfrow = c(2, 2))
plot(M2)

#1. Homogeneity
# Standardized residuals are used; the raw resid() values that were previously
# assigned to E1 were overwritten immediately and never used.
E1 <- rstandard(M2)
F1 <- fitted(M2)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
# Zero-residual reference line (the former vertical lines at fitted value 0
# carried no information and were dropped)
abline(h = 0, lty = 2)
# Good

# 2. Dependence due to model misfit
#Plot residuals versus covariates
plot(x = Enz$Temp,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$pH,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Cont,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Tank is taken care of

# 3. Look at influential observations
# Cook's distance of the selected model M2. This previously used the full
# model M1, i.e. a different model from the one validated above.
par(mfrow = c(1, 1))
plot(cooks.distance(M2), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)


# PLOT THE MODEL
# Quick base-graphics look at the Temp x pH interaction
interaction.plot(Enz$Temp,Enz$pH,Enz$SOD,type="b", pch=c(1,4))

# Put pH8 first so it is drawn first on the x axis. factor() makes relevel()
# work when pH was read as a character column (read.csv() default since R 4.0).
Enz$pH <- relevel(factor(Enz$pH), "pH8")

# Shared axis / legend labels. The micro sign is written as a Unicode escape
# (the character had been lost, leaving "400 atm").
pH8 <- expression("400 \u00b5atm")
pH7.5 <- expression("1500 \u00b5atm")
M <- expression("Contaminated")
C <- expression("Non-contaminated")
musc <- expression("Muscle")
gills <- expression("Gills")
liver <- expression("Liver")

#Calculate mean and se
# Per pH x Temp cell (Group.1 = pH, Group.2 = Temp). The SE uses the actual
# number of observations in each cell rather than a hard-coded n of 6.
sod_cells <- list(Enz$pH, Enz$Temp)
meanSOD1 <- aggregate(Enz$SOD, sod_cells, mean)
meanSOD1$se <- aggregate(Enz$SOD, sod_cells,
                         function(x) sd(x) / sqrt(length(x)))[, 3]

# "inhibition" was misspelled "inhibtion" in the axis label
ylab <- expression("SOD activity (" ~ inhibition ~ mg^{-1} ~ protein ~ ")")
xlab <- expression("pCO"[2])
# Degree sign written as a Unicode escape
temp_labels <- c("19 \u00b0C", "23 \u00b0C")

jpeg("SOD.jpg", width = 20, height = 20, units = "cm", res = 500)
gp_p <- ggplot(meanSOD1,
               aes(x = as.factor(Group.1), y = x, group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) + ylab(ylab) + labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = temp_labels) +
  scale_shape_discrete(labels = temp_labels) +
  scale_y_continuous(limits = c(0, 13)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the JPEG is also written when the script is source()d
print(gp_p)
dev.off()

################################################################################

#### GST ####

# Import data (semicolon-separated file).
# stringsAsFactors = TRUE keeps the categorical columns (Temp, pH, Cont) as
# factors on R >= 4.0 as well; pairs(), relevel() and the
# residual-vs-covariate plots in this section require factors.
Enz <- read.csv(file = "enzimas.csv",
                header = TRUE, sep = ";", stringsAsFactors = TRUE)

str(Enz)
# Tank is used as a grouping variable (boxplot groups, random intercept)
Enz$Tank <- factor(Enz$Tank)
summary(Enz)

# DATA EXPLORATION
# A. Outliers
# B. Outliers in the X
# C. Collinearity
# D. Conditional boxplot
# E. Interactions
# F. Zero inflation
# G. Balance

# A and B. Outliers: boxplot and Cleveland dotplot of GST, side by side
par(mfrow = c(1, 2))
boxplot(Enz[["GST"]], main = "GST")
dotchart(
  Enz[["GST"]],
  ylab = "Order of the data",
  xlab = "Range of data"
)
# seems okay

# C. Collinearity (not really important here)
# Scatterplot matrix of the three design factors
MyVar <- c("Temp", "pH", "Cont")
pairs(Enz[MyVar])

# D. Conditional boxplots: spread of GST within each tank
par(mfrow = c(1, 1))
boxplot(
  GST ~ factor(Tank),
  data = Enz,
  xlab = "Tanks",
  ylab = "GST concentration",
  cex.lab = 1.5
)
# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
# We won't have it here

# G. Balance: number of observations per level of each design factor
for (design_var in MyVar) {
  print(table(Enz[[design_var]]))
}
# perfect

# MODELLING


# Frequentist approach to GST Model
# Full Temp x pH x Cont factorial GLM, with stepwise AIC selection
M1 <- glm(GST ~ Temp*pH*Cont, data = Enz)
summary(M1)
stepAIC(M1) #AIC: 186.08

par(mfrow = c(2, 2))
plot(M1)

# Same fixed effects with a random intercept per Tank (ML fit)
mod1 <- lmer(GST ~ Temp*pH*Cont + (1|Tank), data = Enz, REML = FALSE)
summary(mod1)
# Share of the total (Tank + residual) variance attributed to the Tank random
# intercept, computed from the fitted variance components instead of numbers
# copied by hand from summary(mod1).
varcomp_mod1 <- as.data.frame(VarCorr(mod1))
VarExplained <- 100 * varcomp_mod1$vcov[varcomp_mod1$grp == "Tank"] /
  sum(varcomp_mod1$vcov)
VarExplained #Recorded earlier: 0/(0+2.262) = 0% of variance
step(mod1)
AIC(M1, mod1)
# Recorded from an earlier run:
#     df      AIC
#M1    9 186.0863
#mod1 10 188.0863

#M1 is the best model! (lowest AIC)
summary(M1)

# Recorded from an earlier run:
#Coefficients:
#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)           7.5609     0.6755  11.193 1.36e-13 ***
#Temp23C              -1.1743     0.9553  -1.229  0.22657    
#pHpH8                -2.3200     0.9553  -2.428  0.02001 *  
#ContM                -2.0543     0.9553  -2.150  0.03796 *  
#Temp23C:pHpH8         4.0757     1.3510   3.017  0.00454 ** 
#Temp23C:ContM         2.3754     1.3510   1.758  0.08677 .  
#pHpH8:ContM           4.4271     1.3510   3.277  0.00225 ** 
#Temp23C:pHpH8:ContM  -3.4219     1.9695  -1.737  0.09041 .  

#MODEL VALIDATION
# Residual diagnostics for the selected GST model, M1 (full factorial GLM):
#1. Homogeneity
#2. Independence
#3. Influential observations
#4. Normality

M1 <- glm(GST ~ Temp*pH*Cont, data = Enz)
summary(M1)
par(mfrow = c(2, 2))
# Diagnostic panels of the model being validated. This previously plotted the
# mixed model mod1, which was rejected in favour of M1.
plot(M1)

#1. Homogeneity
# Standardized residuals are used; the raw resid() values that were previously
# assigned to E1 were overwritten immediately and never used.
E1 <- rstandard(M1)
F1 <- fitted(M1)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
# Zero-residual reference line (the former vertical line at fitted value 0
# carried no information and was dropped)
abline(h = 0, lty = 2)
# Good

# 2. Dependence due to model misfit
#Plot residuals versus covariates
plot(x = Enz$Temp,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$pH,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Cont,
     y = E1)
abline(h = 0, lty = 2)

plot(x = Enz$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Tank was taken care of

# 3. Look at influential observations
# Cook's distance per observation; the red line marks the threshold of 1
par(mfrow = c(1, 1))
plot(cooks.distance(M1), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)

# 4. Normality
# Histogram of the standardized residuals
hist(E1, main = "Normality", breaks = 10)
#Or qq-plot
qqnorm(E1)
qqline(E1)

# PLOT THE MODEL

# Quick look at the pH interactions with contamination and temperature
interaction.plot(Enz$Cont, Enz$pH, Enz$GST, type = "b", pch = c(1, 4))
interaction.plot(Enz$Temp, Enz$pH, Enz$GST, type = "b", pch = c(1, 4))

# Make pH8 the first level so it is drawn first on the x axis.
# factor() keeps this working when pH was read as character (R >= 4.0);
# relevel() only accepts factors.
Enz$pH <- relevel(factor(Enz$pH), "pH8")

# Axis and legend labels
pH8 <- expression("400 atm")
pH7.5 <- expression("1500 atm")
M <- expression("Contaminated")
C <- expression("Non-contaminated")
musc <- expression("Muscle")
gills <- expression("Gills")
liver <- expression("Liver")

# Mean and standard error of GST for every pH x Temp combination.
# The standard error uses the actual group size instead of a fixed n = 6,
# so it stays correct whatever the number of observations per group.
meanGST1 <- aggregate(Enz$GST, list(Enz$pH, Enz$Temp), mean)
meanGST1$se <- aggregate(Enz$GST, list(Enz$pH, Enz$Temp),
                         function(x) sd(x) / sqrt(length(x)))[, 3]

library(ggplot2)

ylab <- expression("GST activity (" ~ inhibition ~ mg^{-1} ~ protein ~ ")")
xlab <- expression("pCO"[2])

# Set the session-wide default theme (later figures rely on it) ...
theme_set(theme_classic(base_size = 22))

jpeg("GSTa.jpg", width = 20, height = 20, units = "cm", res = 500)
# ... and add the theme object itself to the plot. theme_set() returns the
# *previous* theme, so it must not be used as a layer in the "+" chain.
gp_p2 <- ggplot(meanGST1,
                aes(x = as.factor(Group.1), y = x,
                    group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) +
  ylab(ylab) +
  labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = c("19 C", "23 C")) +
  scale_shape_discrete(labels = c("19 C", "23 C")) +
  scale_y_continuous(limits = c(0, 11)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print(): a bare object name draws nothing when the script is
# run through source(), which would leave an empty image file.
print(gp_p2)
dev.off()

# b
# Mean and standard error of GST for every pH x Cont combination.
# The standard error uses the actual group size instead of a fixed n = 6.
meanGST2 <- aggregate(Enz$GST, list(Enz$pH, Enz$Cont), mean)
meanGST2$se <- aggregate(Enz$GST, list(Enz$pH, Enz$Cont),
                         function(x) sd(x) / sqrt(length(x)))[, 3]

jpeg("GSTb.jpg", width = 24, height = 20, units = "cm", res = 500)
# theme_classic() is added directly: theme_set() returns the previous theme
# and is therefore not a valid layer in the "+" chain.
gp_p3 <- ggplot(meanGST2,
                aes(x = as.factor(Group.1), y = x,
                    group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) +
  ylab(ylab) +
  labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = c("Non-contaminated", "Contaminated")) +
  scale_shape_discrete(labels = c("Non-contaminated", "Contaminated")) +
  scale_y_continuous(limits = c(0, 11)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the figure is also drawn when the script is source()d.
print(gp_p3)
dev.off()

# Draw several plot objects on one page, arranged on a grid.
#
# Arguments:
#   ...       plot objects (anything with a print method that accepts `vp`,
#             e.g. ggplot objects).
#   plotlist  optional list of further plot objects, appended after `...`.
#   file      unused; kept so that existing calls remain valid.
#   cols      number of columns of the grid; ignored when `layout` is given.
#   layout    optional matrix of plot indices; the cells holding value i
#             define where plot i is drawn.
#
# Value: for a single plot the result of print(); otherwise NULL (invisibly).
# With no plots at all nothing is drawn and NULL is returned invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  # Nothing to draw: avoid building a degenerate layout and indexing an
  # empty list.
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, derive it from 'cols': indices are filled column-wise
  # into a matrix with as many rows as needed.
  if (is.null(layout)) {
    rows <- ceiling(numPlots / cols)
    layout <- matrix(seq(1, cols * rows), ncol = cols, nrow = rows)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Draw each plot in the layout cell(s) that carry its index.
    # seq_len() instead of 1:numPlots keeps the loop safe for any length.
    for (i in seq_len(numPlots)) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
    }
  }
}

# Figure 4: both GST panels side by side in a single image file.
jpeg("Figure 4.jpg", width = 40, height = 20, units = "cm", res = 500)
multiplot(gp_p2, gp_p3, cols = 2)
dev.off()

#############################################################################

#### HSP ####

# Import data.
# stringsAsFactors = TRUE: the relevel() calls further down only work on
# factors, and since R 4.0 read.csv() no longer converts text columns to
# factors by default.
RHSP <- read.csv(file = "RHSP.csv",
                 header = TRUE, sep = ",",
                 stringsAsFactors = TRUE)

str(RHSP)
RHSP$Tank <- factor(RHSP$Tank)
summary(RHSP)

# DATA EXPLORATION
# A. Outliers
# B. Outliers in the X
# C. Collinearity
# D. Conditional boxplot
# E. Interactions
# F. Zero inflation
# G. Balance

# A and B. Outliers
par(mfrow = c(1, 2))
boxplot(RHSP$HSP,
        main = "HSP")
dotchart(RHSP$HSP,
         xlab = "Range of data",
         ylab = "Order of the data")
# seems okay

# C. Collinearity (not really important here)

# D. Conditional boxplots: HSP against every explanatory variable
par(mfrow = c(2, 2))
boxplot(HSP ~ factor(tissue),
        data = RHSP,
        ylab = "HSP concentration",
        xlab = "Organs",
        cex.lab = 1.5)

boxplot(HSP ~ factor(Temp),
        data = RHSP,
        ylab = "HSP concentration",
        xlab = "Temperature",
        cex.lab = 1.5)

boxplot(HSP ~ factor(pH),
        data = RHSP,
        ylab = "hsp concentration",
        xlab = "pH",
        cex.lab = 1.5)

boxplot(HSP ~ factor(Cont),
        data = RHSP,
        ylab = "hsp concentration",
        xlab = "MeHg",
        cex.lab = 1.5)

boxplot(HSP ~ factor(Tank),
        data = RHSP,
        ylab = "HSP concentration",
        xlab = "Tanks",
        cex.lab = 1.5)

# Not very bad it seems.. but we will add random effects (Tank) regardless

# E. Interactions
# Every covariate is categorical

# F. Zero inflation
# We won't have it here

# G. Balance: number of observations per level of each treatment
table(RHSP$Temp)
table(RHSP$pH)
table(RHSP$tissue)
table(RHSP$Cont) # perfect


# MODELLING

# Full factorial fixed-effects model
M1 <- glm(HSP ~ tissue * Temp * pH * Cont, data = RHSP)
summary(M1) # AIC: 518.6
# Stepwise selection by AIC. stepAIC() lives in MASS, which the package
# list at the top of this script does not attach, so it is called through
# its namespace.
MASS::stepAIC(M1)
# Reduced model
M2 <- glm(HSP ~ tissue * Temp * pH + tissue * Cont * pH, data = RHSP)
summary(M2)

# Including Tank as random effect.
# Fitted by maximum likelihood (REML = FALSE) for the AIC comparison below.
mod1 <- lmer(HSP ~ tissue * Temp * pH * Cont + (1 | Tank),
             data = RHSP, REML = FALSE)
summary(mod1)
# VarExplained<-(0.0000/(0.0000+1.791))*100
# VarExplained #Random effect explains 0% of residual variance (Not significant)
mod2 <- lmer(HSP ~ tissue * Temp * pH + tissue * Cont * pH + (1 | Tank),
             data = RHSP, REML = FALSE)
summary(mod2)

# Compare all four candidates
AIC(M1, M2, mod1, mod2)

#M2 is the best model

# MODEL VALIDATION (final HSP model, M2)
# 1. Homogeneity
# 2. Independence
# 3. Influential observations
# 4. Normality

# Refit the selected model so that this section can be run on its own.
M2 <- glm(HSP ~ tissue * Temp * pH + tissue * Cont * pH, data = RHSP)
par(mfrow = c(2, 2))
plot(M2)

# 1. Homogeneity: standardised residuals against fitted values
E1 <- rstandard(M2)
F1 <- fitted(M2)

plot(x = F1,
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals",
     main = "Homogeneity?")
abline(h = 0, v = 0, lty = 2)
abline(v = 0, lwd = 2, col = 2)
# Okay

# 2. Dependence due to model misfit
# Plot residuals versus covariates
plot(x = RHSP$Tank,
     y = E1)
abline(h = 0, lty = 2)
# Tank is taken care of

# 3. Look at influential observations
# Cook's distances must come from the selected model M2 (not from the full
# model M1); the red line marks the reference value 1.
par(mfrow = c(1, 1))
plot(cooks.distance(M2), type = "h", ylim = c(0, 1))
abline(h = 1, col = 2, lwd = 3)
# NOTE(review): the earlier "Perfect!!" verdict was reached with M1's
# distances -- re-check this plot for M2.

# 4. Normality (listed in the checklist above)
hist(E1, main = "Normality", breaks = 10)
# Or qq-plot
qqnorm(E1)
qqline(E1)


# However, we will break the model, in order to better explain the triple
# interaction:
# - 1 model (M1a) for preferential HSP expression in organ tissue
# - 3 models (M1b, M1c and M1d; 1 for each organ) describing
#   pH and Temperature effects within organs


# HSP concentration in the organs
M1a <- glm(HSP ~ tissue, data = RHSP)
summary(M1a)

# Refit with muscle as the reference level to obtain the remaining pairwise
# contrast. factor() keeps relevel() working when tissue was read as
# character (read.csv() default since R 4.0).
RHSP$tissue <- relevel(factor(RHSP$tissue), "muscle")
M1a <- glm(HSP ~ tissue, data = RHSP)
summary(M1a) # AIC: 521.85

# M1a. model HSP per tissue:
# NOTE(review): the rows below look hand-assembled from the two M1a fits
# (different reference levels) -- confirm before reuse.

# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
# (Intercept)    3.6054     0.2353  15.321  < 2e-16 ***
# gills-liver    1.6075     0.3346   4.804 4.11e-06 ***
# gills-muscle   0.9749     0.3328   2.929  0.00399 **
# muscle-liver   0.6326     0.3346   1.890  0.06087 .

anova(M1a, test = "F")
# F value = 11.732
# p value = 2.018e-05

# Subsetting: one data frame per organ
HSPm <- subset(RHSP, tissue == "muscle")
HSPg <- subset(RHSP, tissue == "gills")
HSPl <- subset(RHSP, tissue == "liver")

# stepAIC() is called through the MASS namespace because the package list
# at the top of this script does not attach MASS.

# HSP in the muscle
M1b <- glm(HSP ~ Temp * pH * Cont, data = HSPm)
MASS::stepAIC(M1b)
M1b <- glm(HSP ~ Temp * pH, data = HSPm)
summary(M1b) # AIC: 137

# Coefficients:
#                 Estimate Std. Error t value Pr(>|t|)
# (Intercept)     4.6707     0.2907  16.066  < 2e-16 ***
# Temp23C        -0.2938     0.4111  -0.715 0.478851
# pHpH8          -0.9549     0.4111  -2.323 0.025119 *
# Temp23C:pHpH8   2.3312     0.5958   3.913 0.000328 ***

# HSP in the gills
M1c <- glm(HSP ~ Temp * pH * Cont, data = HSPg)
MASS::stepAIC(M1c)
M1c <- glm(HSP ~ Temp + Cont, data = HSPg)
summary(M1c) # AIC: 146.38

# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
# (Intercept)   3.5611     0.2546  13.989   <2e-16 ***
# Temp23C      -0.5297     0.2985  -1.775   0.0831 .
# ContM         0.6223     0.2985   2.085   0.0431 *

# HSP in the liver
M1d <- glm(HSP ~ Temp * pH * Cont, data = HSPl)
MASS::stepAIC(M1d)
M1d <- glm(HSP ~ pH * Cont, data = HSPl)
summary(M1d) # AIC: 73.298

# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)
# (Intercept)     5.3755     0.5931   9.064 2.42e-11 ***
# pHpH8        -0.5884     0.8387  -0.702   0.4869
# ContM        -1.3147     0.8387  -1.567   0.1247
# pHpH8:ContM   3.6272     1.2346   2.938   0.0054 **

# PLOT THE MODELS

# Figure 5a: HSP concentration per tissue (boxplot)

# Axis and legend labels
pH8 <- expression("400atm")
pH7.5 <- expression("1500atm")
M <- expression("Contaminated")
C <- expression("Non-contaminated")
`19C` <- expression("19 C")
`23C` <- expression("23 C")

library(ggplot2)

# Session-wide default theme for all Figure 5 panels
theme_set(theme_classic(base_size = 22))
ylab <- expression("HSP concentration (" ~ ng~ mg ^{-1} ~ protein ~")")

jpeg("HSPa.jpg", width = 20, height = 20, units = "cm", res = 500)
# theme_classic() is added directly: theme_set() returns the previous theme
# and is therefore not a valid layer in the "+" chain.
gp_p <- ggplot(RHSP, aes(x = tissue, y = HSP)) +
  geom_boxplot(color = "black") +
  ylab(ylab) +
  theme_classic(base_size = 22) +
  scale_x_discrete(breaks = c("liver", "muscle", "gills"),
                   labels = c("Liver", "Muscle", "Gills")) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.title.x = element_blank()
  )
# Explicit print() so the figure is also drawn when the script is source()d.
print(gp_p)
dev.off()

# Figure 5c (muscle): mean HSP (+/- SE) per pCO2 level, one line per temperature
xlab <- expression("pCO"[2])
# pH8 first on the x axis; factor() keeps relevel() working when pH was
# read as character (read.csv() default since R 4.0).
HSPm$pH <- relevel(factor(HSPm$pH), "pH8")

# Mean and standard error per pH x Temp combination. The standard error
# uses the actual group size instead of a fixed n = 6.
meanHSPm <- aggregate(HSPm$HSP, list(HSPm$pH, HSPm$Temp), mean)
meanHSPm$se <- aggregate(HSPm$HSP, list(HSPm$pH, HSPm$Temp),
                         function(x) sd(x) / sqrt(length(x)))[, 3]

library(ggplot2)

theme_set(theme_classic(base_size = 22))
jpeg("HSPb.jpg", width = 24, height = 20, units = "cm", res = 500)
# theme_classic() is added directly: theme_set() returns the previous theme
# and is therefore not a valid layer in the "+" chain.
gp_p1 <- ggplot(meanHSPm,
                aes(x = as.factor(Group.1), y = x,
                    group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) +
  ylab(ylab) +
  labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = c("19 C", "23 C")) +
  scale_shape_discrete(labels = c("19 C", "23 C")) +
  scale_y_continuous(limits = c(0, 8.5)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the figure is also drawn when the script is source()d.
print(gp_p1)
dev.off()

# Figure 5b (gills): HSP concentration by contamination status (boxplot)

library(ggplot2)

ylab <- expression("HSP concentration (" ~ ng~ mg ^{-1} ~ protein ~")")

jpeg("HSPc.jpg", width = 20, height = 20, units = "cm", res = 500)
# No x-axis title is drawn (xlab(NULL)); the tick labels name the groups.
# The plot maps no shape aesthetic, so no shape scale is needed.
gp_p2 <- ggplot(HSPg, aes(x = Cont, y = HSP)) +
  geom_boxplot(color = "black") +
  xlab(NULL) +
  ylab(ylab) +
  scale_x_discrete(breaks = c("C", "M"),
                   labels = c("Non-contaminated", "Contaminated")) +
  scale_y_continuous(limits = c(0, 10)) +
  theme(
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )
# Explicit print() so the figure is also drawn when the script is source()d.
print(gp_p2)
dev.off()


# Figure 5d (liver): mean HSP (+/- SE) per pCO2 level, one line per
# contamination status
# pH8 first on the x axis; factor() keeps relevel() working when pH was
# read as character (read.csv() default since R 4.0).
HSPl$pH <- relevel(factor(HSPl$pH), "pH8")

# Mean and standard error per pH x Cont combination. The standard error
# uses the actual group size instead of a fixed n = 6.
meanHSPl <- aggregate(HSPl$HSP, list(HSPl$pH, HSPl$Cont), mean)
meanHSPl$se <- aggregate(HSPl$HSP, list(HSPl$pH, HSPl$Cont),
                         function(x) sd(x) / sqrt(length(x)))[, 3]

library(ggplot2)

ylab <- expression("HSP concentration (" ~ ng~ mg ^{-1} ~ protein ~")")
xlab <- expression("pCO"[2])

jpeg("HSPd.jpg", width = 20, height = 20, units = "cm", res = 500)
# theme_classic() is added directly: theme_set() returns the previous theme
# and is therefore not a valid layer in the "+" chain. The global default is
# still set first, as before.
theme_set(theme_classic(base_size = 22))
gp_p3 <- ggplot(meanHSPl,
                aes(x = as.factor(Group.1), y = x,
                    group = as.factor(Group.2))) +
  geom_line(aes(linetype = as.factor(Group.2)), size = .6) +
  geom_point(aes(shape = as.factor(Group.2)), size = 4) +
  geom_errorbar(aes(ymax = x + se, ymin = x - se), width = .1) +
  xlab(xlab) +
  ylab(ylab) +
  labs(title = "", shape = "", linetype = "") +
  theme_classic(base_size = 22) +
  scale_linetype_discrete(labels = c("Non-contaminated", "Contaminated")) +
  scale_shape_discrete(labels = c("Non-contaminated", "Contaminated")) +
  scale_y_continuous(limits = c(0, 8.5)) +
  scale_x_discrete(breaks = c("pH8", "pH7.5"), labels = c(pH8, pH7.5))
# Explicit print() so the figure is also drawn when the script is source()d.
print(gp_p3)
dev.off()

#FULL FIGURE 5
# Draw several plot objects on one page, arranged on a grid.
#
# Arguments:
#   ...       plot objects (anything with a print method that accepts `vp`,
#             e.g. ggplot objects).
#   plotlist  optional list of further plot objects, appended after `...`.
#   file      unused; kept so that existing calls remain valid.
#   cols      number of columns of the grid; ignored when `layout` is given.
#   layout    optional matrix of plot indices; the cells holding value i
#             define where plot i is drawn.
#
# Value: for a single plot the result of print(); otherwise NULL (invisibly).
# With no plots at all nothing is drawn and NULL is returned invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  # Nothing to draw: avoid building a degenerate layout and indexing an
  # empty list.
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, derive it from 'cols': indices are filled column-wise
  # into a matrix with as many rows as needed.
  if (is.null(layout)) {
    rows <- ceiling(numPlots / cols)
    layout <- matrix(seq(1, cols * rows), ncol = cols, nrow = rows)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Draw each plot in the layout cell(s) that carry its index.
    # seq_len() instead of 1:numPlots keeps the loop safe for any length.
    for (i in seq_len(numPlots)) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
    }
  }
}

# Full Figure 5: the four HSP panels on a 2 x 2 grid in one image file.
jpeg("HSPfull.jpg", width = 40, height = 40, units = "cm", res = 500)
multiplot(gp_p, gp_p1, gp_p2, gp_p3, cols = 2)
# dev.off must be *called*: a bare `dev.off` only prints the function and
# leaves the jpeg device open, so the file is never finalised.
dev.off()


