library(STEPCAM)
library(MCMCglmm)
library(vegan)
library(rgdal)
library(raster)
library(gdistance)
library(geoclust) # not on CRAN http://lbbe.univ-lyon1.fr/Download-5012.html?lang=fr
library(compositions)
library(gtools)

# Using one CPU core only, the whole distance-based STEPCAM will take months (almost a year)

# Therefore, this script can perform one of the following tasks: 
# 1. Run the whole analysis with the settings used in Hauffe et al. (months)
# 2. Run the whole analysis with inappropriate settings. 
#    Distance-based STEPCAM will not converge (about one day).
# 3. Just load the results of the spatial clustering and distance-based STEPCAM,
#    plot the figures, and run the Bayesian GLM of Hauffe et al. (less than an hour)

# Which of the three tasks listed above is performed (1, 2, or 3)
Run <- 3 # Third option of the list above
if (!Run %in% 1:3) {
  stop("'Run' must be 1, 2, or 3", call. = FALSE)
}

# The geoclust function is only working on Linux and possibly Mac.
# If this script is run on Windows, an already performed clustering should be loaded.
# OS == 1: geoclust may be run; OS == 0: the stored clustering is loaded.
# The unnamed last alternative is the switch() default, so OS is also
# defined (as 0) on any operating system not listed explicitly.
OS <- switch(Sys.info()[["sysname"]],
             Linux = ,
             Darwin = 1,
             0)

# Settings that are handed to dbSTEPCAM_ABC() further below
if (Run == 1) {
  # Settings used in Hauffe et al.
  numParticles <- 500
  stopRate <- 0.0001
  SD <- 500
} else if (Run == 2) {
  # Inappropriate (fast) settings
  numParticles <- 50
  stopRate <- 0.1
  SD <- 25
} else {
  # Run == 3: nothing is computed, stored results are loaded regardless of OS
  OS <- 0
}

# Set working directory (all files below are read relative to it)
setwd("/home/.../Supplement")

# Load the distance-based stepwise community assembly
# This script uses a lot of code from the packages FD and STEPCAM
# Please cite them correctly
# citation("FD")
# citation("STEPCAM")

# The code here is consequently distributed under the same license: GPL-2
# See https://cran.r-project.org/web/licenses/GPL-2
source("dbSTEPCAM.R")
source("auxiliaryFunctions.R")

# Input tables: comma-separated with a header line (the read.csv defaults);
# the first column of every file supplies the row names
OhridComm <- read.csv(file = "OhridComm.csv", row.names = 1)
TaxDist <- read.csv(file = "TaxDistance.csv", row.names = 1)
Localities <- read.csv(file = "Localities.csv", row.names = 1)


##############################################
#                                            #
#  S P A T I A L L Y  C O N S T R A I N E D  #
#          C L U S T E R I N G  O F          #
#    E C O L O G I C A L  N E T W O R K S    #
#                                            #
##############################################

# This step is only working on Linux and possibly Mac (not tested)
if (Run == 1 && OS == 1) { # Slow version
  # Ecological network: pairwise community dissimilarities (vegdist defaults)
  OhridCommDist <- as.matrix(vegdist(OhridComm))

  # Spatial (structural) network
  # 1. Depth differences between all pairs of localities
  OhridDepthDist <- as.matrix(dist(Localities$Depth))
  MantelCorlg <- mantel.correlog(as.dist(OhridCommDist),
                                 as.dist(OhridDepthDist),
                                 nperm = 9999)
  # Explicit print(): a bare object name inside a braced block is not
  # auto-printed, so the correlogram would otherwise never be shown
  print(MantelCorlg)

  # 2. Cost layer: rasterize the lake polygon over its bounding box
  OhridShp <- readOGR(dsn = getwd(), layer = "Ohrid")
  OhridBox <- bbox(OhridShp)
  OhridRas <- raster(xmn = OhridBox[1, 1], xmx = OhridBox[1, 2],
                     ymn = OhridBox[2, 1], ymx = OhridBox[2, 2],
                     crs = OhridShp@proj4string)
  res(OhridRas) <- 10 # Grain size
  # Lasts ~ 1h
  OhridRas <- rasterize(OhridShp, OhridRas)
  OhridTrans <- transition(OhridRas, mean, 16)
  CostDist <- costDistance(OhridTrans,
                           as.matrix(Localities[, c("Eastings", "Northings")]))
  CostDist <- as.matrix(CostDist)

  # Normalize the [0,1] community dissimilarities (arcsine transformation);
  # OhridCommDist already is a matrix, no further conversion needed
  OhridCommDistAsin <- asin(OhridCommDist)
  #boxplot(c(OhridCommDistAsin))
  # Take the 8.2 m from the mantel correlogram
  # Create structural network (as in Miele et al., 2014)
  # by removing vertical (bathymetrical) links:
  # pairs differing by more than 8.2 m in depth get a weight of 0
  StructNet <- CostDist * ifelse(OhridDepthDist > 8.2, 0, 1)

  # Actual spatial clustering
  # Explore different settings for the number of connected spatial neighbors
  # in the structural network (i.e., modify the connectivity of the x and y dimension)
  # Every locality is connected with at minimum 15 localities
  K <- seq(15, 265, by = 10)
  GC <- vector("list", length(K)) # Geographic clustering, one entry per K
  for (k in seq_along(K)) { # ~1 day on my Atom CPU
    print(K[k])
    # pruneK() is sourced from the auxiliary functions; it is applied to
    # every column of the structural network with the current K
    StructNetTmp <- apply(StructNet, 2, function(x) pruneK(x, K = K[k]))
    ListWStructNet <- mat2listw(as.matrix(StructNetTmp), style = "M")
    # clump = 1.570796 is approximately pi/2, i.e. asin(1), the largest
    # value the transformed dissimilarities can take
    GC[[k]] <- geoclust(m = OhridCommDistAsin, lw = ListWStructNet,
                        k = 1:10, clump = 1.570796)
    # Layers from Hauffe et al., 2011
    # SL: Surface Layer
    # IL: Intermediate Layer
    # DL: Deep Layer
    # FS: Feeder Springs
    print(table(GC[[k]]$groups, Localities$Layer))
    # Group number may differ between runs, though same settings are used!
  }
}

# Fast version or if geoclust cannot be used (no Linux/Mac):
# load the stored clustering instead of computing it.
# OS == 0 is the flag for "geoclust not usable"; testing OS == 0 (rather than
# OS != 0) keeps a clustering that was just computed on Linux/Mac with
# Run == 1 and still defines GC when Run == 1 is attempted on Windows.
if (Run %in% c(2, 3) || OS == 0) {
  GC <- readRDS("SpatiallyConstrainedClustering.rds")
}

# Check robustness of clustering
################################
# Spatially constrained clustering uses arbitrary numbers ('names') for the clusters
# and repeated runs with the same settings may also use different numbers.
# Here, cluster-numbers are replaced by the name of the eco-zones used in Hauffe et al.
EcoZones <- lapply(GC, function(Clustering) {
  nameGeoCluster(Loc = Localities, Groups = Clustering$groups)
})
# Order all according to a reference clustering (here 65 neighbors)
G <- 6
Localities <- data.frame(Localities, GeoClus = EcoZones[[G]])

# Permutation that sorts the reference clustering; applied to every clustering
RefClus <- order(EcoZones[[G]])
EcoZones <- lapply(EcoZones, function(Zones) Zones[RefClus])

# Color coded cluster assignment of all 264 localities:
# one column of symbols per clustering, one row per locality
NumClusterings <- length(EcoZones)
NumLocalities <- length(RefClus)
par(las = 2)
palette(c("#b2182b", "#ef8a62", "#fddbc7", "#f7f7f7", "#d1e5f0", "#67a9cf", "#2166ac"))
plot(seq_len(NumClusterings), seq_len(NumClusterings), type = "n",
     ylim = c(1, NumLocalities),
     xlab = "Neighbors", ylab = "Locality", yaxt = "n", xaxt = "n")
axis(side = 1, at = seq_len(NumClusterings), labels = seq(15, 265, by = 10))
for (i in seq_along(EcoZones)) {
  # The factor codes of the zone names index the palette set above
  points(rep(i, NumLocalities), seq_len(NumLocalities),
         pch = 15, col = as.factor(EcoZones[[i]]))
}


# Figure 1 of Hauffe et al.
###########################
Ohrid0mShp <- readOGR(dsn = getwd(), layer = "0m")
layout(matrix(1:3, nrow = 1, ncol = 3))

# Eco-zone names as stored in Localities$GeoClus, with matching fill colors
# and axis labels (all three vectors share the same order)
ZoneNames <- c("1 SE upper littoral zone 1", "2 SE upper littoral zone 2",
               "3 SE upper littoral zone 3", "4 Non-SE upper littoral",
               "5 lower littoral", "6 upper sublittoral", "7 lower sublittoral")
ZoneCols <- c("#b2182b", "#ef8a62", "#fddbc7", "#f7f7f7", "#d1e5f0", "#67a9cf", "#2166ac")
ZoneLabels <- c("SE upper littoral\nzone 1", "SE upper littoral\nzone 2",
                "SE upper littoral\nzone 3", "Non-SE upper\nlittoral",
                "lower\nlittoral", "upper\nsublittoral", "lower\nsublittoral")

# Panel A: depth per eco-zone on a logarithmic axis
# (0.1 is added to every depth; presumably to keep depths of 0 m plottable)
par(las = 2, mar = c(8.1, 4.5, 0.1, 0.1))
DepthByZone <- lapply(ZoneNames, function(Zone) {
  Localities[Localities$GeoClus == Zone, "Depth"] + 0.1
})
boxplot(DepthByZone, col = ZoneCols, names = ZoneLabels,
        log = "y", ylab = "Depth (m)")
mtext("A", side = 2, line = 3.1, at = 60, cex = 1.4)

# Panel B: map of the four upper littoral zones
par(las = 2, mar = c(0.1, 0.1, 0.1, 0.1))
plot(Ohrid0mShp, col = "grey90")
# Different order to minimize issues with overplotting
for (z in c(4, 3, 2, 1)) {
  points(Localities[Localities$GeoClus == ZoneNames[z], 4:5],
         pch = 21, cex = 1.5, bg = ZoneCols[z])
}
Usr <- par("usr")
text(x = Usr[1] + Usr[1] / 500, y = Usr[4] - Usr[4] / 7000,
     labels = "B", cex = 2)

# Panel C: map of the three deeper zones
plot(Ohrid0mShp, col = "grey90")
for (z in c(5, 6, 7)) {
  points(Localities[Localities$GeoClus == ZoneNames[z], 4:5],
         pch = 21, cex = 1.5, bg = ZoneCols[z])
}
Usr <- par("usr")
text(x = Usr[1] + Usr[1] / 500, y = Usr[4] - Usr[4] / 7000,
     labels = "C", cex = 2)

################################################
#                                              #
#  D I S T A N C E - B A S E D  S T E P C A M  #
#                                              #
################################################

if (Run %in% c(1, 2)) { # Slow version
  # One distance-based STEPCAM run per locality (row of the community table)
  for (i in seq_len(nrow(OhridComm))) {
    O <- dbSTEPCAM_ABC(OhridComm, as.dist(TaxDist),
                       numParticles = numParticles,
                       plot_number = i, stopRate = stopRate,
                       SD = SD, ContinuePrevious = FALSE)
    # Save output:
    # particle files found in the working directory, in natural order
    f <- mixedsort(list.files(pattern = "particles_t="))
    if (length(f) < 2) {
      stop("Expected at least two 'particles_t=' files for locality ", i,
           " but found ", length(f), call. = FALSE)
    }
    # The second-to-last file is kept
    # NOTE(review): presumably the very last file belongs to the generation
    # that was aborted when stopRate was reached - confirm
    Kept <- length(f) - 1
    d <- read.table(f[Kept], header = FALSE)
    write.table(d, file.path(getwd(), paste0(i, "_t=", Kept, ".txt")),
                row.names = FALSE, col.names = FALSE)
    # Clean up: remove all particle files before the next locality is run
    unlink(file.path(getwd(), f))
  }
}

# Posterior mean of STEPCAM
###########################
if (Run %in% c(1, 2)) {
  # Per-locality output files written by the STEPCAM loop, in natural order
  OhridRuns <- mixedsort(list.files(getwd(), pattern = "_t="))
  # Load the posterior distribution of process-importance for all 264 localities
  StepcamRes <- lapply(file.path(getwd(), OhridRuns), function(RunFile) {
    read.table(RunFile, sep = "", header = FALSE)
  })
  names(StepcamRes) <- OhridRuns
  # Process-importance in the interval [0,1]: within every row the first three
  # columns are divided by their sum, then averaged over all rows
  ProImp <- lapply(StepcamRes, function(Posterior) {
    Shares <- apply(Posterior, 1, function(Particle) {
      Particle[1:3] / sum(Particle[1:3])
    })
    rowMeans(Shares)
  })
  ProImp <- matrix(unlist(ProImp), ncol = 3, byrow = TRUE)
  colnames(ProImp) <- c("Dispersal", "Environment", "Interaction")
  # Result:
  Res <- data.frame(Localities, ProImp)
} else if (Run == 3) {
  # STEPCAM results of Hauffe et al.
  Res <- read.csv(file = "StepcamResultHauffe.csv", row.names = 1)
}


# Figure 2 of Hauffe et al.
# One boxplot panel per process: importance grouped by eco-zone
layout(matrix(1:3, nrow = 1, ncol = 3))
par(las = 2, mar = c(8.1, 4.1, 0.1, 0.1))
ZoneCols <- c("#b2182b", "#ef8a62", "#fddbc7", "#f7f7f7", "#d1e5f0", "#67a9cf", "#2166ac")
ZoneLabels <- c("SE upper littoral\nzone 1", "SE upper littoral\nzone 2",
                "SE upper littoral\nzone 3", "Non-SE upper\nlittoral",
                "lower\nlittoral", "upper\nsublittoral", "lower\nsublittoral")
Panels <- list(
  list(Formula = Dispersal ~ GeoClus, Tag = "A",
       YLab = "Dispersal limitation importance"),
  list(Formula = Environment ~ GeoClus, Tag = "B",
       YLab = "Environmental filtering importance"),
  list(Formula = Interaction ~ GeoClus, Tag = "C",
       YLab = "Species interaction importance")
)
for (Panel in Panels) {
  boxplot(Panel$Formula, data = Res,
          col = ZoneCols, names = ZoneLabels,
          ylab = Panel$YLab, ylim = c(0, 1))
  # Panel letter in the left margin at the top of the y-axis
  mtext(Panel$Tag, side = 2, line = 2.5, at = 1, cex = 1.4)
}


# Mean process-importance and bootstrapped confidence interval of the mean
ProcCols <- c("Dispersal", "Environment", "Interaction")
colMeans(Res[, ProcCols])
# 10000 bootstrap replicates: localities are resampled with replacement and
# the three column means are stored, one replicate per row
NumBoot <- 10000
MeansBoot <- matrix(NA, nrow = NumBoot, ncol = 3)
for (b in seq_len(NumBoot)) {
  Rows <- sample.int(nrow(Res), replace = TRUE)
  MeansBoot[b, ] <- colMeans(Res[Rows, ProcCols])
}
# 2.5 % and 97.5 % quantiles of the bootstrapped means, per process
apply(MeansBoot, 2, quantile, probs = c(0.025, 0.975))


###################################################
#                                                 #
#                B A Y E S I A N                  #
#  G E N E R A L I Z E D  L I N E A R  M O D E L  #
#                                                 #
###################################################

# Test whether process importance changes with limnological characteristics
# Two competing hypotheses: either lake depth or eco-zones are
# better correlated with processes
# Operational criterion: deviance information criterion (Lower DIC is better)

# Additive planar transformation of the three process importances.
# The columns are selected by name (as everywhere else in this script)
# instead of by position, so the step does not depend on the column layout.
# The transformed columns are appended to Res; data.frame() makes their names
# unique, which yields the Interaction.1 and Environment.1 used in the models.
Res <- data.frame(Res, apt(Res[, c("Interaction", "Environment", "Dispersal")]))

# Correlation of process-importance with depth (Do processes change with lake depth?)
McmcDepth1 <- MCMCglmm(cbind(Interaction.1, Environment.1) ~ # Multivariate dependent variables
                         trait:Depth + trait:SpeciesRichness, # Predictors/independent variables
                       data = Res, rcov = ~us(trait):units,
                       family = rep("gaussian", 2),
                       nitt = 30000, thin = 20, burnin = 5000,
                       verbose = FALSE)
summary(McmcDepth1)
# Check temporal autocorrelation of MCMC samples
# (a QQ-plot as for a regular glm does not exist for BGLMs)
autocorr.plot(McmcDepth1$Sol)
effectiveSize(McmcDepth1$Sol)
# 2-way interaction between depth and species richness
# In the analyses of Hauffe et al., the interaction is significant
# and it does improve the model fit (DIC)
McmcDepth2 <- MCMCglmm(cbind(Interaction.1, Environment.1) ~
                         trait:Depth + trait:SpeciesRichness + trait:Depth:SpeciesRichness,
                       data = Res, rcov = ~us(trait):units,
                       family = rep("gaussian", 2),
                       nitt = 60000, thin = 40, burnin = 10000,
                       verbose = FALSE)
summary(McmcDepth2)
autocorr.plot(McmcDepth2$Sol)
effectiveSize(McmcDepth2$Sol)

# Check spatial autocorrelation
# Prediction
# It is not possible to use predict with newdata (as in regular base:::glm)
# That is why I cannot plot a single regression line showing how process importance
# changes with depth or species richness
# The predictions are reshaped into two columns, back-transformed, and the
# column order is reversed to Dispersal, Environment, Interaction
PredictMcmcDepth <- aptInv(matrix(predict(McmcDepth2), ncol = 2, byrow = FALSE))[, 3:1]
ResidualsMcmcDepth <- Res[, c("Dispersal", "Environment", "Interaction")] - PredictMcmcDepth
# No way to test three-dimensional spatial autocorrelation
# Spatial autocorrelation in x and y axes:
MantelCorlgDepth <- mantel.correlog(dist(ResidualsMcmcDepth),
                                    dist(Localities[, c("Eastings", "Northings")]),
                                    nperm = 999)
# Only first lag distance with significant
# but very little autocorrelation (rM = 0.02)
MantelCorlgDepth
# Spatial autocorrelation in z axis:
MantelCorlgDepth <- mantel.correlog(dist(ResidualsMcmcDepth),
                                    dist(Localities[, "Depth"]),
                                    nperm = 999)
# Only second lag distance with significant autocorrelation (rM = 0.06)
MantelCorlgDepth

# Check modeled trend in process-importance
# Upper row: predictions against depth; lower row: against species richness
ProcLabels <- c("Dispersal limitation importance",
                "Environmental filtering importance",
                "Species interaction importance")
layout(matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE))
for (j in 1:3) {
  plot(PredictMcmcDepth[, j] ~ Res$Depth, xlab = "Depth (m)",
       ylab = ProcLabels[j])
}
for (j in 1:3) {
  plot(PredictMcmcDepth[, j] ~ Res$SpeciesRichness, xlab = "Species Richness",
       ylab = ProcLabels[j])
}

# Correlation of process-importance with eco-zones (Do processes change with eco-zones?)
# Additive model: eco-zone and species richness, separately for both responses
McmcEcoZone1 <- MCMCglmm(
  fixed = cbind(Interaction.1, Environment.1) ~
    trait:GeoClus + trait:SpeciesRichness,
  rcov = ~us(trait):units,
  family = c("gaussian", "gaussian"),
  data = Res,
  nitt = 30000, thin = 20, burnin = 5000,
  verbose = FALSE
)
summary(McmcEcoZone1)
# Autocorrelation plots on a manually defined 4 x 4 grid
layout(matrix(1:16, nrow = 4, ncol = 4))
autocorr.plot(McmcEcoZone1$Sol, auto.layout = FALSE)
effectiveSize(McmcEcoZone1$Sol)

# 2-way interaction between eco-zones and species richness
# In the analyses of Hauffe et al., the interaction is significant
# and it does improve the model fit (DIC)
McmcEcoZone2 <- MCMCglmm(
  fixed = cbind(Interaction.1, Environment.1) ~
    trait:GeoClus + trait:SpeciesRichness + trait:GeoClus:SpeciesRichness,
  rcov = ~us(trait):units,
  family = c("gaussian", "gaussian"),
  data = Res,
  nitt = 60000, thin = 40, burnin = 10000,
  verbose = FALSE
)
summary(McmcEcoZone2)
# Autocorrelation plots on a manually defined 5 x 6 grid
layout(matrix(1:30, nrow = 5, ncol = 6))
autocorr.plot(McmcEcoZone2$Sol, auto.layout = FALSE)
effectiveSize(McmcEcoZone2$Sol)

# Check spatial autocorrelation
# Predictions are reshaped into two columns, back-transformed, and the
# column order is reversed to Dispersal, Environment, Interaction
PredictMcmcEcoZone <- aptInv(matrix(predict(McmcEcoZone2), ncol = 2, byrow = FALSE))[, 3:1]
ResidualsMcmcEcoZone <- Res[, c("Dispersal", "Environment", "Interaction")] - PredictMcmcEcoZone
# Spatial autocorrelation in x and y axes:
MantelCorlgEcoZone <- mantel.correlog(dist(ResidualsMcmcEcoZone),
                                      dist(Localities[, c("Eastings", "Northings")]),
                                      nperm = 999)
# No significant autocorrelation
MantelCorlgEcoZone
# Spatial autocorrelation in z axis:
MantelCorlgEcoZone <- mantel.correlog(dist(ResidualsMcmcEcoZone),
                                      dist(Localities[, "Depth"]),
                                      nperm = 999)
# Only second and 6th lag distance with significant
# but very low autocorrelation (rM = 0.06)
MantelCorlgEcoZone

# Check modeled trend in process-importance
# Upper row: predictions per eco-zone; lower row: against species richness
ProcLabels <- c("Dispersal limitation importance",
                "Environmental filtering importance",
                "Species interaction importance")
layout(matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE))
par(las = 2, mar = c(8.1, 4.1, 0.1, 0.1))
for (j in 1:3) {
  boxplot(PredictMcmcEcoZone[, j] ~ Res$GeoClus,
          ylab = ProcLabels[j], ylim = c(0, 1))
}
par(las = 2, mar = c(4.1, 4.1, 3.1, 0.1))
for (j in 1:3) {
  # Points are colored by eco-zone (factor codes index the current palette)
  plot(PredictMcmcEcoZone[, j] ~ Res$SpeciesRichness, xlab = "Species Richness",
       ylab = ProcLabels[j], pch = 16, col = as.factor(Res$GeoClus))
}
