
# coding: utf-8

# In[1]:

# python packages 
import numpy as np
import datetime
import netCDF4
import sys
import math 
from pandas import DataFrame 
from collections import Counter

# rpy2 bridge: everything below sets up the embedded R session used by the
# GAM fits in this file.
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages # rpy2's R-package handling module
from rpy2.robjects.packages import importr # importr(name) loads an R package and returns a Python handle to it
# `r` evaluates a string of R code in the embedded session: r('R_expression')
r = robjects.r
# R packages loaded into the session. The R code strings in this file call
# gam() (provided by mgcv) and data.table() (provided by data.table).
base = importr('base')
psych = importr('psych')
car = importr('car')
reshape2 = importr('reshape2')
ggplot2 = importr('ggplot2')
mgcv = importr('mgcv')
utils = importr('utils')
# Loaded only for its effect on the R session; the Python handle is not kept.
importr("data.table")
# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # select the first mirror in the list
from rpy2.robjects import FloatVector
import rpy2.robjects.numpy2ri # converter between numpy arrays and R objects
# Activate the converter so numpy arrays can be passed directly to r.assign(...)
rpy2.robjects.numpy2ri.activate()


# In[3]:

#East Eurasia with 1000 subsamples 
def gam_1000subsample_ea_east(dataforr_ea_east):
    """Fit a suite of GAMs on one random 1000-row subsample (Northern East Eurasia).

    The complete table is pushed to R as the data.table ``data_ea_east``.
    Then 1000 row indices are drawn uniformly *with replacement*, the first
    nine columns of those rows are pushed to R as ``sub_ea_east``, and a
    fixed list of ``gam`` models with response ``TCF`` is fitted on the
    subsample. The R summary of every model is printed to stdout.

    Parameters
    ----------
    dataforr_ea_east : numpy.ndarray
        2-D table whose columns are, in order: TCF, MAR, MSSM, MTmin, PZI,
        FF, GDD0, PTD, ST, State, Elev, Lon, Lat (the names assigned to it
        on the R side below).

    Returns
    -------
    None
        Nothing is returned. The R globals ``datainr_ea_east``,
        ``data_ea_east``, ``subinr_ea_east``, ``sub_ea_east`` and
        ``mod_gam_sub`` (the last model fitted) are overwritten.
    """
    # Expose the complete table to R. numpy arrays arrive as an R matrix, so
    # wrap it in a data.table and give the columns their names.
    r.assign('datainr_ea_east', dataforr_ea_east)
    r('data_ea_east <- data.table(datainr_ea_east)')
    # The response column must be called "TCF": that is the name used by every
    # formula in this file and by the other regional functions.
    r('names(data_ea_east) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')

    # Draw 1000 row indices in [0, n_rows) with replacement. randint's upper
    # bound is exclusive, so this covers exactly the same range as the
    # deprecated random_integers(0, n_rows - 1, 1000).
    n_rows = len(dataforr_ea_east[:, 0])
    index = np.random.randint(0, n_rows, 1000)

    # Only the nine modelling columns (TCF plus eight predictors) are needed.
    subsample_forr_ea_east = np.take(dataforr_ea_east[:, 0:9], index, axis=0)
    r.assign('subinr_ea_east', subsample_forr_ea_east)
    r('sub_ea_east <- data.table(subinr_ea_east)')
    r('names(sub_ea_east) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST")')

    # Right-hand sides of the models, in fitting order: every predictor on its
    # own, three predictor pairs, and finally a multi-predictor model.
    model_terms = (
        's(MAR)',
        's(MSSM)',
        's(MTmin)',
        's(PZI)',
        's(FF)',
        's(GDD0)',
        's(PTD)',
        's(ST)',
        's(MAR)+s(MSSM)',
        's(MTmin)+s(GDD0)',
        's(PZI)+s(FF)',
        's(MAR)+s(MSSM)+s(MTmin)+s(PZI)+s(FF)+s(GDD0)+s(ST)',
    )
    for terms in model_terms:
        r('mod_gam_sub <- gam(TCF ~ %s, data = sub_ea_east)' % terms)
        # To log to a text file instead, open it in append mode and write
        # str(r('summary(mod_gam_sub)')) -- or r('summary(mod_gam_sub)$dev.expl')
        # for the explained deviance only.
        print(r('summary(mod_gam_sub)'))

def gam_1000subsample_ea_west(dataforr_ea_west):
    """Fit a suite of GAMs on one random 1000-row subsample (Northern West Eurasia).

    The complete table is pushed to R as the data.table ``data_ea_west``.
    Then 1000 row indices are drawn uniformly *with replacement*, the first
    nine columns of those rows are pushed to R as ``sub_ea_west``, and a
    fixed list of ``gam`` models with response ``TCF`` is fitted on the
    subsample. The R summary of every model is printed to stdout.

    Parameters
    ----------
    dataforr_ea_west : numpy.ndarray
        2-D table whose columns are, in order: TCF, MAR, MSSM, MTmin, PZI,
        FF, GDD0, PTD, ST, State, Elev, Lon, Lat (the names assigned to it
        on the R side below).

    Returns
    -------
    None
        Nothing is returned. The R globals ``datainr_ea_west``,
        ``data_ea_west``, ``subinr_ea_west``, ``sub_ea_west``, ``k_ST`` and
        ``mod_gam_sub`` (the last model fitted) are overwritten.
    """
    # Expose the complete table to R. numpy arrays arrive as an R matrix, so
    # wrap it in a data.table and give the columns their names.
    r.assign('datainr_ea_west', dataforr_ea_west)
    r('data_ea_west <- data.table(datainr_ea_west)')
    r('names(data_ea_west) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')

    # Draw 1000 row indices in [0, n_rows) with replacement. randint's upper
    # bound is exclusive, so this covers exactly the same range as the
    # deprecated random_integers(0, n_rows - 1, 1000).
    n_rows = len(dataforr_ea_west[:, 0])
    index = np.random.randint(0, n_rows, 1000)

    # Only the nine modelling columns (TCF plus eight predictors) are needed.
    subsample_forr_ea_west = np.take(dataforr_ea_west[:, 0:9], index, axis=0)
    r.assign('subinr_ea_west', subsample_forr_ea_west)
    r('sub_ea_west <- data.table(subinr_ea_west)')
    # The response column must be named "TCF" to match the formulas below;
    # any other name makes every gam() call fail with "object 'TCF' not found".
    r('names(sub_ea_west) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST")')

    # ST has fewer distinct values than the default basis dimension of s()
    # (k = 10) in this region, so the smooth of ST is capped at the number of
    # distinct ST values present in this particular subsample.
    k_ST = np.unique(subsample_forr_ea_west[:, 8]).shape[0]
    r.assign('k_ST', k_ST)

    # Right-hand sides of the models, in fitting order: every predictor on its
    # own, three predictor pairs, and finally a multi-predictor model.
    model_terms = (
        's(MAR)',
        's(MSSM)',
        's(MTmin)',
        's(PZI)',
        's(FF)',
        's(GDD0)',
        's(PTD)',
        's(ST, k=k_ST)',
        's(MAR)+s(MSSM)',
        's(MTmin)+s(GDD0)',
        's(PZI)+s(FF)',
        's(MAR)+s(MSSM)+s(MTmin)+s(PZI)+s(FF)+s(GDD0)+s(ST, k=k_ST)',
    )
    for terms in model_terms:
        r('mod_gam_sub <- gam(TCF ~ %s, data = sub_ea_west)' % terms)
        # To log to a text file instead (e.g. one file per model), open it in
        # append mode and write str(r('summary(mod_gam_sub)')) -- or
        # r('summary(mod_gam_sub)$dev.expl') for the explained deviance only.
        print(r('summary(mod_gam_sub)'))


def gam_1000subsample_na_west(dataforr_na_west):
    """Fit a suite of GAMs on one random 1000-row subsample (Western North America).

    The complete table is pushed to R as the data.table ``data_na_west``.
    Then 1000 row indices are drawn uniformly *with replacement*, the first
    nine columns of those rows are pushed to R as ``sub_na_west``, and a
    fixed list of ``gam`` models with response ``TCF`` is fitted on the
    subsample. The R summary of every model is printed to stdout.

    Parameters
    ----------
    dataforr_na_west : numpy.ndarray
        2-D table whose columns are, in order: TCF, MAR, MSSM, MTmin, PZI,
        FF, GDD0, PTD, ST, State, Elev, Lon, Lat (the names assigned to it
        on the R side below).

    Returns
    -------
    None
        Nothing is returned. The R globals ``datainr_na_west``,
        ``data_na_west``, ``subinr_na_west``, ``sub_na_west`` and
        ``mod_gam_sub`` (the last model fitted) are overwritten.
    """
    # Expose the complete table to R. numpy arrays arrive as an R matrix, so
    # wrap it in a data.table and give the columns their names.
    r.assign('datainr_na_west', dataforr_na_west)
    r('data_na_west <- data.table(datainr_na_west)')
    r('names(data_na_west) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')

    # Draw 1000 row indices in [0, n_rows) with replacement. randint's upper
    # bound is exclusive, so this covers exactly the same range as the
    # deprecated random_integers(0, n_rows - 1, 1000).
    n_rows = len(dataforr_na_west[:, 0])
    index = np.random.randint(0, n_rows, 1000)

    # Only the nine modelling columns (TCF plus eight predictors) are needed.
    subsample_forr_na_west = np.take(dataforr_na_west[:, 0:9], index, axis=0)
    r.assign('subinr_na_west', subsample_forr_na_west)
    r('sub_na_west <- data.table(subinr_na_west)')
    r('names(sub_na_west) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST")')

    # Right-hand sides of the models, in fitting order: every predictor on its
    # own, three predictor pairs, and finally a multi-predictor model.
    model_terms = (
        's(MAR)',
        's(MSSM)',
        's(MTmin)',
        's(PZI)',
        's(FF)',
        's(GDD0)',
        's(PTD)',
        's(ST)',
        's(MAR)+s(MSSM)',
        's(MTmin)+s(GDD0)',
        's(PZI)+s(FF)',
        's(MAR)+s(MSSM)+s(MTmin)+s(PZI)+s(FF)+s(GDD0)+s(ST)',
    )
    for terms in model_terms:
        r('mod_gam_sub <- gam(TCF ~ %s, data = sub_na_west)' % terms)
        # To log to a text file instead (e.g. one file per model), open it in
        # append mode and write str(r('summary(mod_gam_sub)')) -- or
        # r('summary(mod_gam_sub)$dev.expl') for the explained deviance only.
        print(r('summary(mod_gam_sub)'))


#East North America with 1000 subsamples 
#dataforr_na_east = np.loadtxt('boreal_na_east_database_canadianupdate.txt')
def gam_1000subsample_na_east(dataforr_na_east):
    """Fit a suite of GAMs on one random 1000-row subsample (Eastern North America).

    The complete table is pushed to R as the data.table ``data_na_east``.
    Then 1000 row indices are drawn uniformly *with replacement*, the first
    nine columns of those rows are pushed to R as ``sub_na_east``, and a
    fixed list of ``gam`` models with response ``TCF`` is fitted on the
    subsample. The R summary of every model is printed to stdout.

    Parameters
    ----------
    dataforr_na_east : numpy.ndarray
        2-D table whose columns are, in order: TCF, MAR, MSSM, MTmin, PZI,
        FF, GDD0, PTD, ST, State, Elev, Lon, Lat (the names assigned to it
        on the R side below). The script in this file loads it from
        'boreal_na_east_database_canadianupdate.txt'.

    Returns
    -------
    None
        Nothing is returned. The R globals ``datainr_na_east``,
        ``data_na_east``, ``subinr_na_east``, ``sub_na_east`` and
        ``mod_gam_sub`` (the last model fitted) are overwritten.
    """
    # Expose the complete table to R. numpy arrays arrive as an R matrix, so
    # wrap it in a data.table and give the columns their names.
    r.assign('datainr_na_east', dataforr_na_east)
    r('data_na_east <- data.table(datainr_na_east)')
    r('names(data_na_east) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')

    # Draw 1000 row indices in [0, n_rows) with replacement. randint's upper
    # bound is exclusive, so this covers exactly the same range as the
    # deprecated random_integers(0, n_rows - 1, 1000).
    n_rows = len(dataforr_na_east[:, 0])
    index = np.random.randint(0, n_rows, 1000)

    # Only the nine modelling columns (TCF plus eight predictors) are needed.
    subsample_forr_na_east = np.take(dataforr_na_east[:, 0:9], index, axis=0)
    r.assign('subinr_na_east', subsample_forr_na_east)
    r('sub_na_east <- data.table(subinr_na_east)')
    r('names(sub_na_east) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST")')

    # Right-hand sides of the models, in fitting order: every predictor on its
    # own, three predictor pairs, and finally a multi-predictor model.
    model_terms = (
        's(MAR)',
        's(MSSM)',
        's(MTmin)',
        's(PZI)',
        's(FF)',
        's(GDD0)',
        's(PTD)',
        's(ST)',
        's(MAR)+s(MSSM)',
        's(MTmin)+s(GDD0)',
        's(PZI)+s(FF)',
        's(MAR)+s(MSSM)+s(MTmin)+s(PZI)+s(FF)+s(GDD0)+s(ST)',
    )
    for terms in model_terms:
        r('mod_gam_sub <- gam(TCF ~ %s, data = sub_na_east)' % terms)
        # To log to a text file instead (e.g. one file per model), open it in
        # append mode and write str(r('summary(mod_gam_sub)')) -- or
        # r('summary(mod_gam_sub)$dev.expl') for the explained deviance only.
        print(r('summary(mod_gam_sub)'))


# In[2]:

# dataset to be used
#East Eurasia without subsamples

# Northern East Eurasia, complete table (no subsampling).
# Prepend the directory to the file name if the database lives elsewhere.
dataforr_ea_east = np.loadtxt('boreal_ea_east_database.txt')
columns = [
    "TCF", "MAR", "Mean_Spring_SM", "Mean_Tmin", "PZI", "FF", "GDD0",
    "PTD", "ST", "State", "Elev", "Lon", "Lat",
]
# pandas copy of the first 13 columns; the R calls below use the raw array.
dataframe_ea_east = DataFrame(dataforr_ea_east[:, :13], columns=columns[:13])

# The numpy array reaches R as a matrix: wrap it in a data.table (fast
# handling) and name its columns.
r.assign('datainr_ea_east', dataforr_ea_east)
r('data_ea_east <- data.table(datainr_ea_east)')
r('names(data_ea_east) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')
# Debugging aid: r('print(data_ea_east)') shows the table as R sees it.

# Model TCF with smooths of the listed predictors on all rows, then report.
r('mod_gam <- gam(TCF ~ s(MAR)+s(MTmin)+s(MSSM)+s(ST)+s(PZI)+s(FF), data = data_ea_east)')
print(r('summary(mod_gam)'))
# let's print to a txt
#f = open('/pathtotxt/gam_ea_east_all.txt', 'a')
#print >> f, "Northern East Eurasia - No Subsampling",'\n',r('summary(mod_gam)')  # or f.write('...\n') # for first 
#print >> f,'\n',r('summary(mod_gam)')  # or f.write('...\n') #from second on

#f.close()


# In[5]:

#West Eurasia without subsamples 
# Northern West Eurasia, complete table (no subsampling).
dataforr_ea_west = np.loadtxt('boreal_ea_west_russia_database.txt')
columns = [
    "TCF", "MAR", "Mean_Spring_SM", "Mean_Tmin", "PZI", "FF", "GDD0",
    "PTD", "ST", "State", "Elev", "Lon", "Lat",
]
# pandas copy of the first 13 columns; the R calls below use the raw array.
dataframe_ea_west = DataFrame(dataforr_ea_west[:, :13], columns=columns[:13])

# The numpy array reaches R as a matrix: wrap it in a data.table and name
# its columns.
r.assign('datainr_ea_west', dataforr_ea_west)
r('data_ea_west <- data.table(datainr_ea_west)')
r('names(data_ea_west) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')
# Debugging aid: r('print(data_ea_west)') shows the table as R sees it.

# ST (column 8) has only 9 distinct values in this dataset, fewer than the
# default maximum of k = 10 degrees of freedom gam gives a smooth. The smooth
# of ST is therefore capped explicitly at the number of distinct ST values.
k_ST = len(np.unique(dataforr_ea_west[:, 8]))
r.assign('k_ST', k_ST)

# Model TCF with smooths of all eight predictors on all rows, then report.
r('mod_gam <- gam(TCF ~ s(MAR)+s(FF)+s(MTmin)+s(PZI)+s(ST,k=k_ST)+s(MSSM)+s(GDD0)+s(PTD), data = data_ea_west)')
print(r('summary(mod_gam)'))
# let's print to a txt
#f = open('path_to_txt/gam_ea_west_settuple.txt', 'a')
#print >> f, "Northern West Eurasia - No Subsampling",'\n',r('summary(mod_gam)')  # or f.write('...\n') # for first 
#print >> f,'\n',r('summary(mod_gam)')  # or f.write('...\n') #from second on

#f.close()


# In[6]:

#West North America without subsamples
# Western North America, complete table (no subsampling).
dataforr_na_west = np.loadtxt('boreal_na_west_database_canadianupdate.txt')
columns = [
    "TCF", "MAR", "Mean_Spring_SM", "Mean_Tmin", "PZI", "FF", "GDD0",
    "PTD", "ST", "State", "Elev", "Lon", "Lat",
]
# pandas copy of the first 13 columns; the R calls below use the raw array.
dataframe_na_west = DataFrame(dataforr_na_west[:, :13], columns=columns[:13])

# The numpy array reaches R as a matrix: wrap it in a data.table and name
# its columns.
r.assign('datainr_na_west', dataforr_na_west)
r('data_na_west <- data.table(datainr_na_west)')
r('names(data_na_west) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')
# Debugging aid: r('print(data_na_west)') shows the table as R sees it.

# Model TCF with smooths of all eight predictors on all rows, then report.
r('mod_gam <- gam(TCF ~ s(MAR)+s(FF)+s(MTmin)+s(PZI)+s(ST)+s(MSSM)+s(GDD0)+s(PTD), data = data_na_west)')
print(r('summary(mod_gam)'))
# let's print to a txt
#f = open('path_to_txt/gam_na_west_settuple.txt', 'a')
#print >> f, "Western North America - No Subsampling",'\n',r('summary(mod_gam)')  # or f.write('...\n') # for first 
#print >> f,'\n',r('summary(mod_gam)')  # or f.write('...\n') #from second on

#f.close()


# In[7]:

#East North America without subsamples
# Eastern North America, complete table (no subsampling).
dataforr_na_east = np.loadtxt('boreal_na_east_database_canadianupdate.txt')
columns = [
    "TCF", "MAR", "Mean_Spring_SM", "Mean_Tmin", "PZI", "FF", "GDD0",
    "PTD", "ST", "State", "Elev", "Lon", "Lat",
]
# pandas copy of the first 13 columns; the R calls below use the raw array.
dataframe_na_east = DataFrame(dataforr_na_east[:, :13], columns=columns[:13])

# The numpy array reaches R as a matrix: wrap it in a data.table and name
# its columns.
r.assign('datainr_na_east', dataforr_na_east)
r('data_na_east <- data.table(datainr_na_east)')
r('names(data_na_east) <- c("TCF", "MAR", "MSSM", "MTmin", "PZI", "FF", "GDD0", "PTD", "ST", "State","Elev", "Lon", "Lat" )')

# Model TCF with smooths of MTmin, MAR and PZI only, on all rows, then report.
# Further smooths (e.g. s(MSSM), s(FF), s(PTD), s(GDD0)) can be appended to
# the formula.
r('mod_gam <- gam(TCF ~ s(MTmin)+s(MAR)+s(PZI), data = data_na_east)')
print(r('summary(mod_gam)'))
# let's print to a txt
#f = open('path_to_txt/gam_na_east_settuple.txt', 'a')
#print >> f, "Eastern North America - No Subsampling",'\n',r('summary(mod_gam)')  # or f.write('...\n') # for first 
#print >> f,'\n',r('summary(mod_gam)')  # or f.write('...\n') #from second on

#f.close()


# In[92]:

# All regions: repeated GAM fits on random subsamples of 1000 grid cells


# In[8]:

# Eurasia: repeat the subsampled model suite n_samples times per region.
# Each call draws a fresh random subsample of 1000 rows and prints the
# summaries of all models fitted on it.
n_samples = 30 # number of repeated subsample fits per region
for i in range(n_samples):
    gam_1000subsample_ea_east(dataforr_ea_east)
    gam_1000subsample_ea_west(dataforr_ea_west)


# In[10]:

# North America: same procedure, reusing n_samples from the Eurasia loop.
# Each call draws a fresh random subsample of 1000 rows and prints the
# summaries of all models fitted on it.
for i in range(n_samples):
    gam_1000subsample_na_east(dataforr_na_east)
    gam_1000subsample_na_west(dataforr_na_west)


# In[ ]:



