
# coding: utf-8

# In[1]:

import numpy as np
import datetime
import netCDF4
import sys
import math 

# sys.path.append("/Users/Beniamino/Documents/OneDrive/MPI-M/iPython_Notebooks/") # Edited .zshrc profile with PYTHONPATH 
from pandas import DataFrame 
from collections import Counter



# In[2]:


# Root of the boreal datasets on the author's machine:
#   path_to_dataset = /Users/Beniamino/Documents/Datasets/Boreal
# execute this only if you want to do the basemap plot

# Filename templates shared by both NEA regions; '{}' receives the region prefix.
# The tuple order is the order in which the files are opened.
# Inputs that exist but are currently switched off: the 0.05deg MOD44B file (nc005*),
# the yearmean soil-moisture file (alternative nc3*), the burned-area sum (nc8*)
# and the GSSLAI trend (alternative nc13*, which would replace the GSSNDVI trend).
_NEA_FILE_TEMPLATES = (
    '/path_to_dataset/{}_shift_Bor_MOD44B_V5__VCFFplusVCF__2010__0.5deg__UHAM-ICDC.nc',
    '/path_to_dataset/{}_shift_Bor_MAR_cru_ts3.22.1998.2010.pre.dat.nc',
    '/path_to_dataset/{}_seasonal_soilw.mon.mean.1998-2010.v2.nc',
    '/path_to_dataset/{}_shift_0_bor_M6H_0.5_tmin.2m.gauss.1998-2010.nc',
    '/path_to_dataset/{}_shift_bor_GLC2000_0.5.nc',
    '/path_to_dataset/{}_shift_0_bor_0.5_PZI.nc',
    '/path_to_dataset/{}_shift_masked_bor_0.5_Year_Fire_Frequency_1996-2012.nc',
    '/path_to_dataset/{}_shift_Bor_ReBil_GloElev_05.nc',
    '/path_to_dataset/{}_Boreal_Mean_GDD0_1998-2010_0.5.nc',
    '/path_to_dataset/{}_Bor_Ease25_Northen_Emisphere_Mean_Thawing_Depth_1901-2002.nc',
    '/Users/Beniamino/Documents/Datasets/Soil_texture/{}_soil_texture_0.5.nc',
    '/path_to_dataset/Terra_{}_MODIS_0.5_C6_GSSNDVI_2000_2015_Decadal_Trend_SigOnly.nc',
)


def _open_nea_region(prefix):
    """Open every NEA input file for one region prefix, in template order."""
    return [netCDF4.Dataset(template.format(prefix)) for template in _NEA_FILE_TEMPLATES]


def _read_nea_fields(files):
    """Pull the analysis fields out of the datasets returned by `_open_nea_region`.

    `[0]` reads the first entry along the leading dimension, `[:]` reads the whole
    variable, and no index keeps the netCDF variable object itself.
    Returns (vcff, pcp, sm, tm, glc, pzi, ff, elev, gdd0, ptd, st, lai).
    """
    (vcf, pre, soilw, tmin, glc, pzi, fire, elev, gdd0, thaw, texture, trend) = files
    return (
        vcf.variables['vcff'][0],
        pre.variables['pre'][0],
        soilw.variables['soilw'],
        tmin.variables['tmin'][0]-273.15,   # presumably Kelvin -> Celsius; confirm units
        glc.variables['var'][:],
        pzi.variables['pzi'][:],
        fire.variables['firefreq'][:],
        elev.variables['elev'][:],
        gdd0.variables['gdd0'][0],
        thaw.variables['thw_depth'][0],
        texture.variables['var600'],
        trend.variables['t_trend'][:],
    )


(ncea_east, nc2ea_east, nc3ea_east, nc4ea_east, nc5ea_east, nc6ea_east,
 nc7ea_east, nc9ea_east, nc10ea_east, nc11ea_east, nc12ea_east,
 nc13ea_east) = _open_nea_region('NEA_East_50')

(ncea_west, nc2ea_west, nc3ea_west, nc4ea_west, nc5ea_west, nc6ea_west,
 nc7ea_west, nc9ea_west, nc10ea_west, nc11ea_west, nc12ea_west,
 nc13ea_west) = _open_nea_region('NEA_West_Russia_50')

# Coordinate axes are taken from the 0.5deg tree-cover files.
lon_ea_east, lon_ea_west = ncea_east.variables['lon'], ncea_west.variables['lon']
lat_ea_east, lat_ea_west = ncea_east.variables['lat'], ncea_west.variables['lat']

(vcff_ea_east, pcp_ea_east, sm_ea_east, tm_ea_east, glc_ea_east, pzi_ea_east,
 ff_ea_east, elev_ea_east, gdd0_ea_east, ptd_ea_east, st_ea_east,
 lai_ea_east) = _read_nea_fields(
    [ncea_east, nc2ea_east, nc3ea_east, nc4ea_east, nc5ea_east, nc6ea_east,
     nc7ea_east, nc9ea_east, nc10ea_east, nc11ea_east, nc12ea_east, nc13ea_east])

(vcff_ea_west, pcp_ea_west, sm_ea_west, tm_ea_west, glc_ea_west, pzi_ea_west,
 ff_ea_west, elev_ea_west, gdd0_ea_west, ptd_ea_west, st_ea_west,
 lai_ea_west) = _read_nea_fields(
    [ncea_west, nc2ea_west, nc3ea_west, nc4ea_west, nc5ea_west, nc6ea_west,
     nc7ea_west, nc9ea_west, nc10ea_west, nc11ea_west, nc12ea_west, nc13ea_west])

picctr = 0


# In[3]:

# create new variable of state according to the results of the optimisation
def _classify_vcf_state(vcff):
    """Map a tree-cover (VCFF) grid onto discrete state codes.

    Parameters
    ----------
    vcff : array-like (plain or masked)
        Tree-cover values; any shape is accepted.

    Returns
    -------
    numpy.ndarray of float, same shape as `vcff`
        0 -> value < 0                (null)
        1 -> 0 <= value < 20          (bare; the <10 and <20 bands share code 1)
        3 -> 20 <= value < 45         (savanna, the third peak)
        4 -> 45 <= value < 100        (forest, the fourth peak)
        5 -> everything else          (null: value >= 100, NaN, or masked cell)
    """
    values = np.ma.getdata(vcff)
    # np.select picks the first matching condition, mirroring an if/elif ladder.
    state = np.select(
        [values < 0, values < 10, values < 20, values < 45, values < 100],
        [0, 1, 1, 3, 4],
        default=5,
    ).astype(float)
    # A masked cell fails every comparison in a scalar if/elif chain and so
    # falls through to the final branch; reproduce that explicitly.
    state[np.ma.getmaskarray(vcff)] = 5
    return state


vcf_state_ea_east = _classify_vcf_state(vcff_ea_east)
vcf_state_ea_west = _classify_vcf_state(vcff_ea_west)

        


# Let's create a dataset with raveled arrays of data
# 

# In[4]:

# no need to execute this again
# create dataset for R with the raveled arrays of data

## IMPORTANT: changes in numpy -> do np.ma.getdata(np.ravel())

# Flatten every field to 1-D and strip masks so all arrays can be indexed alike.
lons_ea_east,lats_ea_east = np.ma.getdata(np.meshgrid(lon_ea_east,lat_ea_east))
arlons_ea_east, arlats_ea_east = np.ma.getdata(np.ravel(lons_ea_east)), np.ma.getdata(np.ravel(lats_ea_east))
arsm_ea_east = np.ma.getdata(np.ravel(sm_ea_east[1]))  # entry 1 along the first axis of the seasonal soil-moisture variable
arvcff_ea_east = np.ma.getdata(np.ravel(vcff_ea_east))
arpcp_ea_east = np.ma.getdata(np.ravel(pcp_ea_east))
artm_ea_east = np.ma.getdata(np.ravel(tm_ea_east))
arglc_ea_east = np.ma.getdata(np.ravel(glc_ea_east))
arpzi_ea_east = np.ma.getdata(np.ravel(pzi_ea_east))
arff_ea_east = np.ma.getdata(np.ravel(ff_ea_east))
#arba_ea_east = np.ravel(ba_ea_east)
arelev_ea_east = np.ma.getdata(np.ravel(elev_ea_east))
argdd0_ea_east = np.ma.getdata(np.ravel(gdd0_ea_east))
arptd_ea_east = np.ma.getdata(np.ravel(ptd_ea_east))
arstate_ea_east = np.ma.getdata(np.ravel(vcf_state_ea_east))
arst_ea_east = np.ma.getdata(np.ravel(st_ea_east))
arlai_ea_east = np.ma.getdata(np.ravel(lai_ea_east))


# Reject a grid cell as soon as any input is out of range.  The test is
# evaluated for all cells at once instead of one Python iteration per cell;
# the [:_n_cells] slices keep the comparison limited to the cells that
# arsm_ea_east covers.
_n_cells = len(arsm_ea_east)
_reject = (
    (arsm_ea_east < 0)
    | (arpcp_ea_east[:_n_cells] > 4000)
    | (arvcff_ea_east[:_n_cells] > 100)
    | (arglc_ea_east[:_n_cells] > 14)
    | (arpzi_ea_east[:_n_cells] < 0)
    | (arff_ea_east[:_n_cells] < 0)
    | (arelev_ea_east[:_n_cells] > 2000)
    | (arelev_ea_east[:_n_cells] < 0)
    | (argdd0_ea_east[:_n_cells] < 0)
    | (arptd_ea_east[:_n_cells] < 0)
)
index = np.flatnonzero(_reject).tolist()   # positions of the rejected cells
count = len(index)                         # number of rejected cells

# Drop the rejected cells from every field (glc is only used for filtering).
data_sm_ea_east = np.delete(arsm_ea_east,index)
data_vcff_ea_east = np.delete(arvcff_ea_east,index)
data_pcp_ea_east = np.delete(arpcp_ea_east,index)
data_tm_ea_east = np.delete(artm_ea_east, index)
data_pzi_ea_east = np.delete(arpzi_ea_east, index)
data_ff_ea_east = np.delete(arff_ea_east, index)
#data_ba_ea_east = np.delete(arba, index)
data_elev_ea_east = np.delete(arelev_ea_east, index)
data_gdd0_ea_east = np.delete(argdd0_ea_east, index)
data_ptd_ea_east = np.delete(arptd_ea_east, index)
data_state_ea_east = np.delete(arstate_ea_east, index)
data_st_ea_east = np.delete(arst_ea_east, index)
data_lai_ea_east = np.delete(arlai_ea_east, index)


data_lons_ea_east, data_lats_ea_east = np.delete(arlons_ea_east,index), np.delete(arlats_ea_east,index)


#dataforr_ea_east = np.transpose(np.array([data_vcff_ea_east, data_pcp_ea_east, data_sm_ea_east, data_tm_ea_east, data_pzi_ea_east,\
#                                     data_ff_ea_east, data_gdd0_ea_east, data_ptd_ea_east, data_st_ea_east, data_state_ea_east, data_lai_ea_east, data_elev_ea_east, data_lons_ea_east, data_lats_ea_east])) 
#dataframe_ea_east_all = DataFrame(dataforr_ea_east[:,0:10], columns=["VCFF", "MAR", "MASM", "Tmin", "PZI", "FF", "GDD0", "PTD", "ST","State"])


# In[5]:

# Write the filtered east table to a text file that can be reloaded later.
# This cell is executed twice, once including lai, once including ndvi.
#
# Column 10 of the earlier "lai_included" export is read back in.  Per the
# original note: what this notebook calls lai (data_lai_ea_east) is actually
# ndvi, whereas the reloaded lai_ea_east is the actual lai.
# NOTE: this rebinds lai_ea_east, which until here held the netCDF trend field.
lai_ea_east = np.loadtxt('boreal_ea_east_database_lai_included.txt')[:,10]

# One entry per output column, in file order.
_columns_ea_east = [
    data_vcff_ea_east,
    data_pcp_ea_east,
    data_sm_ea_east,
    data_tm_ea_east,
    data_pzi_ea_east,
    data_ff_ea_east,
    data_gdd0_ea_east,
    data_ptd_ea_east,
    data_st_ea_east,
    data_state_ea_east,
    lai_ea_east,           # reloaded column
    data_lai_ea_east,      # trend read from the GSSNDVI file
    data_elev_ea_east,
    data_lons_ea_east,
    data_lats_ea_east,
]
dataforr_ea_east = np.array(_columns_ea_east).T   # rows = grid cells, columns = variables

np.savetxt("boreal_ea_east_database_lai_ndvi_included.txt", dataforr_ea_east)


# In[6]:

# no need to execute this again
# create dataset for R with the raveled arrays of data

## IMPORTANT: changes in numpy -> do np.ma.getdata(np.ravel())

# Flatten every field to 1-D and strip masks so all arrays can be indexed alike.
lons_ea_west,lats_ea_west = np.ma.getdata(np.meshgrid(lon_ea_west,lat_ea_west))
arlons_ea_west, arlats_ea_west = np.ma.getdata(np.ravel(lons_ea_west)), np.ma.getdata(np.ravel(lats_ea_west))
arsm_ea_west = np.ma.getdata(np.ravel(sm_ea_west[1]))  # entry 1 along the first axis of the seasonal soil-moisture variable
arvcff_ea_west = np.ma.getdata(np.ravel(vcff_ea_west))
arpcp_ea_west = np.ma.getdata(np.ravel(pcp_ea_west))
artm_ea_west = np.ma.getdata(np.ravel(tm_ea_west))
arglc_ea_west = np.ma.getdata(np.ravel(glc_ea_west))
arpzi_ea_west = np.ma.getdata(np.ravel(pzi_ea_west))
arff_ea_west = np.ma.getdata(np.ravel(ff_ea_west))
#arba_ea_west = np.ravel(ba_ea_west)
arelev_ea_west = np.ma.getdata(np.ravel(elev_ea_west))
argdd0_ea_west = np.ma.getdata(np.ravel(gdd0_ea_west))
arptd_ea_west = np.ma.getdata(np.ravel(ptd_ea_west))
arstate_ea_west = np.ma.getdata(np.ravel(vcf_state_ea_west))
arst_ea_west = np.ma.getdata(np.ravel(st_ea_west))
arlai_ea_west = np.ma.getdata(np.ravel(lai_ea_west))


# Reject a grid cell as soon as any input is out of range.  The test is
# evaluated for all cells at once instead of one Python iteration per cell;
# the [:_n_cells] slices keep the comparison limited to the cells that
# arsm_ea_west covers.
_n_cells = len(arsm_ea_west)
_reject = (
    (arsm_ea_west < 0)
    | (arpcp_ea_west[:_n_cells] > 4000)
    | (arvcff_ea_west[:_n_cells] > 100)
    | (arglc_ea_west[:_n_cells] > 14)
    | (arpzi_ea_west[:_n_cells] < 0)
    | (arff_ea_west[:_n_cells] < 0)
    | (arelev_ea_west[:_n_cells] > 2000)
    | (arelev_ea_west[:_n_cells] < 0)
    | (argdd0_ea_west[:_n_cells] < 0)
    | (arptd_ea_west[:_n_cells] < 0)
)
index = np.flatnonzero(_reject).tolist()   # positions of the rejected cells
count = len(index)                         # number of rejected cells

# Drop the rejected cells from every field (glc is only used for filtering).
data_sm_ea_west = np.delete(arsm_ea_west,index)
data_vcff_ea_west = np.delete(arvcff_ea_west,index)
data_pcp_ea_west = np.delete(arpcp_ea_west,index)
data_tm_ea_west = np.delete(artm_ea_west, index)
data_pzi_ea_west = np.delete(arpzi_ea_west, index)
data_ff_ea_west = np.delete(arff_ea_west, index)
#data_ba_ea_west = np.delete(arba, index)
data_elev_ea_west = np.delete(arelev_ea_west, index)
data_gdd0_ea_west = np.delete(argdd0_ea_west, index)
data_ptd_ea_west = np.delete(arptd_ea_west, index)
data_state_ea_west = np.delete(arstate_ea_west, index)
data_st_ea_west = np.delete(arst_ea_west, index)
data_lai_ea_west = np.delete(arlai_ea_west, index)

data_lons_ea_west, data_lats_ea_west = np.delete(arlons_ea_west,index), np.delete(arlats_ea_west,index)


#dataframe_ea_west_all = DataFrame(dataforr_ea_west[:,0:9], columns=["VCFF", "MAR", "MASM", "Tmin", "PZI", "FF", "GDD0", "PTD","State"])


# In[7]:

# Write the filtered west table to a text file that can be reloaded later.
#
# Column 10 of the earlier "lai_included" export is read back in.  Per the
# original note: what this notebook calls lai (data_lai_ea_west) is actually
# ndvi, whereas the reloaded lai_ea_west is the actual lai.
# NOTE: this rebinds lai_ea_west, which until here held the netCDF trend field.
lai_ea_west = np.loadtxt('boreal_ea_west_database_lai_included.txt')[:,10]

# One entry per output column, in file order.
_columns_ea_west = [
    data_vcff_ea_west,
    data_pcp_ea_west,
    data_sm_ea_west,
    data_tm_ea_west,
    data_pzi_ea_west,
    data_ff_ea_west,
    data_gdd0_ea_west,
    data_ptd_ea_west,
    data_st_ea_west,
    data_state_ea_west,
    lai_ea_west,           # reloaded column
    data_lai_ea_west,      # trend read from the GSSNDVI file
    data_elev_ea_west,
    data_lons_ea_west,
    data_lats_ea_west,
]
dataforr_ea_west = np.array(_columns_ea_west).T   # rows = grid cells, columns = variables

np.savetxt("boreal_ea_west_database_lai_ndvi_included.txt", dataforr_ea_west)


# In[ ]:




# In[ ]:

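# From here on the data are for North America: despite its name, the `_ea` suffix now refers to the NA_West files and `_na` to the NA_East files.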


# In[ ]:

# execute this only if you want to do the basemap plot
# path_to_dataset = Users/Beniamino/Documents/Datasets/Boreal

# Filename templates shared by both North American regions; '{}' receives the
# region prefix.  The tuple order is the order in which the files are opened.
# Inputs that exist but are currently switched off: the yearmean soil-moisture
# file (alternative nc3*), the burned-area sum (nc8*) and the GSSLAI trend
# (alternative nc13*, which would replace the GSSNDVI trend).
_NA_FILE_TEMPLATES = (
    '/path_to_dataset/{}_shift_Bor_MOD44B_V5__VCFFplusVCF__2010__0.05deg__UHAM-ICDC.nc',
    '/path_to_dataset/{}_shift_Bor_MOD44B_V5__VCFFplusVCF__2010__0.5deg__UHAM-ICDC.nc',
    '/path_to_dataset/{}_shift_Bor_MAR_cru_ts3.22.1998.2010.pre.dat.nc',
    '/path_to_dataset/{}_seasonal_soilw.mon.mean.1998-2010.v2.nc',
    '/path_to_dataset/{}_shift_0_bor_M6H_0.5_tmin.2m.gauss.1998-2010.nc',
    '/path_to_dataset/{}_shift_bor_GLC2000_0.5.nc',
    '/path_to_dataset/{}_shift_0_bor_0.5_PZI.nc',
    '/path_to_dataset/{}_shift_masked_bor_0.5_Year_Fire_Frequency_1996-2012.nc',
    '/path_to_dataset/{}_shift_Bor_ReBil_GloElev_05.nc',
    '/path_to_dataset/{}_Boreal_Mean_GDD0_1998-2010_0.5.nc',
    '/path_to_dataset/{}_Bor_Ease25_Northen_Emisphere_Mean_Thawing_Depth_1901-2002.nc',
    '/Users/Beniamino/Documents/Datasets/Soil_texture/{}_soil_texture_0.5.nc',
    '/path_to_dataset/Terra_{}_MODIS_0.5_C6_GSSNDVI_2000_2015_Decadal_Trend_SigOnly.nc',
)


def _open_na_region(prefix):
    """Open every North American input file for one region prefix, in template order."""
    return [netCDF4.Dataset(template.format(prefix)) for template in _NA_FILE_TEMPLATES]


def _read_na_fields(files):
    """Pull the analysis fields out of the datasets returned by `_open_na_region`.

    `[0]` reads the first entry along the leading dimension, `[:]` reads the whole
    variable, and no index keeps the netCDF variable object itself.
    Returns (vcff, vcff005, pcp, sm, tm, glc, pzi, ff, elev, gdd0, st, ptd, lai).
    """
    (vcf005, vcf, pre, soilw, tmin, glc, pzi, fire, elev, gdd0, thaw, texture, trend) = files
    return (
        vcf.variables['vcff'][0],
        vcf005.variables['vcff'][0],
        pre.variables['pre'][0],
        soilw.variables['soilw'],
        tmin.variables['tmin'][0]-273.15,   # presumably Kelvin -> Celsius; confirm units
        glc.variables['var'][:],
        pzi.variables['pzi'][:],
        fire.variables['firefreq'][:],
        elev.variables['elev'][:],
        gdd0.variables['gdd0'][0],
        texture.variables['var600'],
        thaw.variables['thw_depth'][0],
        trend.variables['t_trend'][:],
    )


# `*ea` handles hold the NA_West files, `*na` handles the NA_East files.
(nc005ea, ncea, nc2ea, nc3ea, nc4ea, nc5ea, nc6ea, nc7ea, nc9ea, nc10ea,
 nc11ea, nc12ea, nc13ea) = _open_na_region('NA_West')

(nc005na, ncna, nc2na, nc3na, nc4na, nc5na, nc6na, nc7na, nc9na, nc10na,
 nc11na, nc12na, nc13na) = _open_na_region('NA_East')


# Coordinate axes; a single `lat` (taken from the NA_West file) serves both regions.
lon_ea, lon_na = ncea.variables['lon'], ncna.variables['lon']
lat = ncea.variables['lat']

lon005ea, lon005na = nc005ea.variables['lon'], nc005na.variables['lon']
lat005ea = nc005ea.variables['lat']


(vcff_ea, vcff005_ea, pcp_ea, sm_ea, tm_ea, glc_ea, pzi_ea, ff_ea, elev_ea,
 gdd0_ea, st_ea, ptd_ea, lai_ea) = _read_na_fields(
    [nc005ea, ncea, nc2ea, nc3ea, nc4ea, nc5ea, nc6ea, nc7ea, nc9ea, nc10ea,
     nc11ea, nc12ea, nc13ea])

(vcff_na, vcff005_na, pcp_na, sm_na, tm_na, glc_na, pzi_na, ff_na, elev_na,
 gdd0_na, st_na, ptd_na, lai_na) = _read_na_fields(
    [nc005na, ncna, nc2na, nc3na, nc4na, nc5na, nc6na, nc7na, nc9na, nc10na,
     nc11na, nc12na, nc13na])


picctr = 0


# In[ ]:

# Create the state variable from tree cover (VCFF) according to the results of
# the optimisation.  Each pair is (exclusive upper bound, state code); the
# first bound that exceeds the cell value decides the code.
_VCF_STATE_BOUNDS = (
    (0, 0),      # null
    (10, 1),     # bare, the first peak
    (20, 1),     # also coded 1 -- state 2 is never assigned
    (45, 3),     # savanna, the third peak
    (100, 4),    # forest, the fourth peak
)
_VCF_STATE_OTHERWISE = 5   # null


def _vcf_state(cover):
    """Return the state code for one tree-cover value (5 when no bound matches)."""
    for upper, code in _VCF_STATE_BOUNDS:
        if cover < upper:
            return code
    return _VCF_STATE_OTHERWISE


# NA_East grid
vcf_state_na = np.empty(shape=vcff_na.shape)
for i in range(vcff_na.shape[0]):
    for j in range(vcff_na.shape[1]):
        vcf_state_na[i][j] = _vcf_state(vcff_na[i][j])


# NA_West grid (the `_ea` names hold the NA_West data in this part of the notebook)
vcf_state_ea = np.empty(shape=vcff_ea.shape)
for i in range(vcff_ea.shape[0]):
    for j in range(vcff_ea.shape[1]):
        vcf_state_ea[i][j] = _vcf_state(vcff_ea[i][j])

        


# In[ ]:

# no need to execute this again
# create dataset for R with the raveled arrays of data

## IMPORTANT: changes in numpy -> do np.ma.getdata(np.ravel())

# Flatten every field to 1-D and strip masks so all arrays can be indexed alike.
# NOTE(review): `lat` was read from the NA_West file; this assumes NA_East shares
# the same latitude axis -- confirm.
lons,lats = np.ma.getdata(np.meshgrid(lon_na,lat))
arlons, arlats = np.ma.getdata(np.ravel(lons)), np.ma.getdata(np.ravel(lats))
arsm_na = np.ma.getdata(np.ravel(sm_na[1]))  # entry 1 along the first axis of the seasonal soil-moisture variable
arvcff_na = np.ma.getdata(np.ravel(vcff_na))
arpcp_na = np.ma.getdata(np.ravel(pcp_na))
artm_na = np.ma.getdata(np.ravel(tm_na))
arglc_na = np.ma.getdata(np.ravel(glc_na))
arpzi_na = np.ma.getdata(np.ravel(pzi_na))
arff_na = np.ma.getdata(np.ravel(ff_na))
#arba_na = np.ravel(ba_na)
arelev_na = np.ma.getdata(np.ravel(elev_na))
argdd0_na = np.ma.getdata(np.ravel(gdd0_na))
arptd_na = np.ma.getdata(np.ravel(ptd_na))
arstate_na = np.ma.getdata(np.ravel(vcf_state_na))
arst_na = np.ma.getdata(np.ravel(st_na))
arlai_na = np.ma.getdata(np.ravel(lai_na))


# Reject a grid cell as soon as any input is out of range.  The test is
# evaluated for all cells at once instead of one Python iteration per cell;
# the [:_n_cells] slices keep the comparison limited to the cells that
# arsm_na covers.
_n_cells = len(arsm_na)
_reject = (
    (arsm_na < 0)
    | (arpcp_na[:_n_cells] > 4000)
    | (arvcff_na[:_n_cells] > 100)
    | (arglc_na[:_n_cells] > 14)
    | (arpzi_na[:_n_cells] < 0)
    | (arff_na[:_n_cells] < 0)
    | (arelev_na[:_n_cells] > 2000)
    | (arelev_na[:_n_cells] < 0)
    | (argdd0_na[:_n_cells] < 0)
    | (arptd_na[:_n_cells] < 0)
)
index = np.flatnonzero(_reject).tolist()   # positions of the rejected cells
count = len(index)                         # number of rejected cells

# Drop the rejected cells from every field (glc is only used for filtering).
data_sm_na = np.delete(arsm_na,index)
data_vcff_na = np.delete(arvcff_na,index)
data_pcp_na = np.delete(arpcp_na,index)
data_tm_na = np.delete(artm_na, index)
data_pzi_na = np.delete(arpzi_na, index)
data_ff_na = np.delete(arff_na, index)
#data_ba_na = np.delete(arba, index)
data_elev_na = np.delete(arelev_na, index)
data_gdd0_na = np.delete(argdd0_na, index)
data_ptd_na = np.delete(arptd_na, index)
data_state_na = np.delete(arstate_na, index)
# Use the flattened, mask-stripped array like every other field (the raw
# netCDF variable st_na was passed here before).
data_st_na = np.delete(arst_na, index)
data_lai_na = np.delete(arlai_na, index)



data_lons, data_lats = np.delete(arlons,index), np.delete(arlats,index)


# In[ ]:

# Column 5 of the "canadianupdate" export is used as the fire-frequency column
# of the table in place of data_ff_na.
canadianupdate_na_east=np.loadtxt("boreal_na_east_database_canadianupdate_without_soiltexture.txt")
# Per the original note: what this notebook calls lai (data_lai_na) is actually
# ndvi, whereas the reloaded lai_na_east is the actual lai.
lai_na_east = np.loadtxt('boreal_na_east_database_lai_included.txt')[:,10]
dataforr_na = np.transpose(np.array([data_vcff_na, data_pcp_na, data_sm_na, data_tm_na, data_pzi_na,canadianupdate_na_east[:,5],                                      data_gdd0_na, data_ptd_na, data_st_na, data_state_na, lai_na_east, data_lai_na, data_elev_na, data_lons, data_lats])) 

# The DataFrame is built only now: it needs dataforr_na, which did not exist
# yet at the point where this line used to sit (NameError on a fresh kernel).
dataframe_na_all = DataFrame(dataforr_na[:,0:10], columns=["VCFF", "MAR", "MASM", "Tmin", "PZI", "FF", "GDD0", "PTD", "ST","State"])

np.savetxt("boreal_na_east_database_lai_ndvi_included.txt", dataforr_na)
#np.savetxt("boreal_na_east_database_lai_included.txt", dataforr_na)
#np.savetxt("boreal_na_east_database_canadianupdate.txt", dataforr_na)
# save the data to a txt that can be reloaded  
#np.savetxt("boreal_na_east_database_canadianupdate_without_soiltexture.txt", canadianupdate_na_east)


# In[ ]:

# no need to execute this again
# create dataset for R with the raveled arrays of data

## IMPORTANT: changes in numpy -> do np.ma.getdata(np.ravel())

# Flatten every field to 1-D and strip masks so all arrays can be indexed alike.
# (The `_ea` names hold the NA_West data in this part of the notebook; lons,
# lats, arlons and arlats are rebound here and replace the NA_East values.)
lons,lats = np.ma.getdata(np.meshgrid(lon_ea,lat))
arlons, arlats = np.ma.getdata(np.ravel(lons)), np.ma.getdata(np.ravel(lats))
arsm_ea = np.ma.getdata(np.ravel(sm_ea[1]))  # entry 1 along the first axis of the seasonal soil-moisture variable
arvcff_ea = np.ma.getdata(np.ravel(vcff_ea))
arpcp_ea = np.ma.getdata(np.ravel(pcp_ea))
artm_ea = np.ma.getdata(np.ravel(tm_ea))
arglc_ea = np.ma.getdata(np.ravel(glc_ea))
arpzi_ea = np.ma.getdata(np.ravel(pzi_ea))
arff_ea = np.ma.getdata(np.ravel(ff_ea))
#arba_ea = np.ravel(ba_ea)
arelev_ea = np.ma.getdata(np.ravel(elev_ea))
argdd0_ea = np.ma.getdata(np.ravel(gdd0_ea))
arptd_ea = np.ma.getdata(np.ravel(ptd_ea))
arstate_ea = np.ma.getdata(np.ravel(vcf_state_ea))
arst_ea = np.ma.getdata(np.ravel(st_ea))
arlai_ea = np.ma.getdata(np.ravel(lai_ea))


# Reject a grid cell as soon as any input is out of range.  The test is
# evaluated for all cells at once instead of one Python iteration per cell;
# the [:_n_cells] slices keep the comparison limited to the cells that
# arsm_ea covers.
_n_cells = len(arsm_ea)
_reject = (
    (arsm_ea < 0)
    | (arpcp_ea[:_n_cells] > 4000)
    | (arvcff_ea[:_n_cells] > 100)
    | (arglc_ea[:_n_cells] > 14)
    | (arpzi_ea[:_n_cells] < 0)
    | (arff_ea[:_n_cells] < 0)
    | (arelev_ea[:_n_cells] > 2000)
    | (arelev_ea[:_n_cells] < 0)
    | (argdd0_ea[:_n_cells] < 0)
    | (arptd_ea[:_n_cells] < 0)
)
index = np.flatnonzero(_reject).tolist()   # positions of the rejected cells
count = len(index)                         # number of rejected cells

# Drop the rejected cells from every field (glc is only used for filtering).
data_sm_ea = np.delete(arsm_ea,index)
data_vcff_ea = np.delete(arvcff_ea,index)
data_pcp_ea = np.delete(arpcp_ea,index)
data_tm_ea = np.delete(artm_ea, index)
data_pzi_ea = np.delete(arpzi_ea, index)
data_ff_ea = np.delete(arff_ea, index)
#data_ba_ea = np.delete(arba, index)
data_elev_ea = np.delete(arelev_ea, index)
data_gdd0_ea = np.delete(argdd0_ea, index)
data_ptd_ea = np.delete(arptd_ea, index)
data_state_ea = np.delete(arstate_ea, index)
# Use the flattened, mask-stripped array like every other field (the raw
# netCDF variable st_ea was passed here before).
data_st_ea = np.delete(arst_ea, index)
data_lai_ea = np.delete(arlai_ea, index)


data_lons, data_lats = np.delete(arlons,index), np.delete(arlats,index)


#dataforr_ea = np.transpose(np.array([data_vcff_ea, data_pcp_ea, data_sm_ea, data_tm_ea, data_pzi_ea,\
#                                     data_ff_ea, data_gdd0_ea, data_ptd_ea, data_st_ea, data_state_ea, data_lai_ea, data_elev_ea, data_lons, data_lats])) 
#dataframe_ea_all = DataFrame(dataforr_ea[:,0:10], columns=["VCFF", "MAR", "MASM", "Tmin", "PZI", "FF", "GDD0", "PTD", "ST", "State"])

# save the data to a txt that can be reloaded  
#np.savetxt("boreal_na_west_database.txt", dataforr_ea)


# In[ ]:

# Column 5 of the "canadianupdate" export is used as the fire-frequency column
# of the table in place of data_ff_ea.
canadianupdate_na_west = np.loadtxt("boreal_na_west_database_canadianupdate_without_soiltexture.txt")
# Per the original note: what this notebook calls lai (data_lai_ea) is actually
# ndvi, whereas the reloaded lai_na_west is the actual lai.
lai_na_west = np.loadtxt('boreal_na_west_database_lai_included.txt')[:,10]

# One entry per output column, in file order.
_columns_na_west = [
    data_vcff_ea,
    data_pcp_ea,
    data_sm_ea,
    data_tm_ea,
    data_pzi_ea,
    canadianupdate_na_west[:,5],
    data_gdd0_ea,
    data_ptd_ea,
    data_st_ea,
    data_state_ea,
    lai_na_west,       # reloaded column
    data_lai_ea,       # trend read from the GSSNDVI file
    data_elev_ea,
    data_lons,
    data_lats,
]
dataforr_ea = np.array(_columns_na_west).T   # rows = grid cells, columns = variables
np.savetxt("boreal_na_west_database_lai_ndvi_included.txt", dataforr_ea)
#np.savetxt("boreal_na_west_database_canadianupdate.txt", dataforr_ea)
#np.savetxt("boreal_na_west_database_canadianupdate_without_soiltexture.txt", canadianupdate_na_west)


# In[ ]:



