Load data and get libraries togther
knitr::opts_chunk$set(echo = TRUE)
library(rstan)
library(shinystan)
library(ggplot2)
library(dplyr)
library(readr)
ardata <- read_csv("ardata.csv")
sf6data <- read_csv("sf6data.csv")
First need to calculate saturation concentrations. These are based on Hamme and Emerson (2004)
watdens<-function(temp){
t<-temp
A <- 7.0132e-5
B <- 7.926295e-3
C <- -7.575477e-5
D<- 7.314701e-7
E <- -3.596363e-9
to<- 3.9818
dens<- (999.97358- (A*(t-to) + B*(t-to)^2 +C*(t-to)^3 + D*(t-to)^4+E*(t-to)^5) ) -4.873e-3 + 1.708e-4*t - 3.108e-6 * t^2
dens/1000
}
nsat<- function(temp, bp) {
ts<-log((298.15-temp) / (273.15 + temp))
a0<-6.42931
a1<-2.92704
a2<-4.32531
a3<-4.69149
u<-10^(8.10765-(1750.286/(235+temp)))
satn<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
watdens(temp)*satn*(28.014/1000)##converts umol/kg to mg/L
}
arsat<- function(temp, bp) {
ts<-log((298.15-temp) / (273.15 + temp))
a0<-2.79150
a1<-3.17609
a2<-4.13116
a3<-4.90379
u<-10^(8.10765-(1750.286/(235+temp)))
satar<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
watdens(temp)*satar*(39.948/1000)##converts umol/kg to mg/L
}
## Estimates K600 for KO2 at a given temperature. From Wanninkhof (1992).
K600fromO2<-function (temp, KO2) {
((600/(1800.6 - (120.1 * temp) + (3.7818 * temp^2) - (0.047608 * temp^3)))^-0.5) * KO2
}
## Estimates K600 for KAr at a given temperature. From Raymond et al (2012).
K600fromAr<-function (temp, KAr) {
((600/(1799 - (106.96 * temp) + (2.797 * temp^2) - (0.0289 * temp^3)))^-0.5) * KAr
}
Now calculate saturation concentrations, and split data into pre and post
ardata$arnsat <- arsat(ardata$temp,ardata$pressure) / nsat(ardata$temp,ardata$pressure)
ardatapre<- ardata[ardata$type=='pre', ]
ardatapost<- ardata[ardata$type=='post', ]
sf6datapost<- sf6data[sf6data$type=='post', ]
Plot of raw Ar:N2. Note the high varibility in some of the pre Ar data. We used the calculated Ar:N2 since it was in the middle of our samples, but not nearly as variable. The two lines on the Ar sat are the post and pre calculations; they vary because of temperature. The upper line is the warmer, pleateau collection temperature.
## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).
Now for calculations where we subtract background and correct for conductivity
ardatapost$arcorr<- ardatapost$arncalc - ardatapost$arnsat
#2.2. Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))
## Warning: package 'bindrcpp' was built under R version 3.3.2
#join with the post cond
ardatapost<-merge(ardatapost,ardataprecond)
ardatapost$condcor<- ardatapost$cond - ardatapost$precond
ardatapost$arcond<- ardatapost$arcorr / ardatapost$condcor
##calc the mean for station 0
ardatapost_0<-ardatapost[ardatapost$stationcorr==0,]
ardata_0sum <- ardatapost_0 %>% group_by(Trial) %>% summarise(arcond_0=mean(arcond), arn_enrich=mean(arncalc/arnsat))
#join with ardatapost
ardatapost<-merge(ardatapost,ardata_0sum)
ardatapost$arcondnorm<-ardatapost$arcond/ardatapost$arcond_0
Prep SF6 in same way
#2.2. Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))
#join with the post cond
sf6datapost<-merge(sf6datapost,ardataprecond) #yes, using pre from Ar
sf6datapost$condcor<- sf6datapost$cond - sf6datapost$precond
sf6datapost$sf6cond<- sf6datapost$sf6 / sf6datapost$condcor
##calc the mean for station 0
sf6datapost_0<-sf6datapost[sf6datapost$stationcorr==0,]
sf6data_0sum <- sf6datapost_0 %>% group_by(Trial) %>% summarise(sf6cond_0=mean(sf6cond))
#join with sf6datapost
sf6datapost<-merge(sf6datapost,sf6data_0sum)
sf6datapost$sf6condnorm<-sf6datapost$sf6cond/sf6datapost$sf6cond_0
Here is the Stan model that we used for the analysis. Details on priors etc are in the text.
######
sink("combineargonmodelb.stan")
cat("
data {
int <lower=1> N; //number of data points, ar
int <lower=1> NS; //number of data points, sf6
int <lower=1> sites; //number of sites
int <lower=0> sitear[N]; //stream number for ar indexing vector
int <lower=0> sitesf6[NS]; //stream number for sf6 indexing vector
vector [N] ar;//conductivity corrected Ar data
vector [N] distar;//
vector [NS] sf6;//conductivity corrected sf6 data
vector [NS] distsf6;//
}
parameters {
vector <lower=0> [sites] a;
real mu_a; //take out for hyperprior info // mean prior
real<lower=0, upper=0.5> sigma_ar; // error ar
real<lower=0, upper=0.5> sigma_sf6; // error sf6
vector[sites] k; // decline
real <lower=0, upper=2> Ar0;
real <lower=0, upper=2> SF60;
real <lower=0> sigma_a; // mean prior
}
model {
//priors.
k ~ normal(0, 10);
a~normal (mu_a,sigma_a); // mean prior
Ar0 ~normal(1,0.05);
SF60~normal(1,0.05);
mu_a~normal(1.35, 1); // hyper prior
sigma_a ~ normal(0, 2); //this is in fact half normal. See the constraint above.
//likelihood
for (i in 1:N) {
ar[i] ~ normal( Ar0 * exp(-k[sitear[i]]*distar[i]*0.01) , sigma_ar);
}
for (j in 1:NS) {
sf6[j] ~ normal( SF60 * exp(-k[sitesf6[j]]*distsf6[j]*0.01/a[sitesf6[j]]) , sigma_sf6);
}
}
"
,fill=TRUE)
sink()
Get data together into a list for Stan.
arstandata=list("ar"= ardatapost$arcondnorm, "distar"=ardatapost$stationcorr, "N"=length(ardatapost$stationcorr),
"sf6"= sf6datapost$sf6condnorm, "distsf6"=sf6datapost$stationcorr, "NS"=length(sf6datapost$stationcorr),
"sites"=max(ardatapost$Trial), "sitear" = ardatapost$Trial,
"sitesf6" = sf6datapost$Trial)
Run the stan model
arfit <- stan(file = "combineargonmodelb.stan", data = arstandata,
iter = 3000, chains = 4)
## The following numerical problems occured the indicated number of times after warmup on chain 1
## count
## Exception thrown at line 54: normal_log: Location parameter is nan, but must be finite! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 2
## count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 3
## count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 3
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
## count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite! 3
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
Print out parameters. a[j] is the ratio of \(K_{Ar}: K_{SF6}\) for stream j. k is \(K_{Ar,j}\), i.e., the per unit distance evaion rate of Ar. It is scaled 100 fold for the purposes of Stan, which like all parameters to be around -1 to 1. mu_a is the hyperparamter for \(a\)
print(arfit)
## Inference for Stan model: combineargonmodelb.
## 4 chains, each with iter=3000; warmup=1500; thin=1;
## post-warmup draws per chain=1500, total post-warmup draws=6000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## a[1] 2.01 0.00 0.28 1.51 1.81 1.99 2.18 2.61 5292
## a[2] 0.92 0.00 0.14 0.67 0.82 0.91 1.01 1.23 4668
## a[3] 1.76 0.00 0.22 1.37 1.61 1.75 1.90 2.22 4959
## a[4] 2.00 0.00 0.21 1.62 1.85 1.98 2.13 2.45 4735
## a[5] 1.46 0.00 0.17 1.15 1.34 1.45 1.57 1.83 4772
## a[6] 2.31 0.00 0.23 1.88 2.15 2.30 2.46 2.79 5015
## a[7] 0.94 0.00 0.12 0.73 0.86 0.93 1.01 1.18 4652
## a[8] 3.42 0.01 0.44 2.68 3.11 3.38 3.68 4.41 3796
## mu_a 1.79 0.00 0.35 1.09 1.58 1.80 2.01 2.48 6000
## sigma_ar 0.09 0.00 0.01 0.08 0.09 0.09 0.10 0.11 6000
## sigma_sf6 0.06 0.00 0.00 0.05 0.06 0.06 0.06 0.07 6000
## k[1] 0.18 0.00 0.02 0.14 0.17 0.18 0.19 0.22 6000
## k[2] 0.12 0.00 0.02 0.09 0.11 0.12 0.14 0.16 4752
## k[3] 0.51 0.00 0.05 0.42 0.47 0.51 0.54 0.61 4844
## k[4] 0.57 0.00 0.05 0.47 0.53 0.56 0.60 0.67 4940
## k[5] 1.78 0.00 0.19 1.44 1.65 1.77 1.90 2.18 4730
## k[6] 1.65 0.00 0.14 1.38 1.55 1.64 1.75 1.95 5388
## k[7] 2.54 0.00 0.26 2.08 2.36 2.53 2.71 3.08 4423
## k[8] 10.00 0.02 1.20 7.97 9.18 9.89 10.70 12.70 3884
## Ar0 0.99 0.00 0.01 0.96 0.98 0.99 1.00 1.02 6000
## SF60 0.99 0.00 0.01 0.97 0.98 0.99 1.00 1.01 6000
## sigma_a 0.99 0.00 0.36 0.52 0.74 0.92 1.15 1.92 6000
## lp__ 536.31 0.07 3.54 528.56 534.15 536.66 538.89 542.07 2259
## Rhat
## a[1] 1
## a[2] 1
## a[3] 1
## a[4] 1
## a[5] 1
## a[6] 1
## a[7] 1
## a[8] 1
## mu_a 1
## sigma_ar 1
## sigma_sf6 1
## k[1] 1
## k[2] 1
## k[3] 1
## k[4] 1
## k[5] 1
## k[6] 1
## k[7] 1
## k[8] 1
## Ar0 1
## SF60 1
## sigma_a 1
## lp__ 1
##
## Samples were drawn using NUTS(diag_e) at Tue Feb 27 12:07:55 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Extract parameters of interest. Note that the model multiplied k by 0.01 so that all parameters are on the same scale. We fix that here.
arfitsum<- summary(arfit)$summary
asum<-arfitsum[1:8,c("2.5%", "50%", "97.5%")]
ksum<-(arfitsum[12:19,c("2.5%", "50%", "97.5%")])*0.01# the 0.01 here rescales the k estimate
Calculate derived variables such as \(K\) (per time) and \(k600\)
streamslope<-c( 0.00703, 0.00703,0.015,0.015, 0.06, 0.11,0.12,0.12)
Q<- c(0.084,0.07,0.02,0.02,0.021,0.097,0.022,0.022)*60
w<- c(2.3,1.6,0.8,0.8,0.9,3.3,0.7,1.3)
ardataposttemp<-ardatapost %>% group_by(Trial) %>% summarise(temp=mean(temp, na.rm=T))
k<- ksum*Q*1440/w
k600<-K600fromAr(ardataposttemp$temp, k)
v<-c(12,15.4,9.5,9.5,3.1,4,6.3,5.2)
z<-(Q)/(w*v)
Kt<-v*ksum[,2]*1440
Enjoy.