#run from command line using R CMD BATCH amo_regression3.r
#outputs to fig*.png and amo_regression3.r.Rout
#run from inside R using source("amo_regression3.r",echo=T)

#options
realdata=0 #0 for synthetics, 1 for real data, 2 for real data w/ generalized least squares.
smooth_amo=1 #1 to LOWESS smooth the AMO index, or 0 to skip smoothing.
nsims=100 #Number of Monte Carlo simulations, for synthetics only.

#Choose a set of parameters:
#1 - Dr. Tung's preferred simulation parameters.
#2 - Dumb Scientist's preferred simulation parameters.
#3 - Custom simulation parameters.
choice=3

if(choice==1){ #1 - Dr. Tung's preferred simulation parameters.
  power=5 #Nonlinearity of human influence, 1 = linear.
  total_human=0.8 #Total human influence.
  amo_amp=0.20 #Synthetic, underlying AMO amplitude.
  global_noise=0.20 #Gaussian noise with this standard deviation for globe.
  atlantic_noise=0.10 #Gaussian noise w/ this st. dev. for N. Atlantic.
} else if(choice==2){ #2 - Dumb Scientist's preferred simulation parameters.
  power=7 #Nonlinearity of human influence, 1 = linear.
  total_human=0.7 #Total human influence.
  amo_amp=0.15 #Synthetic, underlying AMO amplitude.
  global_noise=0.11 #Gaussian noise with this standard deviation for globe.
  atlantic_noise=0.11 #Gaussian noise w/ this st. dev. for N. Atlantic.
} else if(choice==3){ #3 - Custom simulation parameters.
  power=7 #Nonlinearity of human influence, 1 = linear.
  total_human=0.7 #Total human influence.
  amo_amp=0.15 #Synthetic, underlying AMO amplitude.
  global_noise=0.20 #Gaussian noise with this standard deviation for globe.
  atlantic_noise=0.10 #Gaussian noise w/ this st. dev. for N. Atlantic.
} else{
  print("WARNING!!! choice not recognized!")
  doesnt_exist_so_this_will_crash #Deliberately undefined object: stops the script here.
}

p=4 #ARMA(p,q), only used if realdata == 2
q=0 #ARMA(p,q), only used if realdata == 2

recent = 1979 #Calculate trends after this year.
xunits="year"
step=0.01 #Stepsize for boxplot of uncertainties vs. trends, in °C/decade.
textsize=1.4
titlesize=1.8
col1="red"
col2="green"
col3="black"
col4="blue"
pch1=20 #point symbol
par(bg = "white")
desired_width=800
desired_height=800
desired_res=120

#Define synthetic human, natural influences.
t = 1856:2011
n = length(t)
human = (t-t[1])^power
human = total_human*human/human[n]
nature = amo_amp*cos(2*pi*(t-2000)/70) #70 yr AMO which peaks in year 2000.

#Plot synthetic human, natural influences.
png(file="fig01.png",width=desired_width,height=desired_height,res=desired_res)
title="Synthetic human and natural influences"
yunits="°C"
ylims=c(min(c(human,nature)),max(c(human,nature)))
plot(t,human,type="o",pch=pch1,lwd=2,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,ylab=yunits,main=title,col=col1,ylim=ylims)
points(t,nature,type="o",pch=pch1,lwd=2,col=col2)
legend(t[1],ylims[2],c("Human", "Natural"), pch = c(pch1,pch1), col = c(col1,col2), lty = c(1,1))
dev.off()

#Calculate the true post-1979 ("recent") trend of the synthetic human influence.
recent_t = t[which(t>recent)]
recent_human = human[which(t>recent)]
recent_human_trend = lm(recent_human~recent_t)
true_summary = summary(recent_human_trend)
true_human_trend = coef(true_summary)[2,1]
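#Aside, not part of the original analysis: with the choice=3 parameters the synthetic human
#ramp defined above works out to human(t) = total_human*((t - 1856)/(2011 - 1856))^power, so
#for example human(1979) ≈ 0.14 °C, human(1995) ≈ 0.33 °C and human(2011) = 0.70 °C, while the
#natural term is a 70-year cosine of amplitude amo_amp = 0.15 °C peaking in the year 2000.
#A quick check of those example values (uses only the variables defined above):
print(round(human[t %in% c(1979,1995,2011)],2))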

if(realdata){
  nsims=1 #Disable Monte-Carlo simulations if real data are loaded.
  qualifier = "Real"
  if(realdata == 2){
    library(nlme) #for generalised least squares
  }
  global = read.table("http://www.metoffice.gov.uk/hadobs/hadcrut4/data/current/time_series/HadCRUT.4.2.0.0.annual_ns_avg.txt")
  names(global)[1] = "t"
  names(global)[2] = "temp"
  n_atlantic = read.table("http://www.esrl.noaa.gov/psd/data/correlation/amon.us.long.mean.data",skip=1,nrows=157) #Skip partial 2013 data.
  amo = read.table("http://www.esrl.noaa.gov/psd/data/correlation/amon.us.long.data",skip=1,nrows=157) #Skip partial 2013 data.

  #Average monthly N. Atlantic SST and monthly AMO index values to get annual means.
  for(i in 1:length(amo[,1])){
    n_atlantic[i,2] = mean(as.numeric(n_atlantic[i,2:length(n_atlantic[i,])]))
    amo[i,2] = mean(as.numeric(amo[i,2:length(amo[i,])]))
  }
  names(n_atlantic)[1] = "t"
  names(n_atlantic)[2] = "temp"
  names(amo)[1] = "t"
  names(amo)[2] = "temp"

  #Keep only the year and annual-mean columns, restricted to 1856-2011.
  keep = c("t","temp")
  global = global[keep]
  n_atlantic = n_atlantic[keep]
  amo = amo[keep]
  global = global$temp[which(global$t>=t[1] & global$t<=t[n])]
  n_atlantic = n_atlantic$temp[which(n_atlantic$t>=t[1] & n_atlantic$t<=t[n])]
  amo = amo$temp[which(amo$t>=t[1] & amo$t<=t[n])]
}

trends = matrix(data=0,nrow=nsims,ncol=1)
trenderrors = matrix(data=0,nrow=nsims,ncol=1)
quadratics = matrix(data=0,nrow=nsims,ncol=1)
quadraticerrors = matrix(data=0,nrow=nsims,ncol=1)
all_values = matrix(data=0,nrow=3,ncol=1)
error_bars = matrix(data=0,nrow=3,ncol=1) #2 sigma
global_atlantic_corr = matrix(data=0,nrow=nsims,ncol=1)
global_var = matrix(data=0,nrow=nsims,ncol=1)
atlantic_var = matrix(data=0,nrow=nsims,ncol=1)
successes=0 #Number of times the true post-1979 trend is within error bars.

for(i in 1:nsims){
  #Define synthetic global, N. Atlantic temperature anomalies.
  if(!realdata){
    qualifier = "Synthetic"
    global = human + nature + rnorm(n,mean=0,sd=global_noise)
    #n_atlantic = global + rnorm(n,mean=0,sd=atlantic_noise)
    n_atlantic = human + nature + rnorm(n,mean=0,sd=sqrt(global_noise^2+atlantic_noise^2))
  }

  #Remove means so real and synthetic datasets all have exactly the same mean: 0.
  global = global - mean(global)
  n_atlantic = n_atlantic - mean(n_atlantic)
  global_atlantic_corr[i] = cor(global,n_atlantic)
  global_var[i] = var(global)
  atlantic_var[i] = var(n_atlantic)

  #Plot synthetic global, N. Atlantic temperature anomalies.
  if(i==1){
    png(file="fig02.png",width=desired_width,height=desired_height,res=desired_res)
    title="surface temperature anomalies"
    title=paste(qualifier, title)
    #ylims=c(min(c(global,n_atlantic)),max(c(global,n_atlantic)))
    #Fix plot limits so real and synthetic plots are easier to compare.
    ylims=c(-0.7,0.7)
    plot(t,global,type="o",pch=pch1,lwd=2,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,ylab=yunits,main=title,col=col3,ylim=ylims)
    points(t,n_atlantic,type="o",pch=pch1,lwd=2,col=col1)
    if(realdata){
      legends = c("Global (HadCRUT4)", "N. Atlantic SST (NOAA)")
    } else{
      legends = c("Global", "N. Atlantic")
    }
    legend(t[1],ylims[2],legends, pch = c(pch1,pch1), col = c(col3,col1), lty = c(1,1))
    dev.off()
  }

  #Define linearly-detrended synthetic AMO (real AMO downloaded separately).
  if(!realdata){
    n_atlantic_trend = lm(n_atlantic~t)
    amo = n_atlantic - coef(summary(n_atlantic_trend))[2,1]*(t-t[1])
  }
  if(smooth_amo){
    smoothed_amo = lowess(t,amo,f=25/n)$y
    #smoothed_amo = loess(amo~t,data=data.frame(t,amo),span=1)
  }
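  #Aside, not part of the original analysis: lowess() treats f as the fraction of points in each
  #local window, so f = 25/n corresponds to roughly a 25-year smoothing window on this 156-year
  #series. The commented-out loess() line above would also need predict() to extract the fitted
  #values; a roughly comparable (though not identical) alternative would be:
  #smoothed_amo = predict(loess(amo ~ t, span = 25/n, degree = 1))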
if(i==1){ png(file="fig03.png",width=desired_width,height=desired_height,res=desired_res) title="linearly-detrended AMO index" title=paste(qualifier, title, collapse = "") ylims=c(min(amo),max(amo)) plot(t,amo,type="o",pch=pch1,lwd=2,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,ylab=yunits,main=title,col=col3,ylim=ylims) if(smooth_amo){ points(t,smoothed_amo,type="o",pch=pch1,lwd=2,col=col4) legend(t[1],ylims[2],c("Original AMO index", "Smoothed AMO index"), pch = c(pch1,pch1), col = c(col3,col4), lty = c(1,1)) } dev.off } if(smooth_amo){ amo = smoothed_amo } #Regress global temperatures against human influence and AMO. human_p = human - mean(human) #Remove mean so it's handled by the intercept. amo_p = amo - mean(amo) nature_p = nature - mean(nature) if(realdata == 2){ regression = gls(global~human_p+amo_p,corr=corARMA(p=p,q=q)) regression_summary = summary(regression) all_values[1] = all_values[1] + regression_summary$tTable[1,1] all_values[2] = all_values[2] + regression_summary$tTable[2,1] all_values[3] = all_values[3] + regression_summary$tTable[3,1] error_bars[1] = error_bars[1] + 2*regression_summary$tTable[1,2]#2 sigma error_bars[2] = error_bars[2] + 2*regression_summary$tTable[2,2]#2 sigma error_bars[3] = error_bars[3] + 2*regression_summary$tTable[3,2]#2 sigma est_human = regression_summary$tTable[2,1]*human_p } else{ regression = lm(global~human_p+amo_p) regression_summary = summary(regression) all_values[1] = all_values[1] + coef(regression_summary)[1,1] all_values[2] = all_values[2] + coef(regression_summary)[2,1] all_values[3] = all_values[3] + coef(regression_summary)[3,1] error_bars[1] = error_bars[1] + 2*coef(regression_summary)[1,2]#2 sigma error_bars[2] = error_bars[2] + 2*coef(regression_summary)[2,2]#2 sigma error_bars[3] = error_bars[3] + 2*coef(regression_summary)[3,2]#2 sigma est_human = coef(regression_summary)[2,1]*human_p } #print(regression_summary) #Plot residuals. if(i==1){ png(file="fig04.png",width=desired_width,height=desired_height,res=desired_res) if(smooth_amo){ title="Residuals of fit to human, smoothed AMO" } else{ title="Residuals of fit to human, unsmoothed AMO" } residuals = resid(regression) plot(t,residuals,type="o",pch=pch1,lwd=2,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,ylab=yunits,main=title,col=col3) dev.off } #Add residual back. est_human2 = est_human + residuals recent_est_human2 = est_human2[which(t>recent)] if(realdata == 2){ recent_est_human2_trend = gls(recent_est_human2~recent_t,corr=corARMA(p=p,q=q)) est_summary2 = summary(recent_est_human2_trend) est_human2_trend = est_summary2$tTable[2,1] est_human2_error = 2*est_summary2$tTable[2,2]#2 sigma } else{ recent_est_human2_trend = lm(recent_est_human2~recent_t) est_summary2 = summary(recent_est_human2_trend) est_human2_trend = coef(est_summary2)[2,1] est_human2_error = 2*coef(est_summary2)[2,2]#2 sigma } true_blurb = sprintf("True human. Post-%d trend = %+.2f%s/decade",recent,true_human_trend*10,yunits) est_blurb2 = sprintf("Est. human. Post-%d trend = %+.2f ± %.2f",recent,est_human2_trend*10,est_human2_error*10) if(true_human_trend <= est_human2_trend+est_human2_error & true_human_trend >= est_human2_trend-est_human2_error) successes = successes + 1 #Plot true and estimated human influences. 

  #Plot true and estimated human influences.
  if(i==1){
    recent_trendline = est_human2_trend*(recent_t-recent_t[1])
    recent_trendline = recent_trendline - mean(recent_trendline) + mean(recent_est_human2)
    #Shift true human so its recent mean equals the mean of the estimated
    #trendline, for easier comparison of recent trends.
    human = human - (mean(recent_human) - mean(recent_trendline))
    png(file="fig05.png",width=desired_width,height=desired_height,res=desired_res)
    title="True, estimated human influences"
    ylims=c(min(c(human,est_human2)),max(c(human,est_human2)))
    plot(t,human,type="o",pch=pch1,lwd=2,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,ylab=yunits,main=title,col=col3,ylim=ylims)
    points(t,est_human2,type="o",pch=pch1,lwd=2,col=col1)
    points(recent_t,recent_trendline,type="l",lwd=6,lty=3,col=col4)
    #Lines whose slopes differ from the estimated trend by ± the 2-sigma trend error,
    #re-centred on the trendline's mean.
    lowerbound=recent_trendline-est_human2_error*recent_t
    lowerbound=lowerbound - mean(lowerbound) + mean(recent_trendline)
    points(recent_t,lowerbound,type="l",lwd=6,lty=1,col=col4)
    upperbound=recent_trendline+est_human2_error*recent_t
    upperbound=upperbound - mean(upperbound) + mean(recent_trendline)
    points(recent_t,upperbound,type="l",lwd=6,lty=1,col=col4)
    legend(t[1],ylims[2],c(true_blurb, est_blurb2), pch = c(pch1,pch1), col = c(col3,col1), lty = c(1,1))
    dev.off()
    #Shift true human back to original baseline, just in case it's needed later.
    human = human + (mean(recent_human) - mean(recent_trendline))
  }
  trends[i] = est_human2_trend*10
  trenderrors[i] = est_human2_error*10

  #Calculate estimated quadratic term.
  if(realdata == 2){
    est_human2_quadratic = gls(est_human2~t+I(t^2),corr=corARMA(p=p,q=q))
    est_quadratic = summary(est_human2_quadratic)$tTable[3,1]
    est_quadraticerror = summary(est_human2_quadratic)$tTable[3,2]
  } else{
    est_human2_quadratic = lm(est_human2~t+I(t^2))
    est_quadratic = coef(summary(est_human2_quadratic))[3,1]
    est_quadraticerror = coef(summary(est_human2_quadratic))[3,2]
  }
  quadratics[i] = est_quadratic
  quadraticerrors[i] = est_quadraticerror
}

#Report results.
all_values = all_values/nsims
error_bars = error_bars/nsims
print(all_values)
print(error_bars)
print(mean(global_atlantic_corr))
print(mean(global_var))
print(mean(atlantic_var))
print(mean(trends))
print(mean(trenderrors))
true_human_quadratic = lm(human~t+I(t^2))
true_quadratic = coef(summary(true_human_quadratic))[3,1]
quadratic_blurb = sprintf("True quadratic = %.3e",true_quadratic)
print(quadratic_blurb)
print(successes)
successes_blurb = sprintf("The true post-%d trend was within the estimated 95%% confidence interval %.2f%% of the time.",recent,successes/nsims*100)
print(successes_blurb)
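
#Aside, not part of the original analysis: the coverage percentage above is itself a Monte-Carlo
#estimate with sampling error. With nsims=100 and nominal 95% coverage, a rough binomial standard
#error is sqrt(0.95*0.05/100), about 2.2 percentage points, so values like 93% or 97% are still
#consistent with well-calibrated error bars.
if(!realdata){
  coverage = successes/nsims
  print(sprintf("Approximate standard error of that coverage estimate: %.1f percentage points.",100*sqrt(coverage*(1-coverage)/nsims)))
}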

if(!realdata){
  library(MASS) #for fitdistr, to determine mean and 95% C.I. of distributions.

  this_dist = fitdistr(trends,"normal")
  this_estimate = this_dist$estimate[1]
  this_error = 2*this_dist$estimate[2] #2 sigma
  png(file="fig06.png",width=desired_width,height=desired_height,res=desired_res)
  title = sprintf("Est. post-%d human trend:\n %+.2f ± %.2f%s/decade",recent,this_estimate,this_error,yunits)
  xunits = "°C/decade"
  hist(trends,density=20,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,main=title)
  dev.off()

  this_dist = fitdistr(quadratics,"normal")
  this_estimate = this_dist$estimate[1]
  this_error = 2*this_dist$estimate[2] #2 sigma
  png(file="fig07.png",width=desired_width,height=desired_height,res=desired_res)
  title = sprintf("Est. post-%d human quadratic term:\n %+.2e ± %.2e%s/year^2",t[1],this_estimate,this_error,yunits)
  xunits = "°C/year^2"
  hist(quadratics,density=20,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,main=title)
  dev.off()

  this_dist = fitdistr(global_atlantic_corr,"normal")
  this_estimate = this_dist$estimate[1]
  this_error = 2*this_dist$estimate[2] #2 sigma
  png(file="fig08.png",width=desired_width,height=desired_height,res=desired_res)
  title = sprintf("Correlation b/w global, N. Atlantic temps:\n %.2f ± %.2f",this_estimate,this_error)
  xunits = "correlation"
  hist(global_atlantic_corr,density=20,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,main=title)
  dev.off()

  this_dist = fitdistr(global_var,"normal")
  this_estimate = this_dist$estimate[1]
  this_error = 2*this_dist$estimate[2] #2 sigma
  png(file="fig09.png",width=desired_width,height=desired_height,res=desired_res)
  title = sprintf("Variance of global temps:\n %.2f ± %.2f%s^2",this_estimate,this_error,yunits)
  xunits = "°C^2"
  hist(global_var,density=20,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,main=title)
  dev.off()

  this_dist = fitdistr(atlantic_var,"normal")
  this_estimate = this_dist$estimate[1]
  this_error = 2*this_dist$estimate[2] #2 sigma
  png(file="fig10.png",width=desired_width,height=desired_height,res=desired_res)
  title = sprintf("Variance of N. Atlantic temps:\n %.2f ± %.2f%s^2",this_estimate,this_error,yunits)
  xunits = "°C^2"
  hist(atlantic_var,density=20,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,xlab=xunits,main=title)
  dev.off()

  #Boxplot of trend uncertainties, binned by trend size in steps of width "step".
  png(file="fig11.png",width=desired_width,height=desired_height,res=desired_res)
  title = sprintf("Post-%d trend uncertainties vs. trends",recent)
  xunits = "Trend - °C/decade"
  yunits = "95% Uncertainties - °C/decade"
  #step=(max(trends)-min(trends))/10
  groups=floor(trends/step)*step
  boxplot(trenderrors~groups,cex.main=titlesize,cex.axis=textsize,cex.lab=textsize,main=title,xlab=xunits,ylab=yunits)
  dev.off()
}

print(successes_blurb)