library(plyr)
library(readstata13)
library(statar)
library(data.table)
library(plm)
library(ggplot2)
library(lattice)
library(zoo)
library(speedglm)

computer.code <- 2

# This code computes BPP coefficients from the simulations we have generated inside Matlab
# and the associated tables and Figures.
# For details in the procedure, see Blundell, Pistaferri and Preston (2008), in particular
# Appendix C.

min.age <- 25

file.name <- paste("dfp_simulation_choose1.csv",sep="")
dat <- as.data.table(read.table(file.name,sep=","))
dat <- rename(dat,c("V1"="id","V2"="age","V3"="ry","V4"="rye","V5"="sav","V6"="cons"))

df <- dat

file.name <- paste("dfp_simulation_choose2.csv",sep="")
dat <- as.data.table(read.table(file.name,sep=","))
dat <- rename(dat,c("V1"="id","V2"="age","V3"="ry","V4"="rye","V5"="sav","V6"="cons"))

ca <- dat

file.name <- paste("dfp_simulation_choose3.csv",sep="")
dat <- as.data.table(read.table(file.name,sep=","))
dat <- rename(dat,c("V1"="id","V2"="age","V3"="ry","V4"="rye","V5"="sav","V6"="cons"))

ko <- dat

file.name <- paste("dfp_simulation_choose4.csv",sep="")
dat <- as.data.table(read.table(file.name,sep=","))
dat <- rename(dat,c("V1"="id","V2"="age","V3"="ry","V4"="rye","V5"="sav","V6"="cons"))

d1 <- dat

df$logcons <- log(df$cons)
df$logy <- log(df$ry)
df$logye <- log(df$rye)
df$ty <- df$logy+df$logye
df[, nextcons:=shift(logcons,type="lead"),by="id"]
df[, nextcons2:=shift(logcons,type="lead",n=2),by="id"]
df[, nexty:=shift(logy,type="lead"),by="id"]
df[, nextye:=shift(logye,type="lead"),by="id"]
df[, ty1:=shift(ty,type="lead"),by="id"]
df[, ty2:=shift(ty,type="lead",n=2),by="id"]
df[, ty3:=shift(ty,type="lead",n=3),by="id"]

df$gc <- df$nextcons-df$logcons
df$gy <- df$nexty-df$logy
df$gye <- df$nextye-df$logye
df$dytmenos1 <- df$ty1-df$ty
df$dyt <- df$ty2-df$ty1
df$dytmas1 <- df$ty3-df$ty2
df$dct <- df$nextcons2-df$nextcons

ca$logcons <- log(ca$cons)
ca$logy <- log(ca$ry)
ca$logye <- log(ca$rye)
ca$ty <- ca$logy+ca$logye

ca[, nextcons:=shift(logcons,type="lead"),by="id"]
ca[, nexty:=shift(logy,type="lead"),by="id"]
ca[, nextye:=shift(logye,type="lead"),by="id"]
ca[, ty1:=shift(ty,type="lead"),by="id"]
ca[, ty2:=shift(ty,type="lead",n=2),by="id"]
ca[, ty3:=shift(ty,type="lead",n=3),by="id"]
ca[, nextcons2:=shift(logcons,type="lead",n=2),by="id"]

ca$gc <- ca$nextcons-ca$logcons
ca$gy <- ca$nexty-ca$logy
ca$gye <- ca$nextye-ca$logye

ca$dytmenos1 <- ca$ty1-ca$ty
ca$dyt <- ca$ty2-ca$ty1
ca$dytmas1 <- ca$ty3-ca$ty2
ca$dct <- ca$nextcons2-ca$nextcons



ko$logcons <- log(ko$cons)
ko$logy <- log(ko$ry)
ko$logye <- log(ko$rye)
ko$ty <- ko$logy+ko$logye

ko[, nextcons:=shift(logcons,type="lead"),by="id"]
ko[, nexty:=shift(logy,type="lead"),by="id"]
ko[, nextye:=shift(logye,type="lead"),by="id"]
ko[, ty1:=shift(ty,type="lead"),by="id"]
ko[, ty2:=shift(ty,type="lead",n=2),by="id"]
ko[, ty3:=shift(ty,type="lead",n=3),by="id"]
ko[, nextcons2:=shift(logcons,type="lead",n=2),by="id"]

ko$gc <- ko$nextcons-ko$logcons
ko$gy <- ko$nexty-ko$logy
ko$gye <- ko$nextye-ko$logye

ko$dytmenos1 <- ko$ty1-ko$ty
ko$dyt <- ko$ty2-ko$ty1
ko$dytmas1 <- ko$ty3-ko$ty2
ko$dct <- ko$nextcons2-ko$nextcons



d1$logcons <- log(d1$cons)
d1$logy <- log(d1$ry)
d1$logye <- log(d1$rye)
d1$ty <- d1$logy+d1$logye

d1[, nextcons:=shift(logcons,type="lead"),by="id"]
d1[, nexty:=shift(logy,type="lead"),by="id"]
d1[, nextye:=shift(logye,type="lead"),by="id"]
d1[, ty1:=shift(ty,type="lead"),by="id"]
d1[, ty2:=shift(ty,type="lead",n=2),by="id"]
d1[, ty3:=shift(ty,type="lead",n=3),by="id"]
d1[, nextcons2:=shift(logcons,type="lead",n=2),by="id"]

d1$gc <- d1$nextcons-d1$logcons
d1$gy <- d1$nexty-d1$logy
d1$gye <- d1$nextye-d1$logye

d1$dytmenos1 <- d1$ty1-d1$ty
d1$dyt <- d1$ty2-d1$ty1
d1$dytmas1 <- d1$ty3-d1$ty2
d1$dct <- d1$nextcons2-d1$nextcons


bpp.coefs <- speedlm(gc ~ gy, data=df)
bpp.transcoefs <- speedlm(gc ~ gye,data=df)

bpp.coefs.ca <- speedlm(gc ~ gy, data=ca)
bpp.transcoefs.ca <- speedlm(gc ~ gye,data=ca)

bpp.coefs.ko <- speedlm(gc ~ gy, data=ko)
bpp.transcoefs.ko <- speedlm(gc ~ gye,data=ko)

bpp.coefs.d1 <- speedlm(gc ~ gy, data=d1)
bpp.transcoefs.d1 <- speedlm(gc ~ gye,data=d1)

# Compute BPP-style moments

# Need the following covariances

df$phinum <- df$dct*(df$dytmenos1+df$dytmas1+df$dyt)
df$phiden <- df$dyt*(df$dytmenos1+df$dytmas1+df$dyt)

df.phi <- mean(df$phinum,na.rm=T)/mean(df$phiden,na.rm=T)

df$psinum <- df$dct*df$dytmas1
df$psiden <- df$dyt*df$dytmas1

df.psi <- mean(df$psinum,na.rm=T)/mean(df$psiden,na.rm=T)

# for ca...

ca$phinum <- ca$dct*(ca$dytmenos1+ca$dytmas1+ca$dyt)
ca$phiden <- ca$dyt*(ca$dytmenos1+ca$dytmas1+ca$dyt)

ca.phi <- mean(ca$phinum,na.rm=T)/mean(ca$phiden,na.rm=T)

ca$psinum <- ca$dct*ca$dytmas1
ca$psiden <- ca$dyt*ca$dytmas1

ca.psi <- mean(ca$psinum,na.rm=T)/mean(ca$psiden,na.rm=T)

# for ko... 

ko$phinum <- ko$dct*(ko$dytmenos1+ko$dytmas1+ko$dyt)
ko$phiden <- ko$dyt*(ko$dytmenos1+ko$dytmas1+ko$dyt)

ko.phi <- mean(ko$phinum,na.rm=T)/mean(ko$phiden,na.rm=T)

ko$psinum <- ko$dct*ko$dytmas1
ko$psiden <- ko$dyt*ko$dytmas1

ko.psi <- mean(ko$psinum,na.rm=T)/mean(ko$psiden,na.rm=T)

# for d1... 

d1$phinum <- d1$dct*(d1$dytmenos1+d1$dytmas1+d1$dyt)
d1$phiden <- d1$dyt*(d1$dytmenos1+d1$dytmas1+d1$dyt)

d1.phi <- mean(d1$phinum,na.rm=T)/mean(d1$phiden,na.rm=T)

d1$psinum <- d1$dct*d1$dytmas1
d1$psiden <- d1$dyt*d1$dytmas1

d1.psi <- mean(d1$psinum,na.rm=T)/mean(d1$psiden,na.rm=T)

# These are the values for Table 2

# By columns

1-ca.psi
1-df.psi
1-ko.psi
1-d1.psi

1-ca.phi
1-df.phi
1-ko.psi
1-d1.psi

1-bpp.coefs.ca$coefficients[2]
1-bpp.coefs$coefficients[2]
1-bpp.coefs.ko$coefficients[2]
1-bpp.coefs.d1$coefficients[2]

1-bpp.transcoefs.ca$coefficients[2]
1-bpp.transcoefs$coefficients[2]
1-bpp.transcoefs.ko$coefficients[2]
1-bpp.transcoefs.d1$coefficients[2]

# yt growth with ct growth
# yt+1 growth with ct growth
# yt with ct+1 

# By age:

df2 <- subset(df,age<60)
bpp.coefs.age <- df2[,coef(speedlm(gc ~ gy))[2],by="age"]

be2 <- subset(ca,age<60)
bpp.coefs.age.ca <- be2[,coef(speedlm(gc ~ gy))[2],by="age"]

ko2 <- subset(ko,age<60)
bpp.coefs.age.ko <- ko2[,coef(speedlm(gc ~ gy))[2],by="age"]

bpp.coefs.age$V1 <- 1-bpp.coefs.age$V1
bpp.coefs.age <- rename(bpp.coefs.age,c("V1"="NL"))
bpp.coefs.age.ca$V1 <- 1-bpp.coefs.age.ca$V1
bpp.coefs.age.ko$ko <- 1-bpp.coefs.age.ko$V1
bpp.coefs.age.ko$V1 <- NULL

plot.by.age <- merge(bpp.coefs.age,bpp.coefs.age.ca,by="age")
plot.by.age <- merge(plot.by.age,bpp.coefs.age.ko,by="age")

plot.by.age <- melt(plot.by.age,id="age")

plot.by.age$variable <- factor(plot.by.age$variable)

plot.by.age$variable <- mapvalues(plot.by.age$variable,from=c("NL","V1","ko"),
                                  to=c("NL process","Canonical","Normal, age-dependent"))

# This is Figure 11

y <- ggplot(plot.by.age,aes(x=age,y=value,colour=variable,group=variable,linetype=variable)) +
  scale_x_continuous(name="Age",expand=c(0,0)) + scale_y_continuous(name="BPP coefficients") +
  theme_light() + geom_line(size=0.3) +
  scale_color_manual(values=c("#000000","#000000","#000000")) +
  scale_linetype_manual(values=c("solid","dashed","solid"))+
  theme(axis.text=element_text(size=17,face="bold"),
        axis.title=element_text(size=17,face="bold"),
        legend.position="bottom",legend.title=element_blank(),
        legend.text=element_text(size=17))+
  theme(plot.margin=unit(c(0.25,0.4,0.25,0.25),"cm"))

y <- y + geom_point(data=(subset(plot.by.age,variable=="Normal, age-dependent")),size=0.75)+
  guides(fill = guide_legend(override.aes = list(linetype = 0, shape=''))
         , colour = guide_legend(override.aes = list(linetype=c(1,2,1)
                                                     , shape=c(NA,NA,16))))

ggsave(filename="f_bpp_abb_byage_ko_bn.pdf",plot=y,width=8,height=6)