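# THIS CODE computes moments of log earnings changes by age group and by
# percentile of previous earnings for several PSID earnings definitions, and
# plots them next to the moments from Guvenen, Karahan, Ozkan and Song.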

library(data.table)
library(ggplot2)
library(Hmisc)
library(readstata13)
library(plyr)
library(psych)
library(R.matlab)
library(statar)
library(tidyr)
library(splitstackshape)

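# Directory where PSID is stored
# NOTE(review): no path or load step is actually defined here. The PSID panel
# `dam.du` used below must already exist in the workspace (or be created by
# the sourced file) -- TODO confirm.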

# Plotting helpers. `cond.age.moments()`, called in the figure sections of
# this script, is not defined in this file and is presumably provided by the
# sourced file -- TODO confirm.
source('functions/gon_graphs_export.R')

# Cumulative probabilities delimiting the percentile bins of earnings:
# 0.05, 0.10, 0.20, ..., 0.90, 0.95, 1.00 (twelve cut points).
binning <- cumsum(c(rep(0.05, 2), rep(0.1, 8), rep(0.05, 2)))
# Age cutoff: the sample selection further down keeps age > min.age.
min.age <- 25
# NOTE(review): `folder` and the *lim vectors are not referenced anywhere in
# the visible script; possibly they are read as globals by the sourced
# plotting helpers -- TODO confirm before removing.
folder <- "Outputs/"

sdlim <- c(0, 1)
sklim <- c(-8, 2)
ktlim <- c(0, 126)
cslim <- c(0, 26)
kelim <- c(-0.65, 0.6)
plim <- c(0.5, 1)

# Data from Guvenen, Karahan, Ozkan and Song
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned (e.g. by sourced code), which would silently change the parse.
gkos <- as.data.table(
  read.table("Inputs/age_oneyear_moments.csv",
             sep = ",", as.is = TRUE, header = TRUE)
)

# NOW for the new definitions (strictly labour earnings, based on W2-Guvenen stuff)

# `dam.du` is not created in this file; it has to exist already.
dem <- rename(dam.du, c("Age" = "age"))

# Equivalize HWTaxableIncome and ry via regression

# Sample selection: head's labour earnings and `ry` above 1518, positive
# pre-tax income, and age strictly between min.age and 61.
dem <- subset(
  dem,
  LaborEarningsHead > 1518 & pretaxincome > 0 & ry > 1518 &
    age > min.age & age < 61
)

# Log outcomes.
# NOTE(review): `labtotal` has no positivity filter of its own above --
# confirm it is always > 0 in this sample, otherwise log() yields -Inf/NaN.
dem$loghead <- with(dem, log(LaborEarningsHead))
dem$loglab <- with(dem, log(labtotal))
dem$logy <- with(dem, log(ry))

# Regressions on year, (household size) and age dummies; the residuals are
# the earnings measures used from here on.
regrea <- lm(loghead ~ factor(year) + factor(age), data = dem)
regre0 <- lm(loglab ~ factor(year) + factor(NumberPeople) + factor(age),
             data = dem)
regre2 <- lm(logy ~ factor(year) + factor(NumberPeople) + factor(age),
             data = dem)

# Sub-sample with Sex == 1.
# NOTE(review): this regression removes year effects only (no age dummies),
# unlike `regrea` -- confirm that is intended.
dem2 <- subset(dem, Sex == 1)
regre22 <- lm(loghead ~ factor(year), data = dem2)

dem$head <- resid(regrea)
dem$lab <- resid(regre0)
dem$post <- resid(regre2)
dem2$head <- resid(regre22)

# Now need to compute moments for all of these ry. variables

# Builds a (personid, age, ry, ny) panel from `src`: `ry` is the column named
# by `value.col`, and `ny` is the same person's `ry` in the following row once
# rows are ordered by age.
# NOTE(review): `ny` is the next *observed* row for the person; nothing here
# checks that it is exactly one year later -- confirm the panel has no gaps.
build.panel <- function(src, value.col) {
  panel <- subset(src, select = c("personid", "age", value.col))
  panel <- rename(panel, setNames("ry", value.col))
  setkeyv(panel, c("personid", "age"))
  panel[, ny := shift(ry, type = "lead"), by = "personid"]
  panel
}

# And go:

new.head <- build.panel(dem, "head")
new.male <- build.panel(dem2, "head")
new.lab <- build.panel(dem, "lab")
new.post <- build.panel(dem, "post")


# Names of the panels processed by the moments loop.
types <- c("new.head", "new.lab", "new.post", "new.male")

# For every panel named in `types` this loop computes
#   * by age: persistence cov(ny, ry) / var(ry), mean, sd, skewness and
#     kurtosis of `ry`;
#   * by age group and percentile bin of `ry`: mean, sd, skewness, kurtosis
#     and quantile-based measures of the change ng = ny - ry.
# persbyage, ee, ff, gg, hh and ii are stored in the global environment as
# <name>.<type> (e.g. `ff.new.male`); the other objects are overwritten on
# each pass.
for (i in seq_along(types)) {

  da <- get(types[i])

  # Keep observations for which the person's next value exists.
  da <- subset(da, !is.na(ny))

  # Percentile bin of the current and of the next value, within each age.
  # Assigning by group keeps every bin on its own row without relying on the
  # row order of a separately computed result.
  setkeyv(da, c("age", "personid"))
  da[, pc := xtile(ry, probs = binning), by = age]
  da[, npc := xtile(ny, probs = binning), by = age]

  persbyage <- da[, cov(ny, ry) / var(ry), by = age]
  setkeyv(persbyage, "age")

  aa <- da[, mean(ry), by = age]
  setkeyv(aa, "age")

  bb <- da[, sd(ry), by = age]
  setkeyv(bb, "age")

  # Skewness and kurtosis of ry by age. Each observation is standardised with
  # the mean and sd of its own age group. Looking the moments up by position
  # (age - min.age + 1) is wrong here: the sample keeps age > min.age, so the
  # first row of aa/bb belongs to age min.age + 1, not min.age.
  cc <- da[, mean(((ry - mean(ry)) / sd(ry))^3), by = age]
  dd <- da[, mean(((ry - mean(ry)) / sd(ry))^4), by = age]


  # Computing condmoments

  # Age groups: 27-34 -> 1, 35-44 -> 2, 45-54 -> 3; other ages are dropped.
  # NOTE(review): the first group is labelled "25-34" although ages <= 26 are
  # excluded here -- confirm this is intended.
  da <- subset(da, age > 26 & age <= 54)
  da$agroup <- 1 + (da$age > 34) + (da$age > 44)

  # Change between the current and the next value.
  da$ng <- da$ny - da$ry

  da$agroup <- mapvalues(factor(da$agroup), from = seq(1, 3),
                         to = c("25-34", "35-44", "45-54"))

  # Relabel bin numbers with the upper percentile of the bin (5, 10, ..., 100).
  da$pc <- mapvalues(da$pc, from = seq_along(binning),
                     to = round(binning * 100))

  ee <- da[, mean(ng), by = c("agroup", "pc")]
  setkeyv(ee, c("agroup", "pc"))

  ff <- da[, sd(ng), by = c("agroup", "pc")]
  setkeyv(ff, c("agroup", "pc"))

  ffage <- da[, var(ng), by = age]

  # Attach the conditional mean and sd to every observation. Copies are
  # renamed so that ee and ff keep their V1 column for the plotting calls.
  setkeyv(da, c("agroup", "pc"))
  ee2 <- setnames(copy(ee), "V1", "condmean")
  ff2 <- setnames(copy(ff), "V1", "condsd")
  dao <- merge(da, ee2, by = c("agroup", "pc"))
  dao <- merge(dao, ff2, by = c("agroup", "pc"))

  dao$skew <- ((dao$ng - dao$condmean) / dao$condsd)^3
  dao$kurt <- ((dao$ng - dao$condmean) / dao$condsd)^4

  gg <- dao[, mean(skew), by = c("agroup", "pc")]
  setkeyv(gg, c("agroup", "pc"))

  hh <- dao[, mean(kurt), by = c("agroup", "pc")]
  setkeyv(hh, c("agroup", "pc"))

  # P90 P10 measure of standard deviation

  ii <- da[, list(p025 = quantile(ng, 0.025), p10 = quantile(ng, 0.1),
                  p25 = quantile(ng, 0.25), p50 = quantile(ng, 0.5),
                  p75 = quantile(ng, 0.75), p90 = quantile(ng, 0.9),
                  p975 = quantile(ng, 0.975)),
           by = c("agroup", "pc")]

  ii$p9010 <- ii$p90 - ii$p10
  # Kelley's skewness: ((P90 - P50) - (P50 - P10)) / (P90 - P10)
  ii$kelley <- ((ii$p90 - ii$p50) - (ii$p50 - ii$p10)) / (ii$p90 - ii$p10)
  ii$kelleynum <- (ii$p90 - ii$p50)
  ii$kelleyden <- (ii$p50 - ii$p10)
  # Crow-Siddiqui kurtosis: (P97.5 - P2.5) / (P75 - P25)
  ii$crowsid <- (ii$p975 - ii$p025) / (ii$p75 - ii$p25)


  # Now gather what I need

  assign(paste0("persbyage.", types[i]), persbyage)
  assign(paste0("ee.", types[i]), ee)
  assign(paste0("ff.", types[i]), ff)
  assign(paste0("gg.", types[i]), gg)
  assign(paste0("hh.", types[i]), hh)
  assign(paste0("ii.", types[i]), ii)

}

# Pair up consecutive Age.group codes: 1-2 -> 1, 3-4 -> 2, 5-6 -> 3.
# Any other code keeps the value 0.
gkos$agemedia <- 0
gkos$agemedia[gkos$Age.group %in% c(1, 2)] <- 1
gkos$agemedia[gkos$Age.group %in% c(3, 4)] <- 2
gkos$agemedia[gkos$Age.group %in% c(5, 6)] <- 3

# Turn the codes into a factor carrying the age-range labels used in the
# PSID tables.
gkos$agemedia <- mapvalues(as.factor(gkos$agemedia),
                           from = c("1", "2", "3"),
                           to = c("25-34", "35-44", "45-54"))


# Average every GKOS moment within RE.pctile x pooled age group; the result
# column is named V1, which is what the plotting calls refer to.
gkosm <- gkos[, .(V1 = mean(Standard.deviation)),
              by = c("RE.pctile", "agemedia")]
gkosm2 <- gkos[, .(V1 = mean(Skewness)),
               by = c("RE.pctile", "agemedia")]
gkosm3 <- gkos[, .(V1 = mean(Kurtosis)),
               by = c("RE.pctile", "agemedia")]
gkosm4 <- gkos[, .(V1 = mean(Kelley)),
               by = c("RE.pctile", "agemedia")]
gkosm5 <- gkos[, .(V1 = mean(Crows)),
               by = c("RE.pctile", "agemedia")]



# Figure 1
#
# Standard deviation, skewness and kurtosis of log earnings changes by
# percentile of previous earnings: GKOS data, then the PSID male and `post`
# samples. The arguments after the file name are passed positionally exactly
# as before; their meaning is defined by cond.age.moments(), which is not
# part of this file.

fig1.xlab <- "Percentile of previous earnings"
fig1.sd.lab <- "Std dev of log earnings change"
fig1.sk.lab <- "Skewness of log earnings change"
fig1.kt.lab <- "Kurtosis of log earnings change"
fig1.sd.lim <- c(0, 1)
fig1.sk.lim <- c(-6, 1)
fig1.kt.lim <- c(0, 60)

# GKOS
gkosm.sd <- cond.age.moments(
  gkosm, "RE.pctile", "V1", "agemedia", fig1.xlab, fig1.sd.lab,
  "f_gkos_moments_sd.pdf", fig1.sd.lim, 7, NULL, 26, NULL, 1.5
)
gkos.sk <- cond.age.moments(
  gkosm2, "RE.pctile", "V1", "agemedia", fig1.xlab, fig1.sk.lab,
  "f_gkos_moments_sk.pdf", fig1.sk.lim, 7, NULL, 26, NULL, 1.5
)
gkos.kt <- cond.age.moments(
  gkosm3, "RE.pctile", "V1", "agemedia", fig1.xlab, fig1.kt.lab,
  "f_gkos_moments_kt.pdf", fig1.kt.lim, 8, NULL, 26, 1.1, 1.5
)

# PSID, male sample
male.sd <- cond.age.moments(
  ff.new.male, "pc", "V1", "agroup", fig1.xlab, fig1.sd.lab,
  "f_psid_sd_male.pdf", fig1.sd.lim, 7, NULL, 26, NULL, 1.5
)
male.sk <- cond.age.moments(
  gg.new.male, "pc", "V1", "agroup", fig1.xlab, fig1.sk.lab,
  "f_psid_sk_male.pdf", fig1.sk.lim, 7, NULL, 26, NULL, 1.5
)
male.kt <- cond.age.moments(
  hh.new.male, "pc", "V1", "agroup", fig1.xlab, fig1.kt.lab,
  "f_psid_kt_male.pdf", fig1.kt.lim, 8, NULL, 26, 1.1, 1.5
)

# PSID, `post` sample
post.sd <- cond.age.moments(
  ff.new.post, "pc", "V1", "agroup", fig1.xlab, fig1.sd.lab,
  "f_psid_sd_post.pdf", fig1.sd.lim, 7, NULL, 26, NULL, 1.5
)
post.sk <- cond.age.moments(
  gg.new.post, "pc", "V1", "agroup", fig1.xlab, fig1.sk.lab,
  "f_psid_sk_post.pdf", fig1.sk.lim, 7, NULL, 26, NULL, 1.5
)
post.kt <- cond.age.moments(
  hh.new.post, "pc", "V1", "agroup", fig1.xlab, fig1.kt.lab,
  "f_psid_kt_post.pdf", fig1.kt.lim, 8, NULL, 26, 1.1, 1.5
)

# Figure C.1.
#
# Quantile-based measures (Kelley's skewness and Crow-Siddiqui kurtosis) of
# log earnings changes by percentile of previous earnings: GKOS data, then
# the PSID male and `post` samples. The arguments after the file name are
# passed positionally; their meaning is defined by cond.age.moments(), which
# is not part of this file.

# Stored as gkos.ke (not gkos.kt) so that the kurtosis plot object created
# for Figure 1 is not overwritten; this matches male.ke / post.ke.
gkos.ke <- cond.age.moments(gkosm4,"RE.pctile","V1","agemedia",
                            "Percentile of previous earnings",
                            "Kelley's skewness","f_gkos_moments_ke.pdf",
                            c(-0.6,0.4),7,NULL,26,NULL,1.5)
gkos.cs <- cond.age.moments(gkosm5,"RE.pctile","V1","agemedia",
                            "Percentile of previous earnings",
                            "Crow-Siddiqui kurtosis","f_gkos_moments_cs.pdf",
                            c(3,17.5),8,NULL,26,4.5,1.5)
male.ke <- cond.age.moments(ii.new.male,"pc","kelley","agroup","Percentile of previous earnings",
                            "Kelley's skewness","f_psid_ke_male.pdf",c(-0.6,0.4),7,NULL,26,NULL,1.5)
male.cs <- cond.age.moments(ii.new.male,"pc","crowsid","agroup","Percentile of previous earnings",
                            "Crow-Siddiqui kurtosis","f_psid_cs_male.pdf",c(3,17.5),8,NULL,26,4.5,1.5)
post.ke <- cond.age.moments(ii.new.post,"pc","kelley","agroup","Percentile of previous earnings",
                            "Kelley's skewness","f_psid_ke_post.pdf",c(-0.6,0.4),7,NULL,26,NULL,1.5)
post.cs <- cond.age.moments(ii.new.post,"pc","crowsid","agroup","Percentile of previous earnings",
                            "Crow-Siddiqui kurtosis","f_psid_cs_post.pdf",c(3,17.5),8,NULL,26,4.5,1.5)


