## This code computes the state spaces for both the persistent (eta) and
## transitory (eps) components, and the transition matrices for the persistent
## component only

###### INPUTS ######

# Set working directory
# NOTE: this is a machine-specific absolute path and must be edited before the
# script is run elsewhere. The sample files read below and the CSV files
# written at the end are addressed by bare file name, so they are resolved
# relative to this directory.

setwd("C:/Google Drive/Research/DFP16/Submissions/Codes/JEEA/Earnings/Outputs/")

# Probability mass of each bin, listed from the lowest to the highest bin.
# The number of entries is the number of bins, and the entries of each vector
# must add up to one.

# Persistent component: 5 narrow bins, 8 wide bins, 5 narrow bins
bin.sizes.eta <- rep(c(0.02, 0.1, 0.02), times = c(5, 8, 5))
# Transitory component: symmetric around the two wide central bins
bin.sizes.eps <- rep(
  c(0.025, 0.05, 0.4, 0.05, 0.025),
  times = c(2, 1, 2, 1, 2)
)

# The earnings data

# Randomly generated placeholder samples (kept for reference, disabled):
#eta <- as.data.table(cbind(id=seq(1,3600),age=rep(c(25,60),100),ry=rnorm(3600),ny=rnorm(3600)))
#eps <- as.data.table(cbind(id=seq(1,3600),age=rep(c(25,60),100),ry=rnorm(3600)))

# Read the comma-separated, header-less sample files from the working
# directory. as.data.table is called through its namespace because
# library(data.table) is only attached further down in the script.
eta <- data.table::as.data.table(read.table("eta_sample.txt", sep = ","))
eps <- data.table::as.data.table(read.table("eps_sample.txt", sep = ","))

# Both files are expected to hold exactly four columns, in this order:
# id, age, ry (current log earnings), ny (log earnings in t+1)
names(eta) <- c("id", "age", "ry", "ny")
names(eps) <- c("id", "age", "ry", "ny")

# Keep ages 25 to 60 (inclusive) in BOTH samples, so that the eps state space
# is computed over the same ages as the eta state space.
eta <- subset(eta, age > 24 & age < 61)
eps <- subset(eps, age > 24 & age < 61)

# The two sample files loaded above should be replaced by the appropriate
# simulated samples for the persistent (eta) and transitory (eps) components,
# with the columns named or renamed so that they are id (household identifier),
# age, ry (current log earnings) and ny (log earnings in t+1)

# For the transitory component ny is not required because no transition matrix is
# computed

# If there is no persistent-transitory decomposition, just load the earnings data
# into eta, fill eps with random draws (as in the commented-out example above)
# and discard the eps results


###### Computations ######

# Load some libraries

library(plyr)
library(data.table)

# The bin sizes are used as probability masses, so each vector must be
# non-negative and add up to one (up to floating-point tolerance). If they
# summed to less than one, the largest observations would lie above the last
# break and .bincode() would silently assign them to bin NA.
stopifnot(
  all(bin.sizes.eta >= 0), isTRUE(all.equal(sum(bin.sizes.eta), 1)),
  all(bin.sizes.eps >= 0), isTRUE(all.equal(sum(bin.sizes.eps), 1))
)

# Cumulative probabilities at the bin edges, starting from zero.
# The leading zero is necessary so that .bincode has a lower edge for the
# first bin.

binning <- cumsum(c(0, bin.sizes.eta))
eps.probs <- cumsum(c(0, bin.sizes.eps))

# Return, for each element of x, the index of the quantile bin it falls in.
# The breaks are the empirical quantiles of x itself at the cumulative
# probabilities in probs; include.lowest = TRUE puts the minimum of x in bin 1.
assign_bins <- function(x, probs) {
  .bincode(x, breaks = quantile(x, probs = probs), include.lowest = TRUE)
}

# Create a variable pc that indicates, for each observation of current earnings,
# the corresponding bin. npc is the equivalent for next period's earnings.
# Bins are computed separately within each age (by = age).

eta[, pc := assign_bins(ry, binning), by = age]
eta[, npc := assign_bins(ny, binning), by = age]
eps[, pc := assign_bins(ry, eps.probs), by = age]

# Export state spaces, which are just the median earnings for each bin at each age.
# keyby groups by (age, pc) and also sorts/keys the result on those columns.
# The median column is unnamed in j, so data.table calls it V1.
# write.csv always writes a header row: its col.names argument cannot be
# changed (passing it is ignored with a warning), so it is not passed here.

dfp_vector <- eta[, median(ry), keyby = list(age, pc)]
fname <- "eta_space.csv"
write.csv(dfp_vector, file = fname, row.names = FALSE)

eps_vector <- eps[, median(ry), keyby = list(age, pc)]
fname2 <- "eps_space.csv"
write.csv(eps_vector, file = fname2, row.names = FALSE)

# Export transition matrices.
# These are counts of observations that move from a certain bin (age,pc) to a certain npc
# in the following age, divided by the number of observations in (age,pc)

# count how many observations move from a given pc to another npc
# (stored as numeric so the column can later hold probabilities)
totr <- eta[, list(ct = as.numeric(.N)), keyby = c("age", "pc", "npc")]
# expand the grid so that we also have zeros for the cases in which nobody moves
# from a certain bin to another specific bin
full.grid <- as.data.table(
  with(totr, expand.grid(age = min(age):max(age), pc = 1:max(pc), npc = 1:max(npc)))
)
# all.y = TRUE keeps every grid cell; the join columns are named explicitly
# rather than inferred from the key of totr
totr <- merge(totr, full.grid, by = c("age", "pc", "npc"), all.y = TRUE)
totr[is.na(ct), ct := 0]
# divide by the total number of individuals in each bin (age,pc) to find the
# probability of transitioning; an (age,pc) cell with no observations at all
# yields 0/0 = NaN
totr[, ct := ct / sum(ct), by = c("age", "pc")]
# export matrix (columns age, pc, npc, ct; write.csv always writes the header,
# its col.names argument cannot be changed and is therefore not passed)
fname3 <- "eta_tranmatrices.csv"
write.csv(totr, file = fname3, row.names = FALSE)
