## Part 2 a: Clustering
# Gets the clustering sample, performs k-means, performs the Elbow and
# Silhouette analyses, and generates Tables 8 and 9.
# The working directory is set to code_path_p2 (defined outside this section)
# so that the relative script name passed to source() resolves.
setwd(code_path_p2)
source("Part2_a_clustering.R")

# Description:
# This file takes the clean data panel from Stata as an input, clusters the data
# by frailty between the ages of 52-53 and 60-61, and outputs the clustered data
# for each number of clusters ranging from 2 to 10.
# Output:
# 1) Databases PS_"N"_cluster with cluster assignments, where N is the number of
#    clusters considered. Output: /dtafile
# 2) Database data_clustering: wide-format clustering sample. Output: /dtafile
# 3) Appendix A: Tables 8 and 9. Output: /output/Part2_output/Part2_a_clustering
# 4) Appendix C: Elbow and Silhouette analysis. Output: /output/Part2_output/Part2_a_clustering




## Part 2 b: Health dynamics by cluster
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_b_graphs.R")
# Description:
# This file takes the clustered data panel and produces the graphs listed below.
# Output:
# 1) Figure 2-1: Mean frailty by cluster and age
# 2) Figure 2-2: Mean frailty conditional on being alive, by cluster and age
# 3) Figure 2-3: Fraction dead by cluster and age
# 4) Appendix E: Figures 12-1, 12-2, 12-3
# 5) Appendix E: Figures 13-1, 13-2, 13-3
# 6) Appendix F: Figures 14-1, 14-2, 14-3
# 7) Appendix F: Figures 15-1, 15-2, 15-3
# Output location: /output/Part2_output/Part2_b_graphs


## Part 2 c: Cross-validation
# Switch for this step: the script below is sourced only when runcode equals 1.
# Edit the assignment (e.g. to 0) to skip the cross-validation exercise.
runcode <- 1
if (runcode == 1) {
  # Run from code_path_p2 so that the relative script name resolves.
  setwd(code_path_p2)
  source("Part2_c_number_cluster.R")
}
# Description:
# Produces cross-validated graphs of the predictive power of health types for
# future frailty and mortality against the number of clusters used.
# Output:
# 1) .csv files with the output of the cross-validation exercise for each
#    specified model
# 2) Appendix C: Figures 7 and 8
# Output location: /output/Part2_output/Part2_c_number_cluster



## Part 2 d: Health type and observable characteristics
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_d_cluster_statistics.R")
# Description:
# This file takes the clustered data panel with 5 clusters as an input
# and produces tables with average statistics by cluster.
# Output:
# 1) Table 4: the 6 sub-tables used to generate Table 4 (tab4-header, tab4-a,
#             tab4-b, tab4-c, tab4-d, tab4-e) and Table 4 itself (tab4)
# Output location: /output/Part2_output/Part2_d_cluster_statistics


## Part 2 e: Individual frailty dynamics (heat maps)
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_e_heatmaps.R")
# Description:
# This file takes the clustered data panel with 5 clusters as an input
# and produces plots of individual health dynamics within each cluster
# (heat maps).
# Output:
# 1) Figure 3: Individual frailty dynamics (heat maps)
# Output location: /output/Part2_output/Part2_e_heatmaps


## Part 2 f: Deficits prevalence analysis
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_f_deficit_prevalence.R")

# Description:
# This file takes the clustered data panel with 5 clusters as an input
# and produces tables of deficits by cluster at age 52-53.

# Output: Appendix G: Table 15
# Output location: /output/Part2_output/Part2_f_deficit_prevalence



## Part 2 g: Out-of-sample predictive exercise
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_g_predictive_exc.R")

# Description:
# This file takes the clustered data panel and runs the out-of-sample
# forecasting exercise.
# Output:
# 1) Table 5
#    NOTE(review): Table 5 is also listed as an output of Part 2 j; confirm
#    which script produces it.
# 2) Appendix G: Tables 12 and 13
# 3) Appendix G: Figures 10 and 11
# Output location: /output/Part2_output/Part2_g_predictive_exc



## Part 2 h: Cause of death
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_h_cause_of_death.R")

# Description:
# This file takes the clustered data panel and examines the cause of death
# among those who die in the sample.
# Output:
# 1) An Excel file containing data on the cause of death for individuals in
#    the sample.
# Output location: /output/Part2_output/Part2_h_cause_of_death/Cause_of_death.xlsx
# Note: Tables 3 and 11 are produced from this Excel file.
# See "/output/Part2_output/Part2_h_cause_of_death/Cause_of_death_tables.xlsx"



## Part 2 i: Distribution of frailty
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_i_frailty_distribution.R")
# Description:
# This file takes the clustered data panel and examines the distributions of
# frailty and of changes in frailty.
# Output:
# 1) Appendix B: Table 10
# 2) Figure 1
# Output location: /output/Part2_output/Part2_i_frailty_distribution


## Part 2 j: Predicting health types
# Run from code_path_p2 so that the relative script name resolves.
setwd(code_path_p2)
source("Part2_j_mlogit.R")
# Description:
# This code estimates a multinomial logistic regression of health types on
# different individual characteristics at age 52-53.
# Output:
# 1) Excel file with the results of those regression models
# 2) Table 5
#    NOTE(review): Table 5 is also listed as an output of Part 2 g; confirm
#    which script produces it.
# 3) Appendix B: Table 14
# Output location: /output/Part2_output/Part2_j_mlogit

