# Make `main_path` the working directory; the data and output paths used in
# this script are relative and resolve against it.
# NOTE(review): `main_path` is not assigned in this file - presumably defined
# by the calling script; confirm.
setwd(main_path)

## Import relevant libraries
##
library(tidyverse)
library(haven)
library(stargazer)
library(kableExtra)
##

# Store environment: record the objects that already exist so the clean-up at
# the end of the script removes only what this script creates. This has to run
# before anything else is assigned.
to_keep <- ls()
start_time <- Sys.time()
file_name <- c('dtafiles/P52_5_Clusters.dta')

# Read the panel file and reduce it to one row per `rahhidpn`.
# NOTE(review): `Initial_Age` and `Final_Age` are not assigned in this file -
# presumably defined by the calling script; confirm.
df <- read_dta(file_name) %>%
  # Keep observations with an assigned cluster that fall inside the age window.
  filter(!is.na(cluster_52_frailty_bl)) %>%
  filter(agey_e >= Initial_Age) %>%
  filter(agey_e <= Final_Age) %>%
  group_by(rahhidpn) %>%
  # Household earnings: own earnings, plus the spouse's when it is recorded.
  mutate(hhearn = ifelse(is.na(iearnspouse), iearn, iearn + iearnspouse)) %>%
  # Rows with Dead == 1 take hcpl from remstat (1 when remstat <= 3, else 0),
  # falling back to the previous row's hcpl when remstat is missing; all other
  # rows keep their hcpl.
  mutate(
    hcpl = ifelse(
      Dead == 1,
      ifelse(is.na(remstat), lag(hcpl), as.numeric(remstat <= 3)),
      hcpl
    )
  ) %>%
  # Within-group summaries over the retained rows.
  mutate(
    meanfrail = mean(frailty_bl),
    sdfrail = sd(frailty_bl),
    meanshlt = mean(shlt),
    Die = max(Dead, na.rm = TRUE),
    more_hs = ifelse(raedyrs > 12, 1, 0)
  ) %>%
  # Keep only the first retained row of each group.
  filter(row_number() == 1) %>%
  ungroup()
  
  
# Keep the variables used by the Table 4 code below and derive the indicator
# columns it reads.
df <- df %>%
  select(
    cluster_52_frailty_bl, raracem, vigact, ragender, raedyrs, smokev,
    hcpl, iearn, hhearn, meanfrail, sdfrail, meanshlt, Die, frailty_bl,
    shlt, higov, govmr, govmd, prpcnt, more_hs
  ) %>%
  mutate(
    Woman = ragender - 1,
    Black = as.numeric(raracem == 2),
    # 1 when prpcnt is positive, 0 otherwise.
    priv_cov = ifelse(prpcnt > 0, 1, 0),
    # 1 when either the private or the government indicator equals 1.
    any_cov = ifelse(priv_cov == 1 | higov == 1, 1, 0)
  ) %>%
  select(-raracem, -ragender)

# One data frame per cluster value (1 to 5); df6 is the full sample.
df1 <- filter(df, cluster_52_frailty_bl == 1)
df2 <- filter(df, cluster_52_frailty_bl == 2)
df3 <- filter(df, cluster_52_frailty_bl == 3)
df4 <- filter(df, cluster_52_frailty_bl == 4)
df5 <- filter(df, cluster_52_frailty_bl == 5)
df6 <- df

## Generate Table 4:
# Table 4 header

# Share of a reference sample that is contained in `df`.
#
# Arguments:
#   df      Data frame whose rows are counted.
#   n_total Size of the reference sample used as the denominator. Defaults to
#           4663, the value that used to be hard-coded here, so one-argument
#           calls return the same number as before. Pass e.g. `nrow(df6)` to
#           tie the denominator to the data actually loaded.
#
# Returns a length-one numeric: nrow(df) / n_total.
column <- function(df, n_total = 4663) {
  stopifnot(is.numeric(n_total), length(n_total) == 1, n_total > 0)
  # Count rows directly rather than via the length of one particular column,
  # so the result does not depend on `Woman` being present.
  nrow(df) / n_total
}

# Evaluate `column` on the full sample followed by each type, and place the
# results side by side (one column per sample).
v1 <- list(df6, df1, df2, df3, df4, df5)
tab <- lapply(v1, column)
tab_h <- do.call(cbind, tab)
dimnames(tab_h) <- list(
  "Fraction of people",
  c("All sample", "Type 1", "Type 2", "Type 3", "Type 4", "Type 5")
)
# Pass the matrix to stargazer, which writes it to the .tex file below.
stargazer(
  tab_h,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4-header.tex",
  digits = 2
)


## Tab 4-a: Health outcomes during clustering period
## Average frailty
## Average health deficit
## Fraction dead by 60
## (the overall fraction of the population is reported in the Table 4 header)
# Table 4-a column for one sample. Returns a numeric vector of length 4:
#   1. NaN  - placeholder cell for the section-title row
#   2. mean of `meanfrail`
#   3. mean of `meanfrail * 35`
#      (NOTE(review): 35 is presumably the number of deficits in the frailty
#      index - confirm)
#   4. mean of `Die`
# Missing values are dropped and every statistic is rounded to two decimals.
column <- function(df) {
  mean2 <- function(x) round(mean(x, na.rm = TRUE), 2)
  c(
    NaN,
    mean2(df$meanfrail),
    mean2(df$meanfrail * 35),
    mean2(df$Die)
  )
}

# Evaluate `column` on the full sample followed by each type, and place the
# results side by side (one column per sample, one row per statistic).
v1 <- list(df6, df1, df2, df3, df4, df5)
tab <- lapply(v1, column)
tab_a <- do.call(cbind, tab)
dimnames(tab_a) <- list(
  c(
    "Health outcomes during clustering period",
    "Average frailty",
    "Average health deficit",
    "Fraction dead by 60"
  ),
  c("All sample", "Type 1", "Type 2", "Type 3", "Type 4", "Type 5")
)
# Pass the matrix to stargazer, which writes it to the .tex file below.
stargazer(
  tab_a,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4-a.tex",
  digits = 2
)


## Tab 4-b: Health at 52
## Average frailty
## Average health deficit
## Average SRHS
## Std. Dev of frailty

# Table 4-b column for one sample. Returns a numeric vector of length 5:
#   1. NaN  - placeholder cell for the section-title row
#   2. mean of `frailty_bl`, two decimals
#   3. mean of `frailty_bl * 35`, one decimal
#   4. mean of `shlt`, two decimals
#   5. standard deviation of `frailty_bl`, two decimals
# Missing values are dropped from every statistic.
column <- function(df) {
  avg <- function(x) mean(x, na.rm = TRUE)
  baseline <- df$frailty_bl
  c(
    NaN,
    round(avg(baseline), 2),
    round(avg(baseline * 35), 1),
    round(avg(df$shlt), 2),
    round(sd(baseline, na.rm = TRUE), 2)
  )
}
# Evaluate `column` on the full sample followed by each type, and place the
# results side by side (one column per sample, one row per statistic).
v1 <- list(df6, df1, df2, df3, df4, df5)
tab <- lapply(v1, column)
tab_b <- do.call(cbind, tab)
dimnames(tab_b) <- list(
  c(
    "Health at 52",
    "Average frailty",
    "Average health deficit",
    "Average SRHS",
    "Std. Dev. of frailty"
  ),
  c("All sample", "Type 1", "Type 2", "Type 3", "Type 4", "Type 5")
)
# Pass the matrix to stargazer, which writes it to the .tex file below.
stargazer(
  tab_b,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4-b.tex",
  digits = 2
)


## Tab 4-c:  Demographics
## Fraction women
## Fraction black people
## Average years of education
## Fraction partnered at 52
## Average individual income at 52
## Average household  income at 52

# Table 4-c column for one sample. Returns a numeric vector of length 7:
#   1. NaN  - placeholder cell for the section-title row
#   2-5. means of `Woman`, `Black`, `raedyrs`, `hcpl`, two decimals each
#   6-7. means of `iearn` and `hhearn`, rounded to whole numbers
# Missing values are dropped from every mean.
column <- function(df) {
  mean2 <- function(x) round(mean(x, na.rm = TRUE), 2)
  mean0 <- function(x) round(mean(x, na.rm = TRUE))
  c(
    NaN,
    mean2(df$Woman),
    mean2(df$Black),
    mean2(df$raedyrs),
    mean2(df$hcpl),
    mean0(df$iearn),
    mean0(df$hhearn)
  )
}
# Evaluate `column` on the full sample followed by each type, and place the
# results side by side (one column per sample, one row per statistic).
v1 <- list(df6, df1, df2, df3, df4, df5)
tab <- lapply(v1, column)
tab_c <- do.call(cbind, tab)
dimnames(tab_c) <- list(
  c(
    "Demographics",
    "Fraction women",
    "Fraction black people",
    "Mean years of education",
    "Fraction partnered at 52",
    "Mean individual income at 52",
    "Mean household income at 52"
  ),
  c("All sample", "Type 1", "Type 2", "Type 3", "Type 4", "Type 5")
)
# Pass the matrix to stargazer, which writes it to the .tex file below.
stargazer(
  tab_c,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4-c.tex",
  digits = 2
)

## Tab 4-d:  Health behaviors
## "Fraction ever smoked", 
## "Fraction vigorous activity at 52"
# Table 4-d column for one sample. Returns a numeric vector of length 3:
#   1. NaN  - placeholder cell for the section-title row
#   2. mean of `smokev`
#   3. mean of `vigact`
# Missing values are dropped and both means are rounded to two decimals.
column <- function(df) {
  mean2 <- function(x) round(mean(x, na.rm = TRUE), 2)
  c(
    NaN,
    mean2(df$smokev),
    mean2(df$vigact)
  )
}
# Evaluate `column` on the full sample followed by each type, and place the
# results side by side (one column per sample, one row per statistic).
v1 <- list(df6, df1, df2, df3, df4, df5)
tab <- lapply(v1, column)
tab_d <- do.call(cbind, tab)
dimnames(tab_d) <- list(
  c(
    "Health behaviours",
    "Fraction ever smoked",
    "Fraction vigorous activity at 52"
  ),
  c("All sample", "Type 1", "Type 2", "Type 3", "Type 4", "Type 5")
)
# Pass the matrix to stargazer, which writes it to the .tex file below.
stargazer(
  tab_d,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4-d.tex",
  digits = 2
)


## Tab 4-e:  Health insurance status
## Private health insurance at 52, 
## Public health insurance at 52,
## Medicaid,
## Medicare,
## Uninsured
# Table 4-e column for one sample. Returns a numeric vector of length 6:
#   1. NaN  - placeholder cell for the section-title row
#   2-5. means of `priv_cov`, `higov`, `govmd`, `govmr`
#   6. one minus the mean of `any_cov`
# Missing values are dropped and every entry is rounded to two decimals.
column <- function(df) {
  avg <- function(x) mean(x, na.rm = TRUE)
  mean2 <- function(x) round(avg(x), 2)
  c(
    NaN,
    mean2(df$priv_cov),
    mean2(df$higov),
    mean2(df$govmd),
    mean2(df$govmr),
    round(1 - avg(df$any_cov), 2)
  )
}

# Evaluate `column` on the full sample followed by each type, and place the
# results side by side (one column per sample, one row per statistic).
v1 <- list(df6, df1, df2, df3, df4, df5)
tab <- lapply(v1, column)
tab_e <- do.call(cbind, tab)
dimnames(tab_e) <- list(
  c(
    "Health insurance status",
    "Private health insurance at 52",
    "Public health insurance at 52",
    "Medicaid",
    "Medicare",
    "Uninsured at 52"
  ),
  c("All sample", "Type 1", "Type 2", "Type 3", "Type 4", "Type 5")
)
# Pass the matrix to stargazer, which writes it to the .tex file below.
stargazer(
  tab_e,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4-e.tex",
  digits = 2
)

# Concatenate vertically
merged_matrix <- rbind(tab_h, tab_a, tab_b, tab_c, tab_d, tab_e)

# Hand the stacked matrix to stargazer, which writes it to a single .tex file.
stargazer(
  merged_matrix,
  summary = FALSE,
  out = "output/Part2_output/Part2_d_cluster_statistics/tab4.tex",
  digits = 2
)

# Report the elapsed time since `start_time` was recorded.
end_time <- Sys.time()
runtime <- end_time - start_time
print(runtime)

# Clear environment: remove every object that is not listed in `to_keep`,
# i.e. everything created since that snapshot was taken.
rm(list = setdiff(ls(), to_keep))
