
# Switch the working directory so that the relative paths used in this script
# (e.g. "dtafiles/..." and "output/...") resolve against it.
# NOTE(review): `main_path` is not defined in this file -- presumably it is set
# by the calling/master script before this one is sourced; confirm.
setwd(main_path)


## Import relevant libraries
library(tidyverse)
library(haven)
library(stats)
library(gcookbook)
library(ggplot2)
library(plot.matrix)
library(imguR)
library(directlabels)
library(ggrepel)
library(openxlsx)
library(knitr)
library(kableExtra)
library(ggplot2)
library(gridExtra)


# Store environment ------------------------------------------------------------
# Snapshot the names of the objects that exist before this script creates
# anything, so the cleanup at the end of the file can remove only what was
# created here. Also start the wall-clock timer.
to_keep <- ls()
start_time <- Sys.time()
file_name <- "dtafiles/P52_5_Clusters.dta"

# Load the panel and build the deficit counts ----------------------------------
# Rows are kept only when `cluster_52_frailty_bl` is observed and
# `agey_e >= Initial_Age`.
# NOTE(review): `Initial_Age` is not defined in this file; it is either a
# column of the .dta file or a value set by the calling script -- confirm.
#
# n_def is NumObservedDeficits * frailty_bl rounded to a whole number, and is
# forced to NA on rows where Dead == 1. Rows with a missing frailty_bl are
# dropped last.
#
# The previous version ran two pipelines: the first added a within-person row
# counter (`posit`) that the second immediately discarded in select(), and
# arrange()/group_by() were repeated. They are merged here; the resulting
# df5 has the same rows, columns, ordering (rahhidpn, year) and grouping
# (by rahhidpn) as before.
df5 <- read_dta(file_name) %>%
  filter(!is.na(cluster_52_frailty_bl)) %>%
  filter(agey_e >= Initial_Age) %>%
  arrange(rahhidpn, year) %>%
  group_by(rahhidpn) %>%
  mutate(n_def = NumObservedDeficits * frailty_bl) %>%
  select(rahhidpn, year, n_def, frailty_bl, NumObservedDeficits, Dead) %>%
  mutate(n_def = ifelse(Dead == 1, NA, round(n_def))) %>%
  filter(!is.na(frailty_bl))


################################################################################
# Appendix B: Table 10
################################################################################

# Frequency of each observed value of n_def. table() ignores NA by default, so
# rows whose n_def is NA do not enter the counts.
freq_table <- table(df5$n_def)

# Calculate the percentage for each value of n_def
percentage <- prop.table(freq_table) * 100

# Calculate the accumulated percentage
accumulated_percentage <- cumsum(percentage)

# Combine the frequency table, percentage, and accumulated percentage
result <- data.frame(
  n_def = as.numeric(names(freq_table)),
  frequency = as.numeric(freq_table),
  percentage = round(as.numeric(percentage), 2),
  accumulated_percentage = round(as.numeric(accumulated_percentage), 2)
)

# Add the frailty level implied by each deficit count and set display names.
# NOTE(review): the divisor 35 is hard-coded; presumably it is the total number
# of deficits in the frailty index -- confirm.
result <- result %>%
  mutate(ave_frailty = round(n_def / 35, 2)) %>%
  select(n_def, ave_frailty, frequency, percentage, accumulated_percentage) %>%
  rename(
    `Number of Deficits` = n_def,
    `Average Frailty` = ave_frailty,
    `Freq.` = frequency,
    `Percent.` = percentage,
    `Cumul Percent.` = accumulated_percentage
  )

# Number of observations entering the table (not used further in this script;
# kept for interactive inspection).
total_number_of_observations <- sum(freq_table)

latex_table <- kable(
  result,
  "latex",
  booktabs = TRUE,  # Use booktabs for clean formatting
  align = c("c", "c", "c", "c", "c"),
  row.names = FALSE
) %>%
  kable_styling(
    latex_options = c("hold_position"),
    full_width = FALSE,
    stripe_color = NULL  # Prevent striped rows
  )

# Post-process the LaTeX output to remove \addlinespace
latex_table_cleaned <- gsub("\\\\addlinespace", "", latex_table)

# Print and save the *cleaned* table. Previously the cleaned version was
# computed but the uncleaned `latex_table` was printed and written, so the
# \addlinespace commands still ended up in tab10.tex.
print(latex_table_cleaned)

writeLines(
  latex_table_cleaned,
  "output/Part2_output/Part2_i_frailty_distribution/tab10.tex"
)

################################################################################
# Figure 1
################################################################################

# Per-person (rahhidpn) frailty changes between consecutive rows, ordered by
# year. All lag()/lead() calls operate within a person because of group_by().
# NOTE(review): the divisor 35 is hard-coded; presumably the total number of
# deficits in the frailty index -- confirm.
df_health_shock <- df5 %>%
  arrange(rahhidpn, year) %>%
  group_by(rahhidpn) %>%
  mutate(
    # Frailty implied by the rounded deficit count (NA where n_def is NA)
    frailty_2s = n_def / 35,
    # Same as frailty_2s while Dead == 0; otherwise 1 when frailty_bl == 1,
    # else NA
    n_def2 = ifelse(
      Dead == 0,
      frailty_2s,
      ifelse(frailty_bl == 1, 1, NA)
    ),
    # Change relative to the previous row, with and without the death coding
    health_shock_with_death = n_def2 - lag(n_def2),
    health_shock_alive = frailty_2s - lag(frailty_2s),
    # Positive change that exists only under the death coding, i.e. on rows
    # where the alive-only change is NA
    death_shock = ifelse(
      is.na(health_shock_alive),
      ifelse(health_shock_with_death > 0, health_shock_with_death, NA),
      NA
    ),
    # Dead indicator one, two and three rows ahead for the same person
    F_dead = lead(Dead),
    F2_dead = lead(Dead, 2),
    F3_dead = lead(Dead, 3),
    # Alive-only change on rows where Dead first equals 1 exactly one, two
    # or three rows later
    pre_death_shock = ifelse(
      F_dead == 1 & Dead == 0,
      health_shock_alive,
      NA
    ),
    pre2_death_shock = ifelse(
      F2_dead == 1 & F_dead == 0 & Dead == 0,
      health_shock_alive,
      NA
    ),
    pre3_death_shock = ifelse(
      F3_dead == 1 & F2_dead == 0 & F_dead == 0 & Dead == 0,
      health_shock_alive,
      NA
    )
  )

# Keep only the three shock measures used in Figure 1. The input is still
# grouped by rahhidpn, so ungroup() first: otherwise select() silently re-adds
# the grouping column ("Adding missing grouping variables") and the person id
# is carried through the reshape for no reason.
data_to_plot <- df_health_shock %>%
  ungroup() %>%
  select(health_shock_alive, pre_death_shock, death_shock)

# Reshape to long format: one row per non-missing shock, labelled by origin.
# The factor levels fix the order Alive / Dead next period / Dead this period.
stacked_data <- data_to_plot %>%
  pivot_longer(
    cols = c(health_shock_alive, pre_death_shock, death_shock),
    names_to = "shock_origin",
    values_to = "shock"
  ) %>%
  mutate(
    shock_origin = factor(
      shock_origin,
      levels = c("health_shock_alive", "pre_death_shock", "death_shock")
    )
  ) %>%
  filter(!is.na(shock)) %>%
  mutate(
    shock_origin = recode(
      shock_origin,
      "health_shock_alive" = "Alive",
      "death_shock" = "Dead this period",
      "pre_death_shock" = "Dead next period"
    )
  )

# Summary statistics per shock origin. The origin column is dropped on purpose,
# so the rows are identified only by their order (the factor-level order above).
shock_summary <- stacked_data %>%
  group_by(shock_origin) %>%
  summarize(
    Mean = round(mean(shock), 2),
    SD = round(sd(shock), 2),
    P1 = round(quantile(shock, 0.01), 2),
    P50 = round(quantile(shock, 0.5), 2),
    P99 = round(quantile(shock, 0.99), 2)
  ) %>%
  select(-shock_origin)

# Turn the summary into a grob that can be drawn inside the plot.
# cex = 1.2 scales the text up relative to the theme default.
small_table <- tableGrob(
  shock_summary,
  rows = NULL,
  theme = ttheme_minimal(
    core = list(fg_params = list(cex = 1.2)),    # Font scale, table body
    colhead = list(fg_params = list(cex = 1.2))  # Font scale, column headers
  )
)

# Define colors for each type of shock
shock_colors <- c(
  "Alive" = "red",
  "Dead next period" = "green",
  "Dead this period" = "blue"
)

# Overlaid, semi-transparent density histograms of the frailty change, one per
# shock origin, with the summary-statistics table drawn inside the panel.
plot <- ggplot(stacked_data, aes(shock, fill = shock_origin)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  geom_histogram(
    alpha = 0.50,
    aes(y = after_stat(density)),
    position = "identity"
  ) +
  scale_fill_manual(values = shock_colors) +  # Specify manual colors
  labs(
    x = expression(bold(Delta ~ "Frailty")),  # Bold Delta symbol for x-axis
    y = "Density",  # Regular label for y-axis
    fill = expression(bold(Delta ~ "Frailty"))  # Bold Delta for legend title
  ) +
  theme_minimal() +
  # NOTE(review): `legend.title.align` and a numeric `legend.position` are
  # deprecated in recent ggplot2 releases; they are left unchanged here because
  # their replacements require a newer ggplot2 than this script may assume.
  theme(
    panel.background = element_rect(fill = "white"),
    axis.text = element_text(size = 20, face = "bold"),
    axis.title = element_text(size = 22, face = "bold"),
    legend.title = element_text(size = 15, face = "bold"),  # Legend title font
    legend.title.align = 0.5,  # Center the legend title
    legend.text = element_text(size = 14),  # Legend text font
    legend.position = c(0.45, 0.75),
    legend.spacing.y = unit(0.4, "cm")
  ) +
  # Table placement is given in data coordinates (x: frailty change,
  # y: density).
  annotation_custom(
    small_table,
    xmin = 25 / 35,
    xmax = 30 / 35,
    ymin = 0.2 * 35,
    ymax = 0.257 * 35
  )

# Print the plot
print(plot)

# Save the plot as a PDF
ggsave(
  filename = "fig1.pdf",  # File name
  plot = plot,            # Plot object to save
  device = "pdf",         # File format
  path = "output/Part2_output/Part2_i_frailty_distribution/",  # Output directory
  dpi = 300,              # Resolution in dots per inch
  width = 8,              # Width in inches
  height = 8              # Height in inches
)

# Conditional EPS export.
# NOTE(review): `eps` is not defined in this file -- presumably a flag set by
# the calling script; confirm.
if (eps == 1) {
  ggsave(
    filename = "fig1.eps",  # File name
    plot = plot,            # Plot object to save
    device = cairo_ps,      # Cairo-based PostScript device
    path = "output/Part2_output/Part2_i_frailty_distribution/",  # Output directory
    dpi = 300,              # Resolution in dots per inch
    width = 8,              # Width in inches
    height = 8              # Height in inches
  )
}


# Report the elapsed wall-clock time since `start_time` was recorded
print(Sys.time() - start_time)


# Clear environment: remove every object that is not listed in `to_keep`.
# This is kept as a single expression on purpose: storing the name list in a
# helper variable first would leave that helper behind after the rm() call.
rm(list = setdiff(ls(), to_keep))

