
# Work from the project root so the relative input and output paths used below
# resolve.
# NOTE(review): `main_path` is not defined in this file; presumably the calling
# script sets it before this one runs -- confirm.
setwd(main_path)

## Import relevant libraries
library(tidyverse)
library(haven)
library(stargazer)
library(xtable)
##


# Snapshot the names of the objects that already exist, so that the cleanup at
# the end of the script can remove everything created from here on.
to_keep <- ls()

# Stata input file, relative to the working directory.
file_name <- "dtafiles/P52_5_Clusters.dta"

# Load the data, keeping only rows with a non-missing cluster_52_frailty_bl
# and with agey_e at least Initial_Age.
df <- filter(
  read_dta(file_name),
  !is.na(cluster_52_frailty_bl) & agey_e >= Initial_Age
)


# Mean of `x` with missing values removed.
# The literal TRUE is used instead of `T`: `T` is an ordinary variable that can
# be reassigned, which would silently switch NA removal off.
meanNA <- function(x) {
  mean(x, na.rm = TRUE)
}

# Lookup from variable name to the human-readable label.
# Every entry must be written as name = label; a comma instead of `=` would add
# two unnamed elements and make the lookup for that variable return NA.
dictionary <- c("hosp" = "Hospital", "nrshom" = "Nursing Home", "hibpe" = "Hypertension",
                "diabe" = "Diabetes", "cancre" = "Cancer", "lunge" = "Lung Disease",
                "hearte" = "Heart Disease", "stroke" = "Stroke", "psyche" = "Psych Issues",
                "arthre" = "Arthritis", "bmigte30" = "Obesity", "walkra" = "Walk One Room",
                "batha" = "Bathing", "dressa" = "Dressing", "eata" = "Eating",
                "beda" = "Leaving Bed", "toilta" = "Using Toilet",
                "phonea" = "Using Phone", "medsa" = "Medication",
                "moneya" = "Using Money", "shopa" = "Buying Groceries",
                "mealsa" = "Prep Meals", "mapa" = "Using Maps",
                "walksa" = "Walk 2+ Blocks", "chaira" = "Leaving Chair",
                "clim1a" = "1 Flight Stairs", "stoopa" = "Crouching",
                "lifta" = "Lifting 10lb", "dimea" = "Picking Up Dime",
                "armsa" = "Raising Arms", "pusha" = "Push Large Object",
                "walk1a" = "Walk 1 Block", "climsa" = "2+ Flights Stairs",
                "sita" = "Sitting 2+ hrs", "smokev" = "Ever Smoke")

# Per-cluster means of the selected variables, computed on the first retained
# row of each rahhidpn. The result has one row per cluster_52_frailty_bl value,
# sorted by cluster, and stays grouped by cluster.
dfmain <- df %>%
  # These conditions are evaluated row by row, so applying them before the
  # grouping gives the same rows as applying them after it.
  filter(
    !is.na(cluster_52_frailty_bl) &
      agey_e >= Initial_Age &
      !is.na(frailty_bl)
  ) %>%
  group_by(rahhidpn) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  select(all_of(c(
    "hosp", "nrshom", "hibpe", "diabe", "cancre", "lunge",
    "hearte", "stroke", "psyche", "arthre", "bmigte30", "walkra",
    "batha", "dressa", "eata", "beda", "toilta", "phonea",
    "medsa", "moneya", "shopa", "mealsa", "mapa", "walksa",
    "chaira", "clim1a", "stoopa", "lifta", "dimea", "armsa",
    "pusha", "walk1a", "climsa", "sita", "smokev",
    "cluster_52_frailty_bl"
  ))) %>%
  group_by(cluster_52_frailty_bl) %>%
  # Every row of a cluster receives the cluster mean; unique() then collapses
  # the identical rows to one per cluster.
  transmute(across(everything(), meanNA, .names = "{col}")) %>%
  unique() %>%
  arrange(cluster_52_frailty_bl)

# Per-cluster means of the selected variables plus the number of rows in each
# cluster (n_obs), computed on the first retained row of each rahhidpn and
# sorted by cluster.
dfmain2 <- df %>%
  # These conditions are evaluated row by row, so applying them before the
  # grouping gives the same rows as applying them after it.
  filter(
    !is.na(cluster_52_frailty_bl) &
      agey_e >= Initial_Age &
      !is.na(frailty_bl)
  ) %>%
  group_by(rahhidpn) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  select(all_of(c(
    "hosp", "nrshom", "hibpe", "diabe", "cancre", "lunge",
    "hearte", "stroke", "psyche", "arthre", "bmigte30", "walkra",
    "batha", "dressa", "eata", "beda", "toilta", "phonea",
    "medsa", "moneya", "shopa", "mealsa", "mapa", "walksa",
    "chaira", "clim1a", "stoopa", "lifta", "dimea", "armsa",
    "pusha", "walk1a", "climsa", "sita", "smokev",
    "cluster_52_frailty_bl"
  ))) %>%
  group_by(cluster_52_frailty_bl) %>%
  summarise(
    across(everything(), meanNA, .names = "{col}"),
    n_obs = n()
  ) %>%
  arrange(cluster_52_frailty_bl)

# Variable groups, one vector per row of the table built further down
# (ADLs, IADLs, other functional limitations, diagnoses, health care
# utilization, and the group labelled "Addictive").
adl_index_var <- c("batha", "dressa", "eata", "beda", "toilta",
                   "walkra", "walk1a", "walksa")
iadl_index_var <- c("shopa", "phonea", "moneya", "mealsa", "medsa", "mapa")
ofl_index_var <- c("clim1a", "climsa", "chaira", "stoopa", "lifta",
                   "armsa", "dimea", "pusha", "sita")
diag_index_var <- c("hibpe", "diabe", "cancre", "lunge",
                    "hearte", "stroke", "psyche", "arthre")
hosp_index_var <- c("hosp", "nrshom")
beh_index_var <- c("bmigte30", "smokev")

# All variables: the six groups above concatenated in that order.
v_t_keep <- c(adl_index_var, iadl_index_var, ofl_index_var,
              diag_index_var, hosp_index_var, beh_index_var)



# Build one row of the deficit table for a group of variables.
#
# Arguments:
#   df         Data frame or tibble with one row per type. It must contain the
#              columns named in `selection` and a column `n_obs` (the number of
#              observations behind each row).
#   selection  Character vector of column names of `df` to aggregate.
#   name       Row name given to the returned one-row data frame.
#
# Returns a one-row data.frame with
#   "Type 1", ..., "Type k" (k = nrow(df)): for each row of `df`, the row mean
#       of the selected columns (NAs ignored) multiplied by the number of
#       non-missing selected columns;
#   All_Sample: the n_obs-weighted average of those per-type values.
calculate_tab_2 <- function(df, selection, name) {
  # drop = FALSE keeps a single selected column as a data frame, so this works
  # for plain data.frames as well as tibbles.
  values <- df[, selection, drop = FALSE]

  mean_value <- as.numeric(rowMeans(values, na.rm = TRUE))
  n_present <- as.numeric(rowSums(!is.na(values)))
  n_obs <- as.numeric(df[["n_obs"]])

  # Per-type level: mean over the selected columns scaled by how many of them
  # are non-missing.
  n_deficit <- mean_value * n_present

  # Whole-sample level: each type weighted by its share of observations.
  all_sample <- sum(n_deficit * n_obs / sum(n_obs))

  # One row, one column per type; the column count follows the input instead
  # of being fixed at five.
  tab <- as.data.frame(t(n_deficit))
  colnames(tab) <- paste("Type", seq_along(n_deficit))
  rownames(tab) <- name

  tab$All_Sample <- all_sample
  tab
}

# One level row per variable group. The *_index_var vectors and v_t_keep are
# the ones defined before calculate_tab_2 earlier in this script.
adl_tab <- calculate_tab_2(dfmain2, adl_index_var, "ADLs")
iadl_tab <- calculate_tab_2(dfmain2, iadl_index_var, "IADLs")
ofl_tab <- calculate_tab_2(dfmain2, ofl_index_var, "Other functional lim")
diag_tab <- calculate_tab_2(dfmain2, diag_index_var, "Diagnoses")
hosp_tab <- calculate_tab_2(dfmain2, hosp_index_var, "Health care utilization")
beh_tab <- calculate_tab_2(dfmain2, beh_index_var, "Addictive")

# Overall row across all variables. With a single row, each share below is the
# value divided by itself.
total_tab <- calculate_tab_2(dfmain2, v_t_keep, "Deficits at 52")
total_tab_2 <- total_tab %>%
  mutate(
    Type_1_share = round(`Type 1` / sum(`Type 1`) * 100, digits = 0),
    Type_2_share = round(`Type 2` / sum(`Type 2`) * 100, digits = 0),
    Type_3_share = round(`Type 3` / sum(`Type 3`) * 100, digits = 0),
    Type_4_share = round(`Type 4` / sum(`Type 4`) * 100, digits = 0),
    Type_5_share = round(`Type 5` / sum(`Type 5`) * 100, digits = 0),
    All_sample_share = round(All_Sample / sum(All_Sample) * 100, digits = 0)
  ) %>%
  select(all_of(c(
    "All_sample_share", "All_Sample",
    "Type_1_share", "Type 1",
    "Type_2_share", "Type 2",
    "Type_3_share", "Type 3",
    "Type_4_share", "Type 4",
    "Type_5_share", "Type 5"
  )))

# Stack the group rows; each share is the row's percentage of its column total,
# rounded to a whole number. Columns are then ordered share-before-level.
final_table <- rbind(adl_tab, iadl_tab, ofl_tab, diag_tab, hosp_tab, beh_tab)
final_table_2 <- final_table %>%
  mutate(
    Type_1_share = round(`Type 1` / sum(`Type 1`) * 100, digits = 0),
    Type_2_share = round(`Type 2` / sum(`Type 2`) * 100, digits = 0),
    Type_3_share = round(`Type 3` / sum(`Type 3`) * 100, digits = 0),
    Type_4_share = round(`Type 4` / sum(`Type 4`) * 100, digits = 0),
    Type_5_share = round(`Type 5` / sum(`Type 5`) * 100, digits = 0),
    All_sample_share = round(All_Sample / sum(All_Sample) * 100, digits = 0)
  ) %>%
  select(all_of(c(
    "All_sample_share", "All_Sample",
    "Type_1_share", "Type 1",
    "Type_2_share", "Type 2",
    "Type_3_share", "Type 3",
    "Type_4_share", "Type 4",
    "Type_5_share", "Type 5"
  )))

################################################################################
# Appendix G: Table 5
################################################################################
# Stack the per-group rows on top of the overall row and replace the
# underscores in the column names with spaces.
Final_2 <- rbind(final_table_2, total_tab_2)
colnames(Final_2) <- gsub("_", " ", colnames(Final_2))

# Convert to an xtable: left-aligned row labels, every data column centred.
Final_2 <- xtable(Final_2)
align(Final_2) <- c("l", rep("c", ncol(Final_2)))

# Two-level header: a spanning title per pair of columns, then Share / Level.
header_row1 <- paste(
  "& \\multicolumn{2}{c}{All Sample} & \\multicolumn{2}{c}{Type 1} & \\multicolumn{2}{c}{Type 2} &",
  "\\multicolumn{2}{c}{Type 3} & \\multicolumn{2}{c}{Type 4} & \\multicolumn{2}{c}{Type 5} \\\\"
)
header_row2 <- paste(
  "& Share & Level & Share & Level &",
  "Share & Level & Share & Level &",
  "Share & Level & Share & Level \\\\"
)

# Extra LaTeX inserted around the rows: a top rule and the custom header before
# the first row, a rule above the "Deficits at 52" row, and a closing rule.
add_to_row <- list()
add_to_row$pos <- list(
  -1,
  -1,
  which(rownames(Final_2) == "Deficits at 52") - 1,
  nrow(Final_2)
)
add_to_row$command <- c(
  "\\hline\n",
  paste(header_row1, header_row2, "\\hline\n"),
  "\\hline\n",
  "\\hline\n"
)

# Write the table to a LaTeX file. The default column names and rules are
# switched off because add_to_row supplies both.
print(
  Final_2,
  type = "latex",
  file = "output/Part2_output/Part2_f_deficit_prevalence/tab15.tex",
  include.colnames = FALSE,
  hline.after = NULL,
  add.to.row = add_to_row
)


# Clear environment: remove every object created by this script, keeping only
# the ones whose names were recorded in to_keep.
rm(list = setdiff(ls(), to_keep))