# Work from main_path (defined outside this file).
setwd(main_path)

## Import relevant libraries
library(openxlsx)
library(nnet)  
library(kableExtra)

#Store Environment
# Snapshot of the object names that exist at this point; the cleanup at the
# end of the script removes every object whose name is not in this snapshot.
to_keep <- ls()
# Start of the run, used to report the total runtime at the end.
start_time <- Sys.time()


# Working copy of df_proc (created outside this file); all estimation and
# data summaries below read from df_imputed.
df_imputed<- df_proc 

# STEP 1: Create the Initial Distribution at age 52-53
# NOTE(review): the sourced script presumably creates init_distribution and
# insample, which are used below but never defined in this file -- confirm.
# The working directory stays at code_path_p3 afterwards unless the sourced
# script changes it.
setwd(code_path_p3)
source("Part3_b_and_c_get_init_distribution.R")


# STEP 2: Create the complete health trajectories
# Build the observed history with one row per respondent (rahhidpn) and
# interview, ordered by age at interview (agey_e). The columns kept are those
# listed in varlist_t_k plus age_1, c_age, good_health and bad_health.
Data_History <- df_imputed %>%
  arrange(rahhidpn, agey_e) %>%
  group_by(rahhidpn) %>%
  mutate(
    # Position of the interview within the respondent's own history
    n_obs = row_number(),
    # 1 when any of the listed variables is missing, 0 otherwise
    out_init_dist = ifelse(
      is.na(PI) | is.na(mstatusd) | is.na(genderd) | is.na(srhs) |
        is.na(cluster_52_frailty_bl) | is.na(educ_cat),
      1, 0
    ),
    # Model age: 52 at the first observation, two more years per observation
    age_1 = 50 + 2 * n_obs,
    # Two-year age bin used for grouping
    c_age = floor(age_1 / 2) * 2,
    # Health indicators built from the self-reported health dummies
    good_health = ifelse(srhs_1 == 1 | srhs_2 == 1 | srhs_3 == 1, 1, 0),
    bad_health = ifelse(srhs_4 == 1 | srhs_5 == 1, 1, 0)
  ) %>%
  ungroup() %>%
  select(all_of(varlist_t_k), age_1, c_age, good_health, bad_health)

# Keep only the respondents whose id is listed in insample
Data_History <- subset(Data_History, rahhidpn %in% insample)

# Number of rows and number of missing shlt values by age bin and health type
Data_Historys <- Data_History %>%
  group_by(c_age, cluster_52_frailty_bl) %>%
  summarise(total = n(), missing = sum(is.na(shlt)))

# Fill a missing marital status with the respondent's previous observation.
#
# One call carries a value forward by a single observation only, so runs of
# consecutive missing values need repeated calls.
#
# Data_History: data with the columns rahhidpn, agey_e and mstatusd.
# Returns the data sorted by rahhidpn and agey_e, grouped by rahhidpn, with
# the helper column L_mstatusd (marital status lagged by one observation).
fill_mstatus <- function(Data_History) {
  filled <- Data_History %>%
    arrange(rahhidpn, agey_e) %>%
    group_by(rahhidpn) %>%
    mutate(
      L_mstatusd = lag(mstatusd, 1),
      mstatusd = ifelse(is.na(mstatusd), L_mstatusd, mstatusd)
    )

  return(filled)
}

# Apply the one-step fill eleven times in total, so that runs of up to eleven
# consecutive missing marital-status values are filled from the last
# observed value.
Data_Historyv2 <- Data_History
for (i in 0:10) {
  Data_Historyv2 <- fill_mstatus(Data_Historyv2)
}

# STEP 3: Define the functions used to perform the simulation exercise

# Generate the following functions that are needed to run the simulation
# complete_data
# one_step_ahead
# gen_simul
# Add the covariates needed by the transition models to a simulated panel.
#
# new_data1: data frame holding age_1, genderd, srhs_2..srhs_5,
#            educ_cat_1..educ_cat_3 and the marital-status column m_<k>.
# k:         period index; mstatusd is set to the values of column m_<k>.
#
# Returns new_data1 with mstatusd replaced by m_<k> and with the age
# polynomial (age_2, age_3) and the age interactions added, or overwritten
# when they already exist. Stops with an error when a required column is
# absent.
complete_data <- function(new_data1, k) {
  m_col <- paste0("m_", k)

  # Look columns up in the data only. A lookup that falls back to the
  # enclosing environments could silently pick up an unrelated global object
  # named m_<k> when the column is missing.
  required <- c("age_1", "genderd", m_col,
                paste0("srhs_", 2:5), paste0("educ_cat_", 1:3))
  absent <- setdiff(required, names(new_data1))
  if (length(absent) > 0) {
    stop("complete_data(): missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  age <- new_data1[["age_1"]]

  # Marital status of period k
  new_data1[["mstatusd"]] <- new_data1[[m_col]]

  # Age polynomial
  new_data1[["age_2"]] <- age * age
  new_data1[["age_3"]] <- new_data1[["age_2"]] * age

  # Interactions with age and age squared
  new_data1[["genderd_age_1"]] <- new_data1[["genderd"]] * age
  new_data1[["genderd_age_2"]] <- new_data1[["genderd"]] * new_data1[["age_2"]]
  new_data1[["mstatusd_age_1"]] <- new_data1[["mstatusd"]] * age
  new_data1[["mstatusd_age_2"]] <- new_data1[["mstatusd"]] * new_data1[["age_2"]]

  # Health-state and education dummies interacted with age
  new_data1[["srhs_2_age_1"]] <- new_data1[["srhs_2"]] * age
  new_data1[["srhs_3_age_1"]] <- new_data1[["srhs_3"]] * age
  new_data1[["srhs_4_age_1"]] <- new_data1[["srhs_4"]] * age
  new_data1[["srhs_5_age_1"]] <- new_data1[["srhs_5"]] * age
  new_data1[["educ_cat_1_age_1"]] <- new_data1[["educ_cat_1"]] * age
  new_data1[["educ_cat_2_age_1"]] <- new_data1[["educ_cat_2"]] * age
  new_data1[["educ_cat_3_age_1"]] <- new_data1[["educ_cat_3"]] * age

  return(new_data1)
}


# Advance the simulated population by one two-year period.
#
# data_in:          current cross-section, one row per id_sim, holding the
#                   health dummies srhs_1..srhs_6 and the model covariates.
# invariant_states: per-id_sim columns that are carried over unchanged; they
#                   are joined back by id_sim after the transition.
# model:            fitted model for which predict(type = "probs") returns
#                   the probabilities of the six next-period states.
# k:                index of the simulated period; selects column m_<k> in
#                   complete_data() and d_srhs_<k> when give_history is 1.
# give_history:     1 to replace the simulated state by the observed one
#                   (d_srhs_<k>) while k <= 5; 0 (default) to always simulate.
#
# Returns one row per id_sim, sorted by id_sim, with age_1 advanced by 2, the
# new health dummies, the invariant states and the covariates added by
# complete_data().
one_step_ahead <- function(data_in, invariant_states, model, k,
                           give_history = 0) {
  # Individuals who are already dead (srhs_6 == 1)
  id_death <- data_in %>% filter(srhs_6 == 1) %>% select(id_sim)

  # Dead sample: health status does not change, only age is updated
  data_in3 <- data_in %>%
    filter(id_sim %in% id_death$id_sim) %>%
    select(id_sim, age_1, srhs_1, srhs_2, srhs_3, srhs_4, srhs_5, srhs_6) %>%
    mutate(age_1 = age_1 + 2)

  # Alive sample
  data_in2 <- data_in %>% filter(!(id_sim %in% id_death$id_sim))

  # Transition probabilities
  e_p <- round(predict(model, newdata = data_in2, type = "probs"), 10)
  # predict() can return a plain vector (e.g. for a single row); restore the
  # one-row-per-individual matrix shape so that cbind() lines up.
  if (is.null(dim(e_p))) {
    e_p <- matrix(e_p, nrow = nrow(data_in2))
  }

  e_n <- data_in2 %>%
    select(id_sim, age_1,
           d_srhs_1, d_srhs_2, d_srhs_3, d_srhs_4, d_srhs_5, d_srhs_6,
           frailty_1, frailty_2, frailty_3, frailty_4, frailty_5, frailty_6) %>%
    mutate(age_1 = age_1 + 2)
  e_1 <- cbind(e_n, e_p)
  # The six probability columns take the names of the health dummies
  names(e_1) <- c(names(e_n), paste0("srhs_", 1:6))

  # Cumulative probabilities used as thresholds for the health transition
  thresholds <- e_1 %>%
    mutate(interval_1 = srhs_1,
           interval_2 = interval_1 + srhs_2,
           interval_3 = interval_2 + srhs_3,
           interval_4 = interval_3 + srhs_4,
           interval_5 = interval_4 + srhs_5)

  # One uniform draw per alive individual
  thresholds$random_state <- runif(nrow(e_1))

  # Assign the health state whose interval contains the draw. The last state
  # takes every draw above interval_5: the probabilities were rounded, so
  # their sum can fall slightly short of 1, and an upper bound on the last
  # interval would leave such a draw without any state.
  thresholds <- thresholds %>%
    mutate(srhs_1 = ifelse(random_state <= interval_1, 1, 0),
           srhs_2 = ifelse(random_state <= interval_2 & random_state > interval_1, 1, 0),
           srhs_3 = ifelse(random_state <= interval_3 & random_state > interval_2, 1, 0),
           srhs_4 = ifelse(random_state <= interval_4 & random_state > interval_3, 1, 0),
           srhs_5 = ifelse(random_state <= interval_5 & random_state > interval_4, 1, 0),
           srhs_6 = ifelse(random_state > interval_5, 1, 0))

  # Option: use the observed srhs during the clustering period. A missing
  # observed state yields NA in all six dummies.
  if (give_history == 1 && k <= 5) {
    thresholds <- thresholds %>%
      mutate(realized = .data[[paste0("d_srhs_", k)]],
             srhs_1 = ifelse(realized == 1, 1, 0),
             srhs_2 = ifelse(realized == 2, 1, 0),
             srhs_3 = ifelse(realized == 3, 1, 0),
             srhs_4 = ifelse(realized == 4, 1, 0),
             srhs_5 = ifelse(realized == 5, 1, 0),
             srhs_6 = ifelse(realized == 6, 1, 0))
  }

  # Final assignment for those alive
  e_1 <- thresholds %>%
    select("id_sim", "age_1", "srhs_1", "srhs_2", "srhs_3", "srhs_4", "srhs_5", "srhs_6")

  # Stack those who faced a health transition and those who were dead
  final_data <- rbind(e_1, data_in3)

  # Add invariant states for next period
  merged_data <- left_join(final_data, invariant_states, by = "id_sim")
  # Complete the data with the covariates needed to calculate the next
  # transition probabilities
  merged_data <- complete_data(merged_data, k) %>% arrange(id_sim)

  return(merged_data)
}


# Simulate the whole history of health transitions.
#
# Starting from the initial cross-section e, the population is moved forward
# eleven times with one_step_ahead() (period index k = 2, ..., 12), i.e. from
# age 52-53 up to age 74-75.
#
# e:                initial cross-section (one row per id_sim).
# model:            fitted transition model passed on to one_step_ahead().
# invariant_states: time-invariant columns passed on to one_step_ahead().
#
# Returns the initial cross-section and the eleven simulated ones stacked and
# sorted by id_sim and age_1.
gen_simul <- function (e, model, invariant_states) {
  periods <- 2:12

  # Slot 1 holds the starting point, slot j + 1 the result of step j
  path <- vector("list", length(periods) + 1)
  path[[1]] <- e
  for (j in seq_along(periods)) {
    path[[j + 1]] <- one_step_ahead(path[[j]], invariant_states, model, periods[j])
  }

  SIMU <- do.call(rbind, path) %>% arrange(id_sim, age_1)
  return(SIMU)
}



# Fix the seed so that the same initial distribution is drawn on every run
set.seed(123)

# Number of simulated individuals
num_draws <- 100000

# Draw row indices of init_distribution with replacement. seq_len() is used
# so that an empty init_distribution raises an error instead of sampling
# from c(1, 0).
samples <- sample(seq_len(nrow(init_distribution)), size = num_draws, replace = TRUE)
# Each draw becomes one simulated individual (id_sim) starting at age agey_e
Joint_Init_dist <- init_distribution[samples, ] %>%
  mutate(id_sim = row_number(), age_1 = agey_e)

Joint_Init_distc <- complete_data(Joint_Init_dist, 1)

# Step One: Get the distribution
# Columns carried through the simulation: states, covariates, the
# marital-status path m_1..m_12 and the d_srhs_* / frailty_* columns.
e <- Joint_Init_dist %>%
  group_by(id_sim) %>%
  select(id_sim, age_1, cluster_52_frailty_bl, genderd,
         mstatusd, srhs_1, srhs_2, srhs_3, srhs_4, srhs_5, srhs_6,
         educ_cat_1, educ_cat_2, educ_cat_3,
         m_1, m_2, m_3, m_4, m_5, m_6, m_7, m_8, m_9, m_10, m_11, m_12,
         d_srhs_1, d_srhs_2, d_srhs_3, d_srhs_4, d_srhs_5, d_srhs_6,
         frailty_1, frailty_2, frailty_3, frailty_4, frailty_5, frailty_6) %>%
  ungroup()

# Columns that one_step_ahead() joins back by id_sim after every transition
invariant_states <- e %>%
  select(id_sim, educ_cat_1, educ_cat_2, educ_cat_3,
         cluster_52_frailty_bl, genderd, mstatusd,
         m_1, m_2, m_3, m_4, m_5, m_6, m_7, m_8, m_9, m_10, m_11, m_12,
         d_srhs_1, d_srhs_2, d_srhs_3, d_srhs_4, d_srhs_5, d_srhs_6,
         frailty_1, frailty_2, frailty_3, frailty_4, frailty_5, frailty_6)

# Complete data: Initial Distribution (marital status of period 1)
e <- complete_data(e, 1)



# n_bar: minimum number of alive individuals of a health type; below it the
# fractions in good and bad health of that type are not reported
n_bar <- 6

### STEP 4-a: Simulate health trajectories using State-of-art first order Markov: No health types
# Define Model: age polynomial, current health state, education and marital
# status, each interacted with gender
formula1 <- lead_srhs ~ (
  age_1 + age_2 +
    srhs_2 + srhs_3 + srhs_4 + srhs_5 +
    educ_cat_2 + educ_cat_3 + mstatusd
) * (factor(genderd))

## Estimate model's parameters on the in-sample rows with Death == 0
model_data <- subset(df_imputed, Death == 0 & rahhidpn %in% insample)
model1 <- multinom(formula1, data = model_data, maxit = 5000)

# Get simulation and create additional variables for analysis
SIMU_mod_1 <- gen_simul(e, model1, invariant_states)
SIMU_mod_1 <- SIMU_mod_1 %>%
  mutate(
    c_age = floor(age_1 / 2) * 2,
    good_health = ifelse(srhs_1 == 1 | srhs_2 == 1 | srhs_3 == 1, 1, 0),
    bad_health = ifelse(srhs_4 == 1 | srhs_5 == 1, 1, 0)
  )

# Get main statistics to plot

# Summarise a panel of health states by two-year age bin (c_age <= 75).
#
# The same summary is applied to both simulations and to the observed data,
# so the three result tables share their columns and column order; the NA
# mask applied further below relies on that.
#
# panel: data with c_age, srhs_6, good_health, bad_health,
#        cluster_52_frailty_bl, educ_cat_1..educ_cat_3, genderd and mstatusd.
#        "Alive" means srhs_6 == 0; rows with missing srhs_6 only count
#        towards total_n.
# n_bar: fraction_goodhealth_type<j> and fraction_badhealth_type<j> are set
#        to NA in age bins with fewer than n_bar alive individuals of type j.
#
# Returns one row per c_age with counts and fractions overall, by health
# type, by education, by gender and by marital status.
summarise_health_by_age <- function(panel, n_bar) {
  panel %>%
    filter(c_age <= 75) %>%
    group_by(c_age) %>%
    summarise(
      total_n = n(),
      total_n_with_data = sum(!is.na(srhs_6)),
      total_n_type1 = sum(cluster_52_frailty_bl == 1 & !is.na(srhs_6), na.rm = TRUE),
      total_n_type2 = sum(cluster_52_frailty_bl == 2 & !is.na(srhs_6), na.rm = TRUE),
      total_n_type3 = sum(cluster_52_frailty_bl == 3 & !is.na(srhs_6), na.rm = TRUE),
      total_n_type4 = sum(cluster_52_frailty_bl == 4 & !is.na(srhs_6), na.rm = TRUE),
      total_n_type5 = sum(cluster_52_frailty_bl == 5 & !is.na(srhs_6), na.rm = TRUE),
      total_alive = sum(srhs_6 == 0, na.rm = TRUE),
      fraction_alive = total_alive / total_n_with_data,
      total_n_type1_alive = sum(cluster_52_frailty_bl == 1 & srhs_6 == 0, na.rm = TRUE),
      total_n_type2_alive = sum(cluster_52_frailty_bl == 2 & srhs_6 == 0, na.rm = TRUE),
      total_n_type3_alive = sum(cluster_52_frailty_bl == 3 & srhs_6 == 0, na.rm = TRUE),
      total_n_type4_alive = sum(cluster_52_frailty_bl == 4 & srhs_6 == 0, na.rm = TRUE),
      total_n_type5_alive = sum(cluster_52_frailty_bl == 5 & srhs_6 == 0, na.rm = TRUE),
      fraction_srhs_type1_alive = sum(cluster_52_frailty_bl == 1 & srhs_6 == 0, na.rm = TRUE) / total_n_type1,
      fraction_srhs_type2_alive = sum(cluster_52_frailty_bl == 2 & srhs_6 == 0, na.rm = TRUE) / total_n_type2,
      fraction_srhs_type3_alive = sum(cluster_52_frailty_bl == 3 & srhs_6 == 0, na.rm = TRUE) / total_n_type3,
      fraction_srhs_type4_alive = sum(cluster_52_frailty_bl == 4 & srhs_6 == 0, na.rm = TRUE) / total_n_type4,
      fraction_srhs_type5_alive = sum(cluster_52_frailty_bl == 5 & srhs_6 == 0, na.rm = TRUE) / total_n_type5,
      fraction_goodhealth = sum(good_health == 1, na.rm = TRUE) / total_alive,
      fraction_goodhealth_type1 = sum(cluster_52_frailty_bl == 1 & good_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_goodhealth_type2 = sum(cluster_52_frailty_bl == 2 & good_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 2 & srhs_6 == 0, na.rm = TRUE),
      fraction_goodhealth_type3 = sum(cluster_52_frailty_bl == 3 & good_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 3 & srhs_6 == 0, na.rm = TRUE),
      fraction_goodhealth_type4 = sum(cluster_52_frailty_bl == 4 & good_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 4 & srhs_6 == 0, na.rm = TRUE),
      fraction_goodhealth_type5 = sum(cluster_52_frailty_bl == 5 & good_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 5 & srhs_6 == 0, na.rm = TRUE),
      fraction_badhealth = sum(bad_health == 1, na.rm = TRUE) / total_alive,
      fraction_badhealth_type1 = sum(cluster_52_frailty_bl == 1 & bad_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_badhealth_type2 = sum(cluster_52_frailty_bl == 2 & bad_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 2 & srhs_6 == 0, na.rm = TRUE),
      fraction_badhealth_type3 = sum(cluster_52_frailty_bl == 3 & bad_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 3 & srhs_6 == 0, na.rm = TRUE),
      fraction_badhealth_type4 = sum(cluster_52_frailty_bl == 4 & bad_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 4 & srhs_6 == 0, na.rm = TRUE),
      fraction_badhealth_type5 = sum(cluster_52_frailty_bl == 5 & bad_health == 1, na.rm = TRUE) / sum(cluster_52_frailty_bl == 5 & srhs_6 == 0, na.rm = TRUE),
      counting_alive = sum(srhs_6 == 0, na.rm = TRUE),
      counting_alive_type1 = sum(cluster_52_frailty_bl == 1 & srhs_6 == 0, na.rm = TRUE),
      counting_alive_type2 = sum(cluster_52_frailty_bl == 2 & srhs_6 == 0, na.rm = TRUE),
      counting_alive_type3 = sum(cluster_52_frailty_bl == 3 & srhs_6 == 0, na.rm = TRUE),
      counting_alive_type4 = sum(cluster_52_frailty_bl == 4 & srhs_6 == 0, na.rm = TRUE),
      counting_alive_type5 = sum(cluster_52_frailty_bl == 5 & srhs_6 == 0, na.rm = TRUE),
      cont_type1 = counting_alive_type1 / counting_alive,
      cont_type2 = counting_alive_type2 / counting_alive,
      cont_type3 = counting_alive_type3 / counting_alive,
      cont_type4 = counting_alive_type4 / counting_alive,
      cont_type5 = counting_alive_type5 / counting_alive,
      fraction_alive_educ_1 = sum(srhs_6 == 0 & educ_cat_1 == 1, na.rm = TRUE) / sum(educ_cat_1 == 1, na.rm = TRUE),
      fraction_good_health_educ_1 = sum(good_health == 1 & educ_cat_1 == 1, na.rm = TRUE) / sum(educ_cat_1 == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_alive_educ_2 = sum(srhs_6 == 0 & educ_cat_2 == 1, na.rm = TRUE) / sum(educ_cat_2 == 1, na.rm = TRUE),
      fraction_good_health_educ_2 = sum(good_health == 1 & educ_cat_2 == 1, na.rm = TRUE) / sum(educ_cat_2 == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_alive_educ_3 = sum(srhs_6 == 0 & educ_cat_3 == 1, na.rm = TRUE) / sum(educ_cat_3 == 1, na.rm = TRUE),
      fraction_good_health_educ_3 = sum(good_health == 1 & educ_cat_3 == 1, na.rm = TRUE) / sum(educ_cat_3 == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_alive_male = sum(srhs_6 == 0 & genderd == 0, na.rm = TRUE) / sum(genderd == 0, na.rm = TRUE),
      fraction_good_health_male = sum(good_health == 1 & genderd == 0, na.rm = TRUE) / sum(genderd == 0 & srhs_6 == 0, na.rm = TRUE),
      fraction_alive_women = sum(srhs_6 == 0 & genderd == 1, na.rm = TRUE) / sum(genderd == 1, na.rm = TRUE),
      fraction_good_health_women = sum(good_health == 1 & genderd == 1, na.rm = TRUE) / sum(genderd == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_alive_couple = sum(srhs_6 == 0 & mstatusd == 1, na.rm = TRUE) / sum(mstatusd == 1, na.rm = TRUE),
      fraction_good_health_couple = sum(good_health == 1 & mstatusd == 1, na.rm = TRUE) / sum(mstatusd == 1 & srhs_6 == 0, na.rm = TRUE),
      fraction_alive_single = sum(srhs_6 == 0 & mstatusd == 0, na.rm = TRUE) / sum(mstatusd == 0, na.rm = TRUE),
      fraction_good_health_single = sum(good_health == 1 & mstatusd == 0, na.rm = TRUE) / sum(mstatusd == 0 & srhs_6 == 0, na.rm = TRUE)
    ) %>%
    # Do not report health fractions that rest on fewer than n_bar survivors
    mutate(
      fraction_goodhealth_type1 = ifelse(counting_alive_type1 < n_bar, NA, fraction_goodhealth_type1),
      fraction_goodhealth_type2 = ifelse(counting_alive_type2 < n_bar, NA, fraction_goodhealth_type2),
      fraction_goodhealth_type3 = ifelse(counting_alive_type3 < n_bar, NA, fraction_goodhealth_type3),
      fraction_goodhealth_type4 = ifelse(counting_alive_type4 < n_bar, NA, fraction_goodhealth_type4),
      fraction_goodhealth_type5 = ifelse(counting_alive_type5 < n_bar, NA, fraction_goodhealth_type5),
      fraction_badhealth_type1 = ifelse(counting_alive_type1 < n_bar, NA, fraction_badhealth_type1),
      fraction_badhealth_type2 = ifelse(counting_alive_type2 < n_bar, NA, fraction_badhealth_type2),
      fraction_badhealth_type3 = ifelse(counting_alive_type3 < n_bar, NA, fraction_badhealth_type3),
      fraction_badhealth_type4 = ifelse(counting_alive_type4 < n_bar, NA, fraction_badhealth_type4),
      fraction_badhealth_type5 = ifelse(counting_alive_type5 < n_bar, NA, fraction_badhealth_type5)
    )
}

# Summary of the simulation without health types
result_mod_1 <- summarise_health_by_age(SIMU_mod_1, n_bar)


### STEP 4-b: Simulate health trajectories using State-of-art first order Markov: Including health types dummies
# Define Model: the model of step 4-a plus health-type dummies
formula2 <- lead_srhs ~ (age_1 + age_2  + srhs_2 + srhs_3 + srhs_4 + srhs_5
                         + educ_cat_2 + educ_cat_3 + mstatusd)*(factor(genderd))  + factor(cluster_52_frailty_bl)

## Estimate model's parameters
model_data <- subset(df_imputed, Death == 0 & rahhidpn %in% insample)
model2 <- multinom(formula2, data = model_data, maxit = 5000)

# Get simulation and create additional variables for analysis
SIMU_mod_2 <- gen_simul(e, model2, invariant_states) %>%
  mutate(c_age = floor(age_1 / 2) * 2,
         good_health = ifelse(srhs_1 == 1 | srhs_2 == 1 | srhs_3 == 1, 1, 0),
         bad_health = ifelse(srhs_4 == 1 | srhs_5 == 1, 1, 0))

# Summary of the simulation with health types
result_mod_2 <- summarise_health_by_age(SIMU_mod_2, n_bar)

###########################################################################################
# Get the actual data: the same summary applied to the observed histories
result_mod_3 <- summarise_health_by_age(Data_Historyv2, n_bar)
          

# We plot model simulations as we plot the data: every cell that is NA in the
# data summary is blanked out in both simulation summaries.
# NOTE(review): the mask is applied by position, so it assumes that the three
# result tables have the same rows (age bins) and columns in the same order --
# confirm.
na_mask <- is.na(result_mod_3)

result_mod_1 <- replace(result_mod_1, na_mask, NA)
result_mod_2 <- replace(result_mod_2, na_mask, NA)



# Export simulations and data to excel for analysis: Here you will find the analysis in section 5
# NOTE(review): the output paths are relative, so they resolve against the
# current working directory. This file last sets it to code_path_p3; confirm
# that the sourced script restores the directory that contains "output/".

# One workbook, one sheet per result table
wb <- createWorkbook()

addWorksheet(wb, sheetName = "Data")
writeData(wb, sheet = "Data", x = result_mod_3)

addWorksheet(wb, sheetName = "Model_no_healthtype")
writeData(wb, sheet = "Model_no_healthtype", x = result_mod_1)

addWorksheet(wb, sheetName = "Model_with_healthtype")
writeData(wb, sheet = "Model_with_healthtype", x = result_mod_2)

# Write the workbook to disk, replacing an existing file
saveWorkbook(wb, file = "output/Part3_output/Part3_c_simulations/simul_results.xlsx", overwrite = TRUE)

# To get the numbers quoted in the text for type 1, compare
# result_mod_3$fraction_goodhealth_type1 with
# result_mod_1$fraction_goodhealth_type1 at ages 52 and 70. For type 2 use
# result_mod_3$fraction_goodhealth_type2 and
# result_mod_1$fraction_goodhealth_type2.

# Also export each sheet as .csv

# Folder that receives the CSV files
base_path <- "output/Part3_output/Part3_c_simulations/"

# One CSV file per result table, without row names
write.csv(x = result_mod_3,
          file = paste0(base_path, "simul_results_Data.csv"),
          row.names = FALSE)
write.csv(x = result_mod_1,
          file = paste0(base_path, "simul_results_Model_no_healthtype.csv"),
          row.names = FALSE)
write.csv(x = result_mod_2,
          file = paste0(base_path, "simul_results_Model_with_healthtype.csv"),
          row.names = FALSE)

############################################################################################################################
############################################################################################################################
############################################################################################################################
## Plot simulations results
# Switch: set runcode to anything other than 1 to skip the plotting script.
runcode<-1
if (runcode==1){
  setwd(code_path_p3)
  # The sourced script plots
  # a) Fraction alive, b) Fraction in good health and c) Fraction in bad health
  # for the simulations using Model 1 and 2 and the corresponding data analog
  source("Part3_c_plots_simul.R")
}


# Report the time elapsed since start_time was recorded at the top of the script
end_time <- Sys.time()
runtime <- end_time-start_time
print(runtime)

############################################################

#Clear environment
# Remove every object whose name is not in the to_keep snapshot taken at the
# top of the script, i.e. everything this script created.
rm(list = setdiff(ls(), c(to_keep)))
