# Work from main_path so the relative 'dtafiles/...' and 'output/...' paths
# used below resolve. main_path is not defined in this script and must
# already exist when the script runs.
setwd(main_path)


### Import relevant libraries
library(tidyverse)
library(haven)
library(stats)
library(gcookbook)
library(ggplot2)
library(plot.matrix)
library(imguR)
library(directlabels)
library(ggrepel)

#Store Environment
# Record the names of the objects that exist before this script creates any
# of its own; the cleanup at the end of the script removes everything that is
# not in this list. ls() is called before start_time is assigned, so
# start_time itself is not part of the snapshot.
to_keep <- ls()
# Start of the wall-clock timer reported at the end of the script.
start_time <- Sys.time()

# Panel with the K = 5 cluster assignment
file_name <- 'dtafiles/P52_5_Clusters.dta'

# Keep rows that have a cluster assignment and are at or above Initial_Age,
# order each person's rows by year and number them 1, 2, ... (posit).
# The result stays grouped by rahhidpn.
df5 <- read_dta(file_name) %>%
  filter(!is.na(cluster_52_frailty_bl), agey_e >= Initial_Age) %>%
  arrange(rahhidpn, year) %>%
  group_by(rahhidpn) %>%
  mutate(posit = row_number())

# Self-rated health (shlt) as a factor, plus one 0/1 indicator per level.
# Each indicator is 1 when srhs equals the level, 0 when srhs is another
# level, and NA when srhs is missing.
df5$srhs <- as.factor(df5$shlt)
# Excellent health
df5$srhs_1 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 1))
# Very good health
df5$srhs_2 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 2))
# Good health
df5$srhs_3 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 3))
# Fair health
df5$srhs_4 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 4))
# Poor health
df5$srhs_5 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 5))
# Death (NOTE(review): presumably shlt == 6 is coded upstream for deceased
# respondents - confirm)
df5$srhs_6 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 6))

# Disabled alternative that would flag death from the Dead variable instead:
#df5$srhs_6 <- as.numeric(ifelse(df5$Dead==1,1,df5$srhs_6))

# good_health: 1 for excellent / very good / good, 0 for the other levels,
# NA when self-rated health is missing.
df5$good_health <- as.numeric(
  df5$srhs_1 == 1 | df5$srhs_2 == 1 | df5$srhs_3 == 1
)



###
# Options shared by every figure in this script
opt_size_grid <- 0.5   # line size of the major grid lines
opt_size_vline <- 1.5  # line size of the red vertical reference line
opt_heigth <- 5.5      # height passed to ggsave() (name is spelled this way everywhere)
opt_width <- 6.5       # width passed to ggsave()
opt_dpi <- 300         # dpi passed to ggsave()

####
# Cells with fewer than n_bar observations are set to NA before plotting.
# Counts are never negative, so 0 masks nothing.
n_bar <- 0

df <- df5
## fig2-1: mean frailty by age for each cluster (K = 5)
# One row per (cluster, wave position). temp1-temp4 (mean / median of hitot
# and hatotb) are not referenced by the plotting code below.
plot_dta <- df %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    temp = mean(frailty_bl, na.rm = TRUE),
    temp1 = mean(hitot, na.rm = TRUE),
    temp2 = mean(hatotb, na.rm = TRUE),
    temp3 = median(hitot, na.rm = TRUE),
    temp4 = median(hatotb, na.rm = TRUE),
    n_obs = sum(!is.na(frailty_bl)),
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask cells with fewer than n_bar non-missing frailty values
    temp = ifelse(n_obs < n_bar, NA, temp)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it. The x axis is restricted to ages 52-74.
ggplot(plot_dta, aes(x = Age, y = temp, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 72, 72, 54, 57),
           y = c(0.15, 0.37, 0.65, 0.85, 0.75),
           label = c("Type 1\n(57%)", "Type 2\n(27%)", "Type 4\n(10%)",
                     "Type 5\n(3%)", "Type 3\n(3%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age), limits = c(52, 74)) +
  # coord_cartesian(xlim = c(52, 74)) +  # Ensure no padding around x-axis
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "longdash",
                                   "5" = "solid")) +
  ylim(0, 1) +
  labs(y = "Frailty", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig2-1.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)

# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig2-1.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

df <- df5
## fig2-2: mean frailty by age for each cluster (K = 5), frailty_bl < 1 only
# Rows with frailty_bl equal to 1 (or missing) are dropped before averaging.
# NOTE(review): presumably frailty_bl == 1 marks deceased respondents, making
# this the survivors-only version of fig2-1 - confirm.
plot_dta <- df %>%
  filter(frailty_bl < 1) %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    temp = mean(frailty_bl, na.rm = TRUE),
    n_obs = sum(!is.na(frailty_bl)),
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask cells with fewer than n_bar non-missing frailty values
    temp = ifelse(n_obs < n_bar, NA, temp)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = temp, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 72, 72, 54, 58),
           y = c(0.04, 0.39, 0.58, 0.70, 0.60),
           label = c("Type 1\n(57%)", "Type 2\n(27%)", "Type 4\n(10%)",
                     "Type 5\n(3%)", "Type 3\n(3%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "longdash",
                                   "5" = "solid")) +
  ylim(0, 1) +
  labs(y = "Frailty", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig2-2.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)
# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig2-2.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

## fig2-3: fraction dead by age for each cluster (K = 5)
df <- df5
# Survival and self-rated-health counts per (cluster, wave position).
# Only fraction_dead is plotted; the health fractions are computed alongside.
plot_dta <- df %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    total_n = n(),
    total_n_with_data = sum(!is.na(Dead)),
    total_n_with_srhs = sum(!is.na(srhs)),
    total_alive = sum(Dead == 0, na.rm = TRUE),
    # na.rm = TRUE: a row with srhs present but Dead missing evaluates to NA
    # and would otherwise turn the whole count (and the health fractions) NA
    total_alive_with_srhs = sum(!is.na(srhs) & Dead == 0, na.rm = TRUE),
    fraction_alive = total_alive / total_n_with_data,
    fraction_dead = 1 - fraction_alive,
    fraction_goodhealth = sum(good_health == 1, na.rm = TRUE) /
      total_alive_with_srhs,
    fraction_badhealth = 1 - fraction_goodhealth,
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask health fractions based on fewer than n_bar living respondents
    fraction_goodhealth = ifelse(total_alive_with_srhs < n_bar, NA,
                                 fraction_goodhealth),
    fraction_badhealth = ifelse(total_alive_with_srhs < n_bar, NA,
                                fraction_badhealth)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = fraction_dead, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(73, 73, 73, 54, 57),
           y = c(0.03, 0.32, 0.55, 0.65, 0.55),
           label = c("Type 1\n(57%)", "Type 2\n(27%)", "Type 4\n(10%)",
                     "Type 5\n(3%)", "Type 3\n(3%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "longdash",
                                   "5" = "solid")) +
  ylim(0, 1) +
  labs(y = "Fraction dead", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig2-3.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)
# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig2-3.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

################################################################################
## APPENDIX E: How do trajectories look with 4 and 6 clusters? 
################################################################################

################################################################################
### Figure 12. Health trajectories by cluster when K=4
################################################################################
# Panel with the K = 4 cluster assignment
file_name <- 'dtafiles/P52_4_Clusters.dta'

# Keep rows that have a cluster assignment and are at or above Initial_Age,
# order each person's rows by year and number them 1, 2, ... (posit).
# The result stays grouped by rahhidpn.
df5 <- read_dta(file_name) %>%
  filter(!is.na(cluster_52_frailty_bl), agey_e >= Initial_Age) %>%
  arrange(rahhidpn, year) %>%
  group_by(rahhidpn) %>%
  mutate(posit = row_number())

# Self-rated health (shlt) as a factor, plus one 0/1 indicator per level.
# Each indicator is 1 when srhs equals the level, 0 when srhs is another
# level, and NA when srhs is missing.
df5$srhs <- as.factor(df5$shlt)
# Excellent health
df5$srhs_1 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 1))
# Very good health
df5$srhs_2 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 2))
# Good health
df5$srhs_3 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 3))
# Fair health
df5$srhs_4 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 4))
# Poor health
df5$srhs_5 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 5))
# Death (NOTE(review): presumably shlt == 6 is coded upstream for deceased
# respondents - confirm)
df5$srhs_6 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 6))

# Disabled alternative that would flag death from the Dead variable instead:
#df5$srhs_6 <- as.numeric(ifelse(df5$Dead==1,1,df5$srhs_6))

# good_health: 1 for excellent / very good / good, 0 for the other levels,
# NA when self-rated health is missing.
df5$good_health <- as.numeric(
  df5$srhs_1 == 1 | df5$srhs_2 == 1 | df5$srhs_3 == 1
)

# Cells with fewer than n_bar observations are set to NA before plotting;
# counts are never negative, so 0 masks nothing.
n_bar <- 0

df <- df5
## fig12-1: mean frailty by age for each cluster (K = 4)
# One row per (cluster, wave position).
plot_dta <- df %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    temp = mean(frailty_bl, na.rm = TRUE),
    n_obs = sum(!is.na(frailty_bl)),
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask cells with fewer than n_bar non-missing frailty values
    temp = ifelse(n_obs < n_bar, NA, temp)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = temp, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 72, 72, 54),
           y = c(0.15, 0.37, 0.65, 0.85),
           label = c("Type 1\n(58%)", "Type 2\n(27%)", "Type 3\n(11%)",
                     "Type 4\n(4%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "solid")) +
  ylim(0, 1) +
  labs(y = "Frailty", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig12-1.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)

# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig12-1.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

df <- df5
## fig12-2: mean frailty by age for each cluster (K = 4), frailty_bl < 1 only
# Rows with frailty_bl equal to 1 (or missing) are dropped before averaging.
# NOTE(review): presumably frailty_bl == 1 marks deceased respondents, making
# this the survivors-only version of fig12-1 - confirm.
plot_dta <- df %>%
  filter(frailty_bl < 1) %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    temp = mean(frailty_bl, na.rm = TRUE),
    n_obs = sum(!is.na(frailty_bl)),
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask cells with fewer than n_bar non-missing frailty values
    temp = ifelse(n_obs < n_bar, NA, temp)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = temp, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 72, 72, 58),
           y = c(0.10, 0.37, 0.60, 0.85),
           label = c("Type 1\n(58%)", "Type 2\n(27%)", "Type 3\n(11%)",
                     "Type 4\n(4%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "solid")) +
  ylim(0, 1) +
  labs(y = "Frailty", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig12-2.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)
# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig12-2.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

## fig12-3: fraction dead by age for each cluster (K = 4)

df <- df5
# Survival and self-rated-health counts per (cluster, wave position).
# Only fraction_dead is plotted; the health fractions are computed alongside.
plot_dta <- df %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    total_n = n(),
    total_n_with_data = sum(!is.na(Dead)),
    total_n_with_srhs = sum(!is.na(srhs)),
    total_alive = sum(Dead == 0, na.rm = TRUE),
    # na.rm = TRUE: a row with srhs present but Dead missing evaluates to NA
    # and would otherwise turn the whole count (and the health fractions) NA
    total_alive_with_srhs = sum(!is.na(srhs) & Dead == 0, na.rm = TRUE),
    fraction_alive = total_alive / total_n_with_data,
    fraction_dead = 1 - fraction_alive,
    fraction_goodhealth = sum(good_health == 1, na.rm = TRUE) /
      total_alive_with_srhs,
    fraction_badhealth = 1 - fraction_goodhealth,
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask health fractions based on fewer than n_bar living respondents
    fraction_goodhealth = ifelse(total_alive_with_srhs < n_bar, NA,
                                 fraction_goodhealth),
    fraction_badhealth = ifelse(total_alive_with_srhs < n_bar, NA,
                                fraction_badhealth)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = fraction_dead, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 72, 72, 54),
           y = c(0.025, 0.35, 0.6, 0.85),
           label = c("Type 1\n(58%)", "Type 2\n(27%)", "Type 3\n(11%)",
                     "Type 4\n(4%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "solid")) +
  ylim(0, 1) +
  labs(y = "Fraction dead", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig12-3.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)

# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig12-3.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

################################################################################
### Figure 13. Health trajectories by cluster when K=6
################################################################################
# Panel with the K = 6 cluster assignment
file_name <- 'dtafiles/P52_6_Clusters.dta'

# Keep rows that have a cluster assignment and are at or above Initial_Age,
# order each person's rows by year and number them 1, 2, ... (posit).
# The result stays grouped by rahhidpn.
df5 <- read_dta(file_name) %>%
  filter(!is.na(cluster_52_frailty_bl), agey_e >= Initial_Age) %>%
  arrange(rahhidpn, year) %>%
  group_by(rahhidpn) %>%
  mutate(posit = row_number())

# Self-rated health (shlt) as a factor, plus one 0/1 indicator per level.
# Each indicator is 1 when srhs equals the level, 0 when srhs is another
# level, and NA when srhs is missing.
df5$srhs <- as.factor(df5$shlt)
# Excellent health
df5$srhs_1 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 1))
# Very good health
df5$srhs_2 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 2))
# Good health
df5$srhs_3 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 3))
# Fair health
df5$srhs_4 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 4))
# Poor health
df5$srhs_5 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 5))
# Death (NOTE(review): presumably shlt == 6 is coded upstream for deceased
# respondents - confirm)
df5$srhs_6 <- as.numeric(ifelse(is.na(df5$srhs), NA, df5$srhs == 6))

# Disabled alternative that would flag death from the Dead variable instead:
#df5$srhs_6 <- as.numeric(ifelse(df5$Dead==1,1,df5$srhs_6))

# good_health: 1 for excellent / very good / good, 0 for the other levels,
# NA when self-rated health is missing.
df5$good_health <- as.numeric(
  df5$srhs_1 == 1 | df5$srhs_2 == 1 | df5$srhs_3 == 1
)

# Cells with fewer than n_bar observations are set to NA before plotting;
# counts are never negative, so 0 masks nothing.
n_bar <- 0

df <- df5
## fig13-1: mean frailty by age for each cluster (K = 6)
# One row per (cluster, wave position).
plot_dta <- df %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    temp = mean(frailty_bl, na.rm = TRUE),
    n_obs = sum(!is.na(frailty_bl)),
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask cells with fewer than n_bar non-missing frailty values
    temp = ifelse(n_obs < n_bar, NA, temp)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = temp, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 72, 72, 57, 72, 53),
           y = c(0.15, 0.37, 0.62, 0.75, 0.85, 0.85),
           label = c("Type 1\n(48%)", "Type 2\n(29%)", "Type 3\n(13%)",
                     "Type 4\n(2%)", "Type 5\n(5%)", "Type 6\n(3%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "twodash",
                                   "5" = "longdash", "6" = "solid")) +
  ylim(0, 1) +
  labs(y = "Frailty", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig13-1.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)

# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig13-1.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}


df <- df5
## fig13-2: mean frailty by age for each cluster (K = 6), frailty_bl < 1 only
# Rows with frailty_bl equal to 1 (or missing) are dropped before averaging.
# NOTE(review): presumably frailty_bl == 1 marks deceased respondents, making
# this the survivors-only version of fig13-1 - confirm.
plot_dta <- df %>%
  filter(frailty_bl < 1) %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    temp = mean(frailty_bl, na.rm = TRUE),
    n_obs = sum(!is.na(frailty_bl)),
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask cells with fewer than n_bar non-missing frailty values
    temp = ifelse(n_obs < n_bar, NA, temp)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = temp, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 68, 72, 58, 68, 55),
           y = c(0.1, 0.30, 0.45, 0.65, 0.6, 0.8),
           label = c("Type 1\n(48%)", "Type 2\n(29%)", "Type 3\n(13%)",
                     "Type 4\n(2%)", "Type 5\n(5%)", "Type 6\n(3%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "twodash",
                                   "5" = "longdash", "6" = "solid")) +
  ylim(0, 1) +
  labs(y = "Frailty", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig13-2.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)
# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig13-2.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

## fig13-3: fraction dead by age for each cluster (K = 6)

df <- df5
# Survival and self-rated-health counts per (cluster, wave position).
# Only fraction_dead is plotted; the health fractions are computed alongside.
plot_dta <- df %>%
  group_by(cluster_52_frailty_bl, posit) %>%
  summarise(
    total_n = n(),
    total_n_with_data = sum(!is.na(Dead)),
    total_n_with_srhs = sum(!is.na(srhs)),
    total_alive = sum(Dead == 0, na.rm = TRUE),
    # na.rm = TRUE: a row with srhs present but Dead missing evaluates to NA
    # and would otherwise turn the whole count (and the health fractions) NA
    total_alive_with_srhs = sum(!is.na(srhs) & Dead == 0, na.rm = TRUE),
    fraction_alive = total_alive / total_n_with_data,
    fraction_dead = 1 - fraction_alive,
    fraction_goodhealth = sum(good_health == 1, na.rm = TRUE) /
      total_alive_with_srhs,
    fraction_badhealth = 1 - fraction_goodhealth,
    .groups = "drop"
  ) %>%
  # Age and cluster depend only on the grouping keys, so they are derived
  # after summarising. Computing them inside summarise() yields one row per
  # observation (deprecated since dplyr 1.1) that then has to be collapsed
  # with unique().
  # NOTE(review): Age assumes posit 1 corresponds to age 52 and consecutive
  # rows are two years apart - confirm against the panel construction.
  mutate(
    Age = 52 + (posit - 1) * 2,
    cluster = as.factor(cluster_52_frailty_bl),
    # Mask health fractions based on fewer than n_bar living respondents
    fraction_goodhealth = ifelse(total_alive_with_srhs < n_bar, NA,
                                 fraction_goodhealth),
    fraction_badhealth = ifelse(total_alive_with_srhs < n_bar, NA,
                                fraction_badhealth)
  )

# Only one x scale is set: an xlim() added before scale_x_continuous() would
# simply be replaced by it, so the x range follows the data and annotations.
ggplot(plot_dta, aes(x = Age, y = fraction_dead, linetype = cluster)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 60, linetype = "dashed", color = "red",
             size = opt_size_vline) +
  # Cluster labels and population shares are hard-coded and placed by hand
  annotate("text",
           x = c(72, 68, 72, 57, 72, 53),
           y = c(0.02, 0.2, 0.37, 0.75, 0.6, 0.85),
           label = c("Type 1\n(48%)", "Type 2\n(29%)", "Type 3\n(13%)",
                     "Type 4\n(2%)", "Type 5\n(5%)", "Type 6\n(3%)"),
           color = "black", size = 5, fontface = "bold") +
  scale_x_continuous(breaks = unique(plot_dta$Age)) +
  scale_linetype_manual(values = c("1" = "dotted", "2" = "dotdash",
                                   "3" = "dashed", "4" = "twodash",
                                   "5" = "longdash", "6" = "solid")) +
  ylim(0, 1) +
  labs(y = "Fraction dead", linetype = "Cluster") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16),
    legend.position = "none",  # no legend; lines are labelled in the panel
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = opt_size_grid),
    panel.grid.minor.y = element_blank()
  )

# ggsave() writes last_plot(), i.e. the figure built just above
ggsave(
  filename = "output/Part2_output/Part2_b_graphs/fig13-3.pdf",
  width = opt_width,
  height = opt_heigth,
  dpi = opt_dpi,       # only affects rasterized layers
  device = cairo_pdf   # Cairo PDF device
)

# Optional EPS copy, written only when eps == 1 (eps is set outside this script)
if (eps == 1) {
  ggsave(
    filename = "output/Part2_output/Part2_b_graphs/fig13-3.eps",
    width = opt_width,
    height = opt_heigth,
    dpi = opt_dpi,      # only affects rasterized layers
    device = cairo_ps   # Cairo PostScript device
  )
}

# Report the elapsed wall-clock time since start_time
end_time <- Sys.time()
runtime <- difftime(end_time, start_time)
print(runtime)

# Remove every object that is not named in to_keep (the snapshot taken at the
# top of the script); to_keep itself is removed as well.
rm(list = setdiff(ls(), to_keep))

################################################################################
## APPENDIX F: Longer clustering period
################################################################################
# The working directory is reset to code_path_p2 before each script is sourced
setwd(code_path_p2)
source("Part2_b_clusteringlong_6.R")

setwd(code_path_p2)
source("Part2_b_clusteringlong_7.R")






