geom_hline(yintercept = 0, color = "black", linetype = "solid", size = 1) +
theme_minimal() +
theme(
axis.text = element_text(size = 18),
title = element_blank(),
axis.title = element_text(size = 18),
panel.grid.major.x = element_line(color = "gray", linetype = "solid"),
panel.grid.minor.x = element_blank(),
legend.position = "none",  # Remove legend,
legend.direction = "horizontal",
legend.box = "horizontal",
legend.title = element_blank(),
legend.text = element_blank(),
legend.spacing.x = unit(0.5, "cm"),
legend.key.height = unit(2, "lines")  # Adjust the height of each legend row
) +
scale_color_manual(values = c("Model" = "black","Data"="black" ,
"Type 1" = "#1f78b4",
"Type 2" = "#ff7f00",
"Type 3" = "#e31a1c",
"Type 4" = "#33a02c",
"Type 5" = "#6a3d9a"))
# Save the current `plot_all` figure as a 6 x 6 inch, 300 dpi PNG.
# `filename` is spelled out: the previous `file =` only reached ggsave()
# through partial argument matching.
name1 <- "FractionAlive_by_type_mod_2"
ggsave(
  filename = paste0("tgfiles/Implications/Simulations/", name1, ".png"),
  plot = plot_all,
  width = 6, height = 6, units = "in", dpi = 300
)
################################################################################################
################################################################################################
#########################              GOOD HEALTH              ################################
# 6) Good Health, all individuals, data + model 2 --> Fraction_Goodhealth_all_mod2
# Builds `plot_all` from `data_to_plot` (x axis: `c_age`) and saves it as a PNG.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; left unchanged because the installed version is not visible here.
plot_all <- ggplot(data_to_plot, aes(x = c_age)) +
  # Dashed series on the "Data" colour key; its label "All" is drawn at x = 70,
  # at the height of the series minimum.
  geom_line(aes(y = fraction_goodhealth2, color = "Data"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth2 == min(fraction_goodhealth2)),
    aes(label = "All", x = 70, y = fraction_goodhealth2),
    vjust = +1.5, hjust = +1, size = 5, color = "black"
  ) +
  # Model 2: solid series on the "Model" colour key, labelled the same way.
  geom_line(aes(y = fraction_goodhealth, color = "Model"), size = 1.25, linetype = "solid") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth == min(fraction_goodhealth)),
    aes(label = "Model 2", x = 70, y = fraction_goodhealth),
    vjust = -1.5, hjust = +1, size = 5, color = "#666666"
  ) +
  labs(x = "Age", y = "Fraction in Good Health") +
  geom_hline(yintercept = 0, color = "black", linetype = "solid", size = 1) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 18),
    title = element_blank(),
    axis.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "gray", linetype = "solid"),
    panel.grid.minor.x = element_blank(),
    # The legend is switched off; the remaining legend.* entries only take
    # effect if legend.position is changed.
    legend.position = "none",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(face = "italic", size = 18),
    legend.spacing.x = unit(0.5, "cm"),
    legend.key.height = unit(2, "lines")  # Adjust the height of each legend row
  ) +
  scale_x_continuous(limits = c(52, 70)) +
  scale_y_continuous(limits = c(0, 1)) +
  scale_color_manual(values = c(
    "Model" = "#666666", "Data" = "black",
    "Type 1" = "#1f78b4",
    "Type 2" = "#ff7f00",
    "Type 3" = "#e31a1c",
    "Type 4" = "#33a02c",
    "Type 5" = "#6a3d9a"
  ))
name1 <- "Fraction_Goodhealth_all_mod2"
# `filename` spelled out (was `file =`, which relied on partial matching).
ggsave(
  filename = paste0("tgfiles/Implications/Simulations/", name1, ".png"),
  plot = plot_all,
  width = 6, height = 6, units = "in", dpi = 300
)
# 3) Fraction in good health, by cluster, data and model 2 --> FractionGoodhealth_by_type_mod2
# For each type there are three layers: a dashed line for the `_2` column, a
# text label at x = 70, and a solid line for the column without the suffix.
# NOTE(review): presumably dashed = data and solid = model, as in the
# all-individuals figure -- confirm.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; left unchanged because the installed version is not visible here.
plot_all <- ggplot(data_to_plot, aes(x = c_age)) +
  # Type 1
  geom_line(aes(y = fraction_goodhealth_type1_2, color = "Type 1"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth_type1_2 == min(fraction_goodhealth_type1_2)),
    aes(label = "Type 1", x = 70, y = fraction_goodhealth_type1_2),
    vjust = -1.5, hjust = +1, size = 5, color = "#1f78b4"
  ) +
  geom_line(aes(y = fraction_goodhealth_type1, color = "Type 1"), size = 1.25, linetype = "solid") +
  # Type 2
  geom_line(aes(y = fraction_goodhealth_type2_2, color = "Type 2"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth_type2_2 == min(fraction_goodhealth_type2_2)),
    aes(label = "Type 2", x = 70, y = fraction_goodhealth_type2_2),
    vjust = -1.5, hjust = +1, size = 5, color = "#ff7f00"
  ) +
  geom_line(aes(y = fraction_goodhealth_type2, color = "Type 2"), size = 1.25, linetype = "solid") +
  # Type 3
  geom_line(aes(y = fraction_goodhealth_type3_2, color = "Type 3"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth_type3_2 == min(fraction_goodhealth_type3_2)),
    aes(label = "Type 3", x = 70, y = fraction_goodhealth_type3_2),
    vjust = -2.5, hjust = +1, size = 5, color = "#e31a1c"
  ) +
  geom_line(aes(y = fraction_goodhealth_type3, color = "Type 3"), size = 1.25, linetype = "solid") +
  # Type 4 (label height is fixed at y = 0.4 rather than taken from the data)
  geom_line(aes(y = fraction_goodhealth_type4_2, color = "Type 4"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth_type4_2 == min(fraction_goodhealth_type4_2)),
    aes(label = "Type 4", x = 70, y = 0.4),
    vjust = -2.5, hjust = +1, size = 5, color = "#33a02c"
  ) +
  geom_line(aes(y = fraction_goodhealth_type4, color = "Type 4"), size = 1.25, linetype = "solid") +
  # Type 5
  geom_line(aes(y = fraction_goodhealth_type5_2, color = "Type 5"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_goodhealth_type5_2 == min(fraction_goodhealth_type5_2)),
    aes(label = "Type 5", x = 70, y = fraction_goodhealth_type5_2),
    vjust = -2.5, hjust = +1, size = 5, color = "#6a3d9a"
  ) +
  geom_line(aes(y = fraction_goodhealth_type5, color = "Type 5"), size = 1.25, linetype = "solid") +
  labs(x = "Age", y = "Fraction in Good Health") +
  geom_hline(yintercept = 0, color = "black", linetype = "solid", size = 1) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 18),
    title = element_blank(),
    axis.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "gray", linetype = "solid"),
    panel.grid.minor.x = element_blank(),
    legend.position = "none",  # Remove legend
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.text = element_blank(),
    legend.spacing.x = unit(0.5, "cm"),
    legend.key.height = unit(2, "lines")  # Adjust the height of each legend row
  ) +
  scale_color_manual(values = c(
    "Model" = "black", "Data" = "black",
    "Type 1" = "#1f78b4",
    "Type 2" = "#ff7f00",
    "Type 3" = "#e31a1c",
    "Type 4" = "#33a02c",
    "Type 5" = "#6a3d9a"
  ))
name1 <- "FractionGoodhealth_by_type_mod2"
# `filename` spelled out (was `file =`, which relied on partial matching).
ggsave(
  filename = paste0("tgfiles/Implications/Simulations/", name1, ".png"),
  plot = plot_all,
  width = 6, height = 6, units = "in", dpi = 300
)
################################################################################################
################################################################################################
#########################              BAD HEALTH              ################################
# 10) Bad Health, all individuals, data + model 2 --> Fraction_Badhealth_all_mod2
# Builds `plot_all` from `data_to_plot` (x axis: `c_age`) and saves it as a PNG.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; left unchanged because the installed version is not visible here.
plot_all <- ggplot(data_to_plot, aes(x = c_age)) +
  # Dashed series on the "Data" colour key; its label "All" is drawn at x = 70,
  # at the height of the series minimum.
  geom_line(aes(y = fraction_badhealth2, color = "Data"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth2 == min(fraction_badhealth2)),
    aes(label = "All", x = 70, y = fraction_badhealth2),
    vjust = +1.5, hjust = +1, size = 5, color = "black"
  ) +
  # Model 2: solid series on the "Model" colour key, labelled the same way.
  geom_line(aes(y = fraction_badhealth, color = "Model"), size = 1.25, linetype = "solid") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth == min(fraction_badhealth)),
    aes(label = "Model 2", x = 70, y = fraction_badhealth),
    vjust = -1.5, hjust = +1, size = 5, color = "#666666"
  ) +
  labs(x = "Age", y = "Fraction in Bad Health") +
  geom_hline(yintercept = 0, color = "black", linetype = "solid", size = 1) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 18),
    title = element_blank(),
    axis.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "gray", linetype = "solid"),
    panel.grid.minor.x = element_blank(),
    # The legend is switched off; the remaining legend.* entries only take
    # effect if legend.position is changed.
    legend.position = "none",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(face = "italic", size = 18),
    legend.spacing.x = unit(0.5, "cm"),
    legend.key.height = unit(2, "lines")  # Adjust the height of each legend row
  ) +
  scale_x_continuous(limits = c(52, 70)) +
  scale_y_continuous(limits = c(0, 1)) +
  scale_color_manual(values = c(
    "Model" = "#666666", "Data" = "black",
    "Type 1" = "#1f78b4",
    "Type 2" = "#ff7f00",
    "Type 3" = "#e31a1c",
    "Type 4" = "#33a02c",
    "Type 5" = "#6a3d9a"
  ))
name1 <- "Fraction_Badhealth_all_mod2"
# `filename` spelled out (was `file =`, which relied on partial matching).
ggsave(
  filename = paste0("tgfiles/Implications/Simulations/", name1, ".png"),
  plot = plot_all,
  width = 6, height = 6, units = "in", dpi = 300
)
# 3) Fraction in bad health, by cluster, data and model 2 --> FractionBadhealth_by_type_mod2
# For each type there are three layers: a dashed line for the `_2` column, a
# text label at x = 70, and a solid line for the column without the suffix.
# NOTE(review): presumably dashed = data and solid = model, as in the
# all-individuals figure -- confirm.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour of
# `linewidth`; left unchanged because the installed version is not visible here.
plot_all <- ggplot(data_to_plot, aes(x = c_age)) +
  # Type 1
  geom_line(aes(y = fraction_badhealth_type1_2, color = "Type 1"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth_type1_2 == min(fraction_badhealth_type1_2)),
    aes(label = "Type 1", x = 70, y = fraction_badhealth_type1_2),
    vjust = -1.5, hjust = +1, size = 5, color = "#1f78b4"
  ) +
  geom_line(aes(y = fraction_badhealth_type1, color = "Type 1"), size = 1.25, linetype = "solid") +
  # Type 2
  geom_line(aes(y = fraction_badhealth_type2_2, color = "Type 2"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth_type2_2 == min(fraction_badhealth_type2_2)),
    aes(label = "Type 2", x = 70, y = fraction_badhealth_type2_2),
    vjust = -2.5, hjust = +1, size = 5, color = "#ff7f00"
  ) +
  geom_line(aes(y = fraction_badhealth_type2, color = "Type 2"), size = 1.25, linetype = "solid") +
  # Type 3
  geom_line(aes(y = fraction_badhealth_type3_2, color = "Type 3"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth_type3_2 == min(fraction_badhealth_type3_2)),
    aes(label = "Type 3", x = 70, y = fraction_badhealth_type3_2),
    vjust = -3, hjust = +1, size = 5, color = "#e31a1c"
  ) +
  geom_line(aes(y = fraction_badhealth_type3, color = "Type 3"), size = 1.25, linetype = "solid") +
  # Type 4 (label height is fixed at y = 0.4 rather than taken from the data)
  geom_line(aes(y = fraction_badhealth_type4_2, color = "Type 4"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth_type4_2 == min(fraction_badhealth_type4_2)),
    aes(label = "Type 4", x = 70, y = 0.4),
    vjust = -2.5, hjust = +1, size = 5, color = "#33a02c"
  ) +
  geom_line(aes(y = fraction_badhealth_type4, color = "Type 4"), size = 1.25, linetype = "solid") +
  # Type 5 (label height is fixed at y = 0.9 rather than taken from the data)
  geom_line(aes(y = fraction_badhealth_type5_2, color = "Type 5"), size = 2, linetype = "dashed") +
  geom_text(
    data = subset(data_to_plot, fraction_badhealth_type5_2 == min(fraction_badhealth_type5_2)),
    aes(label = "Type 5", x = 70, y = 0.9),
    vjust = -2.5, hjust = +1, size = 5, color = "#6a3d9a"
  ) +
  geom_line(aes(y = fraction_badhealth_type5, color = "Type 5"), size = 1.25, linetype = "solid") +
  labs(x = "Age", y = "Fraction in Bad Health") +
  geom_hline(yintercept = 0, color = "black", linetype = "solid", size = 1) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 18),
    title = element_blank(),
    axis.title = element_text(size = 18),
    panel.grid.major.x = element_line(color = "gray", linetype = "solid"),
    panel.grid.minor.x = element_blank(),
    legend.position = "none",  # Remove legend
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_blank(),
    legend.text = element_blank(),
    legend.spacing.x = unit(0.5, "cm"),
    legend.key.height = unit(2, "lines")  # Adjust the height of each legend row
  ) +
  scale_color_manual(values = c(
    "Model" = "black", "Data" = "black",
    "Type 1" = "#1f78b4",
    "Type 2" = "#ff7f00",
    "Type 3" = "#e31a1c",
    "Type 4" = "#33a02c",
    "Type 5" = "#6a3d9a"
  ))
name1 <- "FractionBadhealth_by_type_mod2"
# `filename` spelled out (was `file =`, which relied on partial matching).
ggsave(
  filename = paste0("tgfiles/Implications/Simulations/", name1, ".png"),
  plot = plot_all,
  width = 6, height = 6, units = "in", dpi = 300
)
## This is the master code that produces all the results of the paper "Health Inequality and Health Types" by
## Margherita Borella, Francisco Bullano, Mariacristina De Nardi, Benjamin Krueger, and Elena Manresa.
# This version of the code: 2/12/2024
# The code is structured as follows:
# This main code calls subroutines that produce specific results.
# The main code is structured in 4 parts.
# Part 1: Setting directories and installing all required R packages
# Part 2: Get Main results of the paper
#         2.1)
#         2.2)
#         2.3)
#         2.4)
#         2.5)
#         2.6)
#         2.7)
# Part 3: Health types and analysis by Gender, Sex and Ethnicity
#         3.1)
#         3.2)
#         3.3)
# Part 4: Health types and implications for health dynamics
#         4.1)
#         4.2)
#         4.3)
## Part 1: Setting directories and installing all required R packages
# Clear the workspace and the console
rm(list = ls())
cat("\014")  # ctrl+L
# Set directories.
# Personal_laptop selects the machine-specific project root (1 or 0); every
# sub-directory is laid out identically under either root, so the sub-paths
# are derived once below instead of being repeated per machine.
Personal_laptop <- 1
if (Personal_laptop == 1) {
  main_path <- "C:/Users/bulla/Dropbox/Borella_Genetics/HRS Data Work/Cluster_Health_types_codes"
} else if (Personal_laptop == 0) {
  main_path <- "C:/Users/IRFAB01/Dropbox/Borella_Genetics_worksheet/Replication_Clustering_Health"
} else {
  # Previously an unexpected value left main_path undefined and the script
  # failed later at setwd() with an unrelated-looking error.
  stop("Personal_laptop must be 0 or 1, got: ", Personal_laptop, call. = FALSE)
}
data_path <- paste0(main_path, "/dtafiles")
code_path <- paste0(main_path, "/Rep_Files_codes")
code_path_p1 <- paste0(code_path, "/Part1")
code_path_p2 <- paste0(code_path, "/Part2")
code_path_p3 <- paste0(code_path, "/Part3")
code_path_p4 <- paste0(code_path, "/Part4")
setwd(main_path)
# Install required libraries:
# If this is the first time running the code, set intall_lib <- 1 to get all
# needed packages. If they are already installed, set intall_lib <- 0.
intall_lib <- 0
if (intall_lib == 1) {
  # The script is sourced from inside Part1, then the working directory is
  # reset to the project root.
  setwd(code_path_p1)
  source("Install_Libraries.R")
  setwd(main_path)
}
# Import relevant libraries.
import_lib <- 1
if (import_lib == 1) {
  setwd(code_path_p1)
  source("Import_Libraries.R")
  setwd(main_path)
}
# Need to upload data
setwd(main_path)
### Make sure this libraries are Loaded!
##
library(tidyverse)
library(haven)
library(stats)
library(ggplot2)
##
# Store environment: names that exist before this script creates its own
# objects (used further down to decide what to remove from the workspace).
to_keep <- ls()
# Birth year from the tracker file, keyed by rahhidpn = HHID and PN pasted
# together; a BIRTHYR of 0 is recoded to missing.
tracker_birth_year <- read_dta("dtafiles/trk2020tr_r.dta") %>%
  mutate(rahhidpn = as.numeric(paste0(HHID, PN))) %>%
  select(HHID, PN, rahhidpn, BIRTHYR) %>%
  mutate(BIRTHYR = ifelse(BIRTHYR == 0, NA, BIRTHYR))
# Panel sorted by person and year; rows with Dead == 1 get shlt = 6.
df <- read_dta("dtafiles/CleanPanelBalancedv6.dta") %>%
  mutate(shlt = ifelse(Dead == 1, 6, shlt)) %>%
  arrange(rahhidpn, year)
# Attach the tracker birth year. Missing ages are filled with
# year - birthyear; agey_e2 always holds year - birthyear.
df <- left_join(df, tracker_birth_year, by = "rahhidpn") %>%
  mutate(birthyear = BIRTHYR) %>%
  mutate(
    agey_e = ifelse(!is.na(agey_e), agey_e, year - birthyear),
    agey_e2 = year - birthyear
  )
# Write it down
write_dta(df, path = "dtafiles/CleanPanelBalancedv6_bis.dta")
## Original Ben's selection
# Same panel, but the birth year is inferred within person as
# max(year - agey_e) instead of being taken from the tracker file.
df_ben_O <- read_dta("dtafiles/CleanPanelBalancedv6.dta") %>%
  mutate(shlt = ifelse(Dead == 1, 6, shlt)) %>%
  group_by(rahhidpn) %>%
  # TRUE instead of T: T is an ordinary variable and can be reassigned.
  mutate(birthyear = max(year - agey_e, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(agey_e = ifelse(!is.na(agey_e), agey_e, year - birthyear))
## Ben's Selection
# Function factory: elem(m) returns a one-argument function that extracts
# the m-th element of its input via nth().
elem <- function(m) {
  function(x) nth(x, n = m)
}
# Collapse a person-wave panel to one row per respondent, carrying the first
# five values of `shlt` and `frailty_bl` as columns <col>n1 ... <col>n5.
#
# dta: panel with rahhidpn, Dead, shlt and frailty_bl.
# x:   not used by the body; kept so existing calls stay valid.
#
# Rows with missing frailty_bl, and a respondent's first row when it has
# Dead == 1, are dropped; respondents with fewer than five remaining rows
# are then excluded.
prepare <- function(dta, x) {
  first_five <- list(
    n1 = elem(1),
    n2 = elem(2),
    n3 = elem(3),
    n4 = elem(4),
    n5 = elem(5)
  )
  by_person <- group_by(dta, rahhidpn)
  usable <- filter(by_person, !(Dead == 1 & row_number() == 1), !is.na(frailty_bl))
  long_enough <- filter(usable, n() >= 5)
  widened <- mutate(
    long_enough,
    across(c("shlt", "frailty_bl"), first_five, .names = "{col}{fn}")
  )
  ungroup(filter(widened, row_number() == 1))
}
# Respondents kept by the original procedure: prepare() applied to the
# age 52-61 window of df_ben_O.
df_52_ben_O <- prepare(filter(df_ben_O, between(agey_e, 52, 61)), 51) %>%
  select(rahhidpn, starts_with("frailty_bln"))
## Get the ID
sample_ben <- df_52_ben_O$rahhidpn %>% unique()
## Age 52-61 rows of those respondents under both age definitions
## (df_ben_O: inferred birth year; df: tracker birth year).
df_ben_vo <- subset(df_ben_O, rahhidpn %in% sample_ben) %>% filter(between(agey_e, 52, 61))
df_ben_vf <- subset(df, rahhidpn %in% sample_ben) %>% filter(between(agey_e, 52, 61))

# Reduce a person-wave panel to each respondent's first row in the age 52-61
# window, together with the frailty values found on the following five rows.
# This pipeline used to be pasted three times below; one of the copies had a
# stray empty argument (",,") in its select() call.
#
# panel: data frame with rahhidpn, year, birthyear, agey_e, frailty_bl, Dead.
# Returns a data frame still grouped by rahhidpn with:
#   age_init / age_second  smallest and second-smallest age in the window
#   F4_Age / F5_Age        age 4 and 5 rows after the first row
#   frailty, F_frailty ... F5_frailty  frailty on the first row and the next 5
#   five_row  1 if frailty is non-missing on rows 1-5, else 0
#   last_row  1 if frailty is non-missing on rows 2-6, else 0
first_wave_frailty_path <- function(panel) {
  panel %>%
    filter(between(agey_e, 52, 61)) %>%
    arrange(rahhidpn, year) %>%
    group_by(rahhidpn) %>%
    mutate(
      n_obs = row_number(),
      age_init = min(agey_e),
      max_nobs = max(n_obs),
      age_second = sort(agey_e)[2],
      missing_frailty = sum(is.na(frailty_bl)),
      F4_Age = lead(agey_e, 4),
      F5_Age = lead(agey_e, 5),
      frailty = frailty_bl,
      F_frailty = lead(frailty_bl, 1),
      F2_frailty = lead(frailty_bl, 2),
      F3_frailty = lead(frailty_bl, 3),
      F4_frailty = lead(frailty_bl, 4),
      F5_frailty = lead(frailty_bl, 5),
      five_row = ifelse(!is.na(frailty) & !is.na(F_frailty) & !is.na(F2_frailty) &
                          !is.na(F3_frailty) & !is.na(F4_frailty), 1, 0),
      last_row = ifelse(!is.na(F_frailty) & !is.na(F2_frailty) & !is.na(F3_frailty) &
                          !is.na(F4_frailty) & !is.na(F5_frailty), 1, 0)
    ) %>%
    select(
      rahhidpn, year, birthyear, n_obs, age_init, max_nobs, age_second, F4_Age, F5_Age,
      missing_frailty, Dead, frailty, F_frailty, F2_frailty, F3_frailty, F4_frailty,
      F5_frailty, five_row, last_row
    ) %>%
    filter(n_obs == 1)
}

df_52bis_vo <- first_wave_frailty_path(df_ben_vo)
df_52bis_vf <- first_wave_frailty_path(df_ben_vf)
# Side-by-side comparison of the two age definitions (.x = inferred birth
# year, .y = tracker birth year), tabulated by window pattern.
df_all <- left_join(df_52bis_vo, df_52bis_vf, by = "rahhidpn")
analisis_ben <- df_all %>%
  group_by(age_init.x, age_init.y, five_row.x, last_row.x, five_row.y, last_row.y, age_second.y) %>%
  summarize(total_id = n())
###
# Full panel (not restricted to Ben's sample). death_1 / death_2 flag a
# frailty value of exactly 1 on the first / second row.
dff <- first_wave_frailty_path(df) %>%
  mutate(
    death_1 = ifelse(frailty == 1, 1, 0),
    death_2 = ifelse(F_frailty == 1, 1, 0)
  )
### Generate conditions for inclusion ->
# crit_1: first age below 54, rows 1-5 complete, death_1 == 0.
# crit_2: second age below 54, rows 1-5 incomplete but rows 2-6 complete,
#         death_2 == 0.
dff <- dff %>%
  mutate(
    crit_1 = ifelse(age_init < 54 & five_row == 1 & death_1 == 0, 1, 0),
    crit_2 = ifelse(age_second < 54 & five_row == 0 & last_row == 1 & death_2 == 0, 1, 0)
  ) %>%
  filter(crit_1 == 1 | crit_2 == 1)
#### Create documentation for this sample
## Strict sample: Age 52-53
## Original Sample
# Respondents admitted through crit_2 start their five-row window on the
# second row, so each frailty slot (and the initial/final age) is read from
# one position later. The assignments run left to right, so every slot still
# sees the unshifted value of the slot after it.
df_strict <- dff %>%
  ungroup() %>%
  mutate(
    from_second = crit_2 == 1,
    frailty = ifelse(from_second, F_frailty, frailty),
    F_frailty = ifelse(from_second, F2_frailty, F_frailty),
    F2_frailty = ifelse(from_second, F3_frailty, F2_frailty),
    F3_frailty = ifelse(from_second, F4_frailty, F3_frailty),
    F4_frailty = ifelse(from_second, F5_frailty, F4_frailty),
    Initial_Age = ifelse(from_second, age_second, age_init),
    Final_Age = ifelse(from_second, F5_Age, F4_Age)
  ) %>%
  select(rahhidpn, frailty, F_frailty, F2_frailty, F3_frailty, F4_frailty, Initial_Age, Final_Age)
# Broad sample: same shift whenever rows 1-5 are incomplete but rows 2-6 are
# complete; respondents with neither window complete are dropped.
df_broad <- df_52bis_vo %>%
  ungroup() %>%
  mutate(
    from_second = five_row == 0 & last_row == 1,
    frailty = ifelse(from_second, F_frailty, frailty),
    F_frailty = ifelse(from_second, F2_frailty, F_frailty),
    F2_frailty = ifelse(from_second, F3_frailty, F2_frailty),
    F3_frailty = ifelse(from_second, F4_frailty, F3_frailty),
    F4_frailty = ifelse(from_second, F5_frailty, F4_frailty),
    Initial_Age = ifelse(from_second, age_second, age_init),
    Final_Age = ifelse(from_second, F5_Age, F4_Age)
  ) %>%
  filter(!(five_row == 0 & last_row == 0)) %>%
  select(rahhidpn, frailty, F_frailty, F2_frailty, F3_frailty, F4_frailty, Initial_Age, Final_Age)
# Respondents present in both samples (columns suffixed .x = strict, .y = broad)
common_individuals <- inner_join(df_strict, df_broad, by = "rahhidpn")
## Run Clustering
## Missing: Run the clustering with df_strict and df_broad
# K-means clustering of respondents on the columns of `dta` ending in `var`.
#
# var: column-name suffix passed to ends_with().
# k:   number of clusters.
# dta: data frame with `rahhidpn` plus the columns to cluster on.
#
# Returns a data frame with `rahhidpn` and `cluster_52_frailty_bl`. Labels are
# the rank of each cluster's centre mean across the clustering columns, so
# label 1 is the cluster with the lowest mean.
# NOTE(review): kmeans() uses random starts and no seed is set in this
# function, so results are reproducible only if the caller fixes the seed.
cluster <- function(var, k, dta) {
  subdf <- dta %>% select("rahhidpn")
  features <- dta %>% select(ends_with(var)) %>% as.matrix()
  # The first matching column is excluded from the clustering input.
  # NOTE(review): for the data frames built in this script that first column
  # is the baseline `frailty` value -- confirm the exclusion is intended.
  # drop = FALSE keeps a matrix even when only one column remains.
  features <- features[, -1, drop = FALSE]
  fit <- kmeans(features, k, iter.max = 50, nstart = 1000)
  # Relabel the arbitrary k-means ids by the rank of each centre's mean.
  center_rank <- rank(apply(fit$centers, MARGIN = 1, mean))
  subdf[["cluster_52_frailty_bl"]] <- center_rank[fit$cluster]
  subdf
}
# Keep the same sample as Ben's (1) or the new strict sample (0).
# Any other value skips the clustering, as before.
Original_selection <- 1
if (Original_selection %in% c(0, 1)) {
  use_original <- Original_selection == 1
  dta <- if (use_original) df_broad else df_strict
  file_suffix <- if (use_original) "_Clusters_old.dta" else "_Clusters.dta"
  var <- c('frailty')
  # One output file per number of clusters k = 2, ..., 10.
  for (k in 2:10) {
    clustered <- cluster(var, k, dta)
    clustered_out <- inner_join(dta, clustered, by = "rahhidpn") %>%
      select(rahhidpn, cluster_52_frailty_bl, Initial_Age, Final_Age)
    # Both selections now join `clustered_out`. The Ben's-selection branch
    # used to build `clustered_out` and then join `clustered`, so Initial_Age
    # and Final_Age never reached the "_old" files.
    dta_out <- inner_join(df, clustered_out, by = "rahhidpn")
    write_dta(dta_out, path = paste0("dtafiles/P52_", k, file_suffix))
  }
}
# Clear environment: remove everything created since `to_keep` was recorded
rm(list = setdiff(ls(), c(to_keep)))
setwd(main_path)
### Make sure this libraries are Loaded!
##
#import statements
library(tidyverse)
library(haven)
library(stats)
library(ggplot2)
library(xtable)
library(stargazer)
library(coefplot)
library(gplots)
library(reshape2)
library(feisr)
library(dplyr)
library(knitr)
library(kpodclustr)
library(Cairo)
library(zoo)
library(labelled)
library(plot.matrix)
library(pscl)
library(multiUS)
library(factoextra)
library(RColorBrewer)
library(nnet)
library(caret)
library(lmtest)
library(pscl)
library(DescTools)
##
# Select the IDs that we are using in our sample.
# To save processing time, the data are loaded once here.
## Get the clustering panel
setwd(main_path)
data_final<-1
## data_final: 1) Age 52-53 selection. 0) Ben's selection
## NOTE(review): presumably read by Get_clustering_sample.R -- confirm.
# The script is sourced from inside the Part2 code directory.
setwd(code_path_p2)
source("Get_clustering_sample.R")
# This will run the selection and get data_clustering
setwd(main_path)
## Get main files: the panel is read relative to main_path
df <- read_dta("dtafiles/CleanPanelBalancedv6_bis.dta")
## Part4 a: Get data ready:
# Constructs all necessary variables
# The working directory is left at code_path_p4 after this call.
setwd(code_path_p4)
source('Part4_a_get_data_ready.R')
