/***************************************************************************/
/* Title: cleanBreastfeeding.do                                            */
/* Purpose: To load and format the breastfeeding data collected by Dan     */
/*  Sacks and Betsey Stevenson.                                            */
/* Last Modified: August 2, 2010                                           */
/* Author: John and Dan Sacks.                                             */
/***************************************************************************/

/***************************************************************************/
/* Preliminaries                                                           */
/***************************************************************************/

/* Start from an empty session. This line runs before the delimiter switch, */
/* so it is terminated by the end of line rather than by a semicolon.       */
clear all
#delimit ;
set mem 200m;
set matsize 800;
set more off;

/* Root folder for every input, output and log path used below */
global basedir "bulk/bpeaRR/breastfeedingSurvey";

/* Close any log left open by an earlier run, then open a fresh one.       */
/* The log command now carries its own terminator instead of relying on    */
/* the semicolon that happens to follow the next comment banner.           */
capture log close;
log using "$basedir/logs/assemble.log", replace;

/***************************************************************************/
/* Load the data                                                           */
/***************************************************************************/;

/*************************************************************************/
/* Note that the freakonomics and facebook results are stored separately */
/*************************************************************************/;

/* Scratch dataset for the NYT/Freakonomics responses. A tempfile is used  */
/* so that nothing is written into the project folder and no stale copy    */
/* is left behind if the script stops before the append.                   */
tempfile nytResponses;

/* NYT/Freakonomics responses: flag the source, drop the first two rows */
insheet using "$basedir/Breastfeeding_Survey_NYT.csv", comma;
gen nytimes = 1; /* indicator for whether the data came from freakonomics */
drop in 1/2;
save "`nytResponses'", replace;
clear;

/* Facebook responses: same treatment, flagged with nytimes equal to 0 */
insheet using "$basedir/Breastfeeding_Survey_FB.csv", comma;
gen nytimes = 0;
drop in 1/2;

/* Stack the two sources into one dataset */
append using "`nytResponses'";

/***************************************************************************/
/* Clean the data                                                          */
/***************************************************************************/;

/******************************************************/
/* Drop empty rows and confidential/useless variables */
/******************************************************/;

/* Discard the eight raw columns that are not carried into the cleaned data */
drop v1 v2 v3 v4 v5 v6 v7 v71;

/*****************************************************/
/* Rename relevant variables and code them as floats */
/*****************************************************/;

/* v8 holds the raw timestamp text */
rename v8 date;

/* Walk through raw columns v9 to v70 in order. The list below gives the   */
/* clean name for each column. Names starting with text_ are free-text     */
/* answers and are only renamed. Every other column is converted to a      */
/* numeric copy under the clean name, with force turning non-numeric text  */
/* into missing values.                                                    */
local col = 9;
foreach target in
    finished highestEduc yearBorn sex maritalStatus numChildren
    yearLastChildBorn numWeeksLeave numHoursWorkAfterBirth
    numWeeksLeaveSpouse numHrsWorkSpouse breastfed
    reasonsNotnoBenefit reasonsNottroubleLatching reasonsNotpainful
    reasonsNotnoTime reasonsNottooDifficult reasonsNotnotComfortable
    reasonsNotother
    text_reasonsNotother
    stopAge ageFedOther portionBreastmilk exclusive formula
    hoursBreastfeeding
    reasonsWhyhealthBenefits reasonsWhyintelligenceBenefits
    reasonsWhybonding reasonsWhyenjoyedTime
    reasonsWhymothersHealthBenefits reasonsWhyconvenience
    reasonsWhyother
    text_reasonsWhyother
    reasonsStoppedstillBreastfeeding reasonsStoppedeatingSolids
    reasonsStoppednoFurtherBenefit reasonsStoppedtroubleLatching
    reasonsStoppedpainful reasonsStoppednoTime
    reasonsStoppednoPlaceToPumpAtWk reasonsStoppeduncomftPumpingAtWk
    reasonsStoppedtroublePumping reasonsStoppedtooDifficult2
    reasonsStoppednotComftWithIdea reasonsStoppedother
    text_reasonsStoppedother
    income25kOrLess income25to50k income50to75k income75to100k
    income100to150k income150to250k income250to500k income500kOrMore
    incomeNotSaying
    raceWhite raceBlack raceAmIndian raceAsianPI raceHispanic raceOther
    {;
    if substr("`target'", 1, 5) == "text_" {;
        rename v`col' `target';
    };
    else {;
        destring v`col', force gen(`target');
    };
    local ++col;
};

/****************************/
/* Now drop these variables */
/****************************/;

/* The numeric copies exist now, so remove the raw string columns. The     */
/* names are listed one by one because the new variables may sit between   */
/* the old ones, which makes a range such as v9-v27 unsafe.                */
drop
    v9 v10 v11 v12 v13 v14 v15 v16 v17 v18 v19 v20 v21 v22 v23 v24 v25 v26 v27
    v29 v30 v31 v32 v33 v34 v35 v36 v37 v38 v39 v40 v41
    v43 v44 v45 v46 v47 v48 v49 v50 v51 v52 v53 v54
    v56 v57 v58 v59 v60 v61 v62 v63 v64 v65 v66 v67 v68 v69 v70;

/***************************/
/* Code up date and time   */
/***************************/

/* Break the raw timestamp at the space into date1 and date2 */
split date, parse(" ");

/* The second piece is kept as the time of day */
rename date2 time;

/* Replace the text timestamp with a numeric date parsed as year-month-day */
drop date;
gen date = date(date1, "YMD");
drop date1;


/*************************************************************************/
/* Deal with categorical variables                                       */
/*************************************************************************/

/********************************************************************/
/* The five problematic variables are race, income, reasonsNot      */
/* (breastfed),                                                     */
/* reasonsStopped (breastfeeding), and reasonsWhy (breastfeeding).  */
/* The trouble is that individuals could potentially choose         */
/* multiple answers, so for each possible answer, the raw data      */
/* contain a variable equal to one if you gave that answer,         */
/* and missing otherwise. I turn these data into a categorical      */
/* indicating each of the possible answers, except for people       */
/* who give multiple answers (which it indicates). The variable     */
/* is missing if and only if an individual didn't give any          */
/* answer; i.e. if all of the raw variables are missing.            */
/********************************************************************/;


/************************************************/
/* figure out when things are always missing,   */
/* fill in the 1/0 variables so that zero       */
/* indicates "not this category" and missing    */
/* indicates "no response given."               */
/************************************************/;

/* For each multi-response question, work out who gave no answer at all,   */
/* then turn the remaining missings into explicit zeros. After this step,  */
/* within a question family:                                               */
/*     1       means the respondent picked this option                     */
/*     0       means the respondent answered but did not pick this option  */
/*     missing means the respondent picked none of the options             */
/*                                                                         */
/* reasonsNot is included here so that it is treated like the other        */
/* multi-response families, which are all recoded the same way below.      */
/* NOTE(review): this changes the not_ indicators from 1/missing to        */
/* 1/0/missing. Confirm downstream use before relying on the old coding.   */
foreach q in race income reasonsNot reasonsStopped reasonsWhy {;

    /* Flag starts at 1 (no answer) and is cleared by any non-missing option */
    gen missing_`q' = 1;
    foreach var of varlist `q'* {;
        replace missing_`q' = 0 if !missing(`var');
    };

    /* Fill zeros only for respondents who answered at least one option */
    foreach var of varlist `q'* {;
        replace `var' = 0 if missing(`var') & !missing_`q';
    };

    /* The helper flag is dropped by exact name rather than by wildcard */
    drop missing_`q';
};

/***********************************************/
/* Make a single categorical variable for race */
/***********************************************/;

/* Collapse the race indicators into one variable:                         */
/*   1-6 for a single selected group, in the order listed in the loop      */
/*   9   when more than one group was selected                             */
/*   .   when no group was selected                                        */
/* Zero is used as a working value meaning nothing selected so far.        */
gen race = 0;
local code = 0;
foreach grp in White Black AmIndian AsianPI Hispanic Other {;
    local ++code;
    /* Already coded from an earlier group, so this is a multiple response */
    replace race = 9 if race`grp' == 1 & race != 0;
    /* First group selected by this respondent */
    replace race = `code' if race`grp' == 1 & race == 0;
    /* Keep the indicator itself under a shorter prefixed name */
    rename race`grp' race_`grp';
};
replace race = . if race == 0;
/*************************************************/
/* Make a single categorical variable for income */
/*************************************************/;

/* Collapse the income indicators into one variable:                       */
/*   1-9 for a single selected bracket, in the order listed in the loop    */
/*       (9 is the not-saying option)                                      */
/*   99  when more than one bracket was selected                           */
/*   .   when no bracket was selected                                      */
/* Zero is used as a working value meaning nothing selected so far.        */
gen income = 0;
local code = 0;
foreach bracket in 25kOrLess 25to50k 50to75k 75to100k 100to150k
    150to250k 250to500k 500kOrMore NotSaying {;
    local ++code;
    /* Already coded from an earlier bracket, so this is a multiple response */
    replace income = 99 if income`bracket' == 1 & income != 0;
    /* First bracket selected by this respondent */
    replace income = `code' if income`bracket' == 1 & income == 0;
};
replace income = . if income == 0;

/************************************************************************/
/* Make a single categorical variable for reasons for not breastfeeding */
/************************************************************************/;

/* Collapse the reasons for not breastfeeding into one variable:           */
/*   1-7 for a single selected reason, in the order listed in the loop     */
/*   9   when more than one reason was selected                            */
/*   .   when no reason was selected                                       */
/* Zero is used as a working value meaning nothing selected so far.        */
gen reasonNot = 0;
local code = 0;
foreach reason in noBenefit troubleLatching painful noTime
    tooDifficult notComfortable other {;
    local ++code;
    /* Already coded from an earlier reason, so this is a multiple response */
    replace reasonNot = 9 if reasonsNot`reason' == 1 & reasonNot != 0;
    /* First reason selected by this respondent */
    replace reasonNot = `code' if reasonsNot`reason' == 1 & reasonNot == 0;
    /* Keep the indicator itself under the shorter not_ prefix */
    rename reasonsNot`reason' not_`reason';
};
replace reasonNot = . if reasonNot == 0;

/************************************************************************/
/* Make a single categorical variable for reasons for stopping          */
/************************************************************************/;

/* Collapse the reasons for stopping into one variable:                    */
/*   1-12 for a single selected reason, in the order listed in the loop    */
/*   99   when more than one reason was selected                           */
/*   .    when no reason was selected                                      */
/* Zero is used as a working value meaning nothing selected so far.        */
gen reasonStopped = 0;
local code = 0;
foreach reason in stillBreastfeeding eatingSolids noFurtherBenefit
    troubleLatching painful noTime noPlaceToPumpAtWk uncomftPumpingAtWk
    troublePumping tooDifficult2 notComftWithIdea other {;
    local ++code;
    /* Already coded from an earlier reason, so this is a multiple response */
    replace reasonStopped = 99 if reasonsStopped`reason' == 1 & reasonStopped != 0;
    /* First reason selected by this respondent */
    replace reasonStopped = `code' if reasonsStopped`reason' == 1 & reasonStopped == 0;
    /* Keep the indicator itself under the shorter stop_ prefix */
    rename reasonsStopped`reason' stop_`reason';
};
replace reasonStopped = . if reasonStopped == 0;

/************************************************************************/
/* Make a single categorical variable for reasons for breastfeeding     */
/************************************************************************/;    

/* Collapse the reasons for breastfeeding into one variable:               */
/*   1-7 for a single selected reason, in the order listed in the loop     */
/*   9   when more than one reason was selected                            */
/*   .   when no reason was selected                                       */
/* Zero is used as a working value meaning nothing selected so far.        */
gen reasonWhy = 0;
local code = 0;
foreach reason in healthBenefits intelligenceBenefits bonding enjoyedTime
    mothersHealthBenefits convenience other {;
    local ++code;
    /* Already coded from an earlier reason, so this is a multiple response */
    replace reasonWhy = 9 if reasonsWhy`reason' == 1 & reasonWhy != 0;
    /* First reason selected by this respondent */
    replace reasonWhy = `code' if reasonsWhy`reason' == 1 & reasonWhy == 0;
    /* Keep the indicator itself under the shorter why_ prefix */
    rename reasonsWhy`reason' why_`reason';
};
replace reasonWhy = . if reasonWhy == 0;

/**********************************/
/* Label the usable variables     */
/**********************************/;

/* Highest education, codes 1 to 12.                                       */
/* NOTE(review): the variable label text contains the spelling recieved.   */
/* It is left unchanged here in case anything matches on the label text.   */
label variable highestEduc "Highest education recieved";
label define educ
    1  "1. <HS"
    2  "2. HS or equiv"
    3  "3. Some college"
    4  "4. AA"
    5  "5.BA"
    6  "6. MBA"
    7  "7. MD"
    8  "8. Other prof"
    9  "9. Vet"
    10 "10. PhD"
    11 "11. Other master's"
    12 "12. JD";
label values highestEduc educ;

/* Marital status, codes 1 to 6 */
label variable maritalStatus "Current marital status";
label define maritalStatus2
    1 "1. Never Married"
    2 "2. Married"
    3 "3. Partnered"
    4 "4. Separated"
    5 "5. Divorced"
    6 "6. Widowed";
label values maritalStatus maritalStatus2;

/* Number of children, codes 1 to 4 (code 1 means none) */
label variable numChildren "How many children do you have";
label define numChildren2
    1 "1. None"
    2 "2. One"
    3 "3. Two"
    4 "4. More than two";
label values numChildren numChildren2;

/* Respondent leave after the last birth, codes 1 to 9 */
label variable numWeeksLeave "Weeks off after birth of last child";
label define numWeeksLeave2
    1 "1. No time off"
    2 "2. <2 weeks"
    3 "3. 2-4 weeks"
    4 "4. 4-6 weeks"
    5 "5. 6-8 weeks"
    6 "6. 8-12 weeks"
    7 "7. 12-18 weeks"
    8 "8. 18-26 weeks"
    9 "9. 6 months or more";
label values numWeeksLeave numWeeksLeave2;

/* Respondent weekly hours after the last birth, codes 1 to 7 */
label variable numHoursWorkAfterBirth "Hours worked/wk after birth of last child";
label define numHoursWorkAfterBirth2
    1 "1. Have not returned to work since the birth of my last child"
    2 "2. <20 hours a week"
    3 "3. 20-30 hours a week"
    4 "4. 30-40 hours a week"
    5 "5. 40 hours a week"
    6 "6. 41-50 hours a week"
    7 "7. 50 or more hours a week";
label values numHoursWorkAfterBirth numHoursWorkAfterBirth2;

/* Spouse or partner leave after the last birth, codes 1 to 10 */
label variable numWeeksLeaveSpouse "Spouse's weeks off after birth of last child";
label define numWeeksLeaveSpouse2
    1  "1. I didnt have a spouse or partner living with me at the time that my last child was born"
    2  "2. He/she didnt take any time off work"
    3  "3. Less than 2 weeks"
    4  "4. 2 to less than 4 weeks"
    5  "5. 4 to less than 6 weeks"
    6  "6. 6 to less than 8 weeks"
    7  "7. 8 to less than 12 weeks"
    8  "8. 12 to less than 18 weeks"
    9  "9. 18 to less than 26 weeks "
    10 "10. 6 months or more";
label values numWeeksLeaveSpouse numWeeksLeaveSpouse2;

/* Spouse or partner weekly hours after the last birth, codes 1 to 7 */
label variable numHrsWorkSpouse "Spouse's hours worked/wk after birth of last child";
label define numHrsWorkSpouse2
    1 "1. My spouse/partner hasn't returned to work since the birth of our last child"
    2 "2. Less than 20 hours per week"
    3 "3. 20 to less than 30 hours per week"
    4 "4. 30 to less than 40 hours per week"
    5 "5. 40 hours per week"
    6 "6. 41 to 50 hours per week"
    7 "7. 50 or more hours per week";
label values numHrsWorkSpouse numHrsWorkSpouse2;

/* Share of food that was breastmilk at six months, codes 1 to 4 */
label variable portionBreastmilk "% of food = breastmilk at 6 months";
label define portionBreastmilk2
    1 "1. <0.25"
    2 "2. 0.25-0.5"
    3 "3. 0.5-0.75"
    4 "4. >=0.75";
label values portionBreastmilk portionBreastmilk2;

/* Daily hours spent breastfeeding or pumping, codes 1 to 8 */
label variable hoursBreastfeeding "Hrs/day breastfed/pumped, first 6 months";
label define hoursBreastfeeding2
    1 "1. Less than 1 hour per day"
    2 "2. 1 to less than 2 hours per day"
    3 "3. 2 to less than 3 hours per day"
    4 "4. 3 to less than 4 hours per day"
    5 "5. 4 to less than 5 hours per day"
    6 "6. 5 to less than 6 hours per day"
    7 "7. More than 6 hours per day"
    8 "8. Dont know";
label values hoursBreastfeeding hoursBreastfeeding2;

/* Sex of respondent */
label variable sex "Sex of respondent";
label define sex2
    1 "1. Male"
    2 "2. Female";
label values sex sex2;

/* Three yes/no items share the same coding. Each gets its own value       */
/* label, named after the variable with a trailing 2.                      */
foreach yn in breastfed exclusive formula {;
    label define `yn'2
        1 "1. Yes"
        2 "2. No";
    label values `yn' `yn'2;
};
label variable breastfed "Last child ever breastfed";
label variable exclusive "Ever exclusively breastfed";
label variable formula "Ever regularly supplemented w/formula";

/* Age at which breastfeeding stopped, codes 1 to 13.                      */
/* NOTE(review): the bin texts skip 2-3 and 10-11 months. Check them       */
/* against the survey instrument. They are reproduced unchanged here.      */
label variable stopAge "Age stopped breastfeeding child";
label define stopAge2
    1  "1. <= 1 month"
    2  "2. 1-2 month"
    3  "3. 3-4 month"
    4  "4. 4-5 month"
    5  "5. 5-6 month"
    6  "6. 6-7 month"
    7  "7. 7-8 month"
    8  "8. 8-9 months"
    9  "9. 9-10 months"
    10 "10. 11-12 months"
    11 "11. 12-18 months"
    12 "12. 18-24 months"
    13 "13. >24 months";
label values stopAge stopAge2;

/* Age at which other food was first given, codes 1 to 10 */
label variable ageFedOther "Age first fed other food";
label define ageFedOther2
    1  "1. <1 month"
    2  "2. 1 to 2 months"
    3  "3. 2 to 3 months"
    4  "4. 3 to 4 months"
    5  "5. 4 to 5 months"
    6  "6. 5 to 6 months"
    7  "7. 6 to 7 months"
    8  "8. 7 to 8 months"
    9  "9. 8 to 9 months"
    10 "10. >9 months";
label values ageFedOther ageFedOther2;

/* Survey source flag set when the two files were loaded */
label variable nytimes "Source = nytimes survey";
label define nytimes2
    0 "0. Facebook Survey"
    1 "1. Freakonomics Survey";
label values nytimes nytimes2;

/* Collapsed race variable (9 marks multiple selections) */
label variable race "Race/Ethnicity";
label define race2
    1 "1. White"
    2 "2. Black"
    3 "3. American Indian"
    4 "4. Asian/Pacific"
    5 "5. Hispanic"
    6 "6. Other"
    9 "9. Multiple races";
label values race race2;

/* Collapsed income variable (99 marks multiple selections) */
label variable income "Total hh inc, last 12 months";
label define income2
    1  "1. Family income < 25k last 12 months"
    2  "2. 25k - 50k"
    3  "3. 50k - 75k"
    4  "4. 75k - 100k"
    5  "5. 100k-150k"
    6  "6. 150k-250k"
    7  "7. 250k-500k"
    8  "8. >=500k"
    9  "9. Prefer not to say"
    99 "99. Multiple responses given";
label values income income2;

/* Collapsed reason for not breastfeeding (9 marks multiple selections) */
label variable reasonNot "Reason for not breastfeeding";
label define reasonNot2
    1 "1. No benefit"
    2 "2. Trouble latching"
    3 "3. Painful"
    4 "4. No time"
    5 "5. Too difficult"
    6 "6. Not comfortable"
    7 "7. Other"
    9 "9. Multiple reasons given";
label values reasonNot reasonNot2;

/* Collapsed reason for stopping (99 marks multiple selections) */
label variable reasonStopped "Reason for stopping breastfeeding";
label define reasonStopped2
    1  "1. Still breastfeeding"
    2  "2. Eating solids"
    3  "3. No further benefit"
    4  "4. Trouble latching"
    5  "5. Painful"
    6  "6. No time"
    7  "7. No plc to pump at wrk"
    8  "8. Uncomft pump at wrk"
    9  "9. Trouble pumping"
    10 "10. Too difficult"
    11 "11. Not cmfrt w/idea"
    12 "12. Other"
    99 "99. Multiple reasons given";
label values reasonStopped reasonStopped2;

/* Collapsed reason for breastfeeding (9 marks multiple selections) */
label variable reasonWhy "Reason for breastfeeding";
label define reasonWhy2
    1 "1. Health"
    2 "2. Intelligence"
    3 "3. Bonding"
    4 "4. Enjoyed time"
    5 "5. Health of mom"
    6 "6. Convenience"
    7 "7. Other"
    9 "9. Multiple reasons given";
label values reasonWhy reasonWhy2;

/* Survey completion flag */
label variable finished "Whether finished survey";
label define finished2
    0 "0. Did not finish"
    1 "1. Finished";
label values finished finished2;

/* Variables that only need a descriptive label */
label variable date              "Date survey taken";
label variable time              "Time survey taken";
label variable yearBorn          "Year respondent born";
label variable yearLastChildBorn "Year last child born";

/*************************/
/* Save the new data set */
/*************************/;

/* Print the final variable list to the log */
describe;

/* Labelled Stata dataset */
save "$basedir/breastfeedingSurvey", replace;

/* Plain CSV copy holding the numeric codes rather than the label text. */
/* The value labels are then cleared from memory.                       */
outsheet using "$basedir/breastfeedingSurvey.csv", comma nolabel replace;
label drop _all;

log close;

