# Load the raw data set into a data frame.
data <- read.csv("C:/Users/Bamb0o0o0/Desktop/dataset.csv", header = TRUE)

# Dependent variable: natural log of earnings.
data$logearnings <- log(data$earnings)

# 0/1 indicator columns occ1..occ4, one per occupation code.
for (occ_code in 1:4) {
  data[[paste0("occ", occ_code)]] <- ifelse(data$occ == occ_code, 1, 0)
}

# 0/1 indicator columns for the region codes (name -> code in `region`).
region_codes <- c(northeast = 1, south = 2, midwest = 3, west = 4, central = 5)
for (region_name in names(region_codes)) {
  data[[region_name]] <- ifelse(data$region == region_codes[[region_name]], 1, 0)
}

# Potential experience: age minus years of education minus the age at which
# people enter primary school (6), plus its square and cube.
school_entry_age <- 6
data$exp <- data$age - data$edu - school_entry_age
data$exp_2 <- data$exp^2
data$exp_3 <- data$exp^3

# Interaction between education and ability.
data$edu_ability <- data$edu * data$ability

# Working data frame: everything except the raw earnings, occ and region columns.
Data <- data[, setdiff(names(data), c("earnings", "occ", "region")), drop = FALSE]

# Split by the `female` flag (rows where the flag is NA are dropped).
female_earnings <- Data[which(Data$female == 1), , drop = FALSE]
male_earnings <- Data[which(Data$female == 0), , drop = FALSE]
############################################################################################
################ Bootstrap ################
############################################################################################
### Bootstrap of the log-earnings OLS regression, fitted separately for the
### female and the male part of each resample.
###
### Reads:  Data (one row per observation, built above).
### Writes: matrix_Female, matrix_Male -- one row per bootstrap replicate,
###         one column per coefficient (intercept + 10 regressors).
###
### Each replicate draws nrow(Data) row positions with replacement and takes
### all of them in a single indexing step. The earlier version appended the
### rows one by one with rbind(), which copies the growing data frame on every
### append and is what made the script so slow. It also used sampled `id`
### values as row numbers and a fixed draw size of 10000; sampling positions
### gives the same draws whenever `id` is 1..n and the data has 10000 rows,
### and stays correct when it does not.
n_boot <- 500
predictors <- c("edu", "northeast", "south", "midwest", "age", "ability",
                "exp_2", "exp_3", "edu_ability", "west")
earnings_formula <- reformulate(predictors, response = "logearnings")

### preallocated matrices for the OLS coefficients
matrix_Female <- matrix(NA_real_, nrow = n_boot, ncol = length(predictors) + 1)
matrix_Male <- matrix(NA_real_, nrow = n_boot, ncol = length(predictors) + 1)
### keep the column labels the rbind()-based version produced, so any later
### code that looks coefficients up by name keeps working
colnames(matrix_Female) <- c("(Intercept)",
                             paste0("df_female_earnings$", predictors))
colnames(matrix_Male) <- c("(Intercept)",
                           paste0("df_male_earnings$", predictors))

n_obs <- nrow(Data)
for (s in seq_len(n_boot)) {
  ### create new sample: row positions drawn with replacement
  A <- sample.int(n_obs, size = n_obs, replace = TRUE)
  df <- Data[A, , drop = FALSE]
  df_female_earnings <- df[which(df$female == 1), , drop = FALSE]
  df_male_earnings <- df[which(df$female == 0), , drop = FALSE]

  ### OLS for new sample; coef() keeps the formula's term order, which is the
  ### column order of the result matrices
  Model_11_F <- lm(earnings_formula, data = df_female_earnings)
  matrix_Female[s, ] <- coef(Model_11_F)

  Model_11_M <- lm(earnings_formula, data = df_male_earnings)
  matrix_Male[s, ] <- coef(Model_11_M)
}
# Bootstrap mean of every coefficient (column-wise over the replicates).
Female_Mean <- colMeans(matrix_Female)
Male_Mean <- colMeans(matrix_Male)

# The mean vector repeated on every row, so it can be subtracted element-wise.
# Dimensions come from the replicate matrices instead of a hard-coded 500 x 11,
# so this keeps working if the number of replicates or regressors changes.
Matrix_Female_Mean <- matrix(Female_Mean, nrow = nrow(matrix_Female),
                             ncol = ncol(matrix_Female), byrow = TRUE)
Matrix_Male_Mean <- matrix(Male_Mean, nrow = nrow(matrix_Male),
                           ncol = ncol(matrix_Male), byrow = TRUE)

# deviation matrix: squared deviation of each replicate from the mean
# NOTE(review): `F` masks R's built-in alias for FALSE for the rest of the
# session; the name is kept only because later code may refer to it.
F <- (matrix_Female - Matrix_Female_Mean)^2
M <- (matrix_Male - Matrix_Male_Mean)^2

# Per-coefficient sums of squared deviations (presumably the numerator of the
# bootstrap variance -- confirm against the code that consumes them).
F_sum <- colSums(F)
M_sum <- colSums(M)
哦,现在贴,第一次发这样的帖子不知道该怎么办。
老师的要求就是不让用 boot package ,所以我就索性自己弄了。结果速度慢得一塌糊涂。