library(dplyr)
library(ggplot2)
library(patchwork)
library(broom)
library(fixest)
library(modelsummary)
第13章のRコード
第13章 実証分析の手順
パッケージの読み込み
13.1 データを集める
13.1.2 Rの組み込みデータ
data()
help("datasets")
plot(women)
lm(height ~ weight,
data = women)
13.1.6 データをながめる
<- readr::read_csv("Males1980.csv")
malesdata
malesdata
# A tibble: 545 × 3
school exper wage
<dbl> <dbl> <dbl>
1 14 1 1.20
2 13 4 1.68
3 12 4 1.52
4 12 2 1.89
5 12 5 1.95
6 10 2 0.259
7 13 2 1.74
8 12 2 1.21
9 11 2 0.502
10 10 2 1.01
# ℹ 535 more rows
%>%
malesdata summary()
school exper wage
Min. : 3.00 Min. : 0.000 Min. :-1.114
1st Qu.:11.00 1st Qu.: 2.000 1st Qu.: 1.165
Median :12.00 Median : 3.000 Median : 1.448
Mean :11.77 Mean : 3.015 Mean : 1.393
3rd Qu.:12.00 3rd Qu.: 4.000 3rd Qu.: 1.742
Max. :16.00 Max. :11.000 Max. : 2.822
View(malesdata)
<- malesdata %>%
g1 ggplot(aes(x = wage)) +
geom_histogram(bins = 15)
<- malesdata %>%
g2 ggplot(aes(x = school)) +
geom_histogram(bins = 15)
<- malesdata %>%
g3 ggplot(aes(x = exper)) +
geom_histogram(bins = 15)
+ g2 + g3 + plot_layout(ncol = 1) g1
13.1.7 データの相関
cor(malesdata)
school exper wage
school 1.0000000 -0.57975653 0.18114299
exper -0.5797565 1.00000000 0.07689126
wage 0.1811430 0.07689126 1.00000000
13.2 推定結果を検証する
13.2.1 統計的有意性を確認する
%>%
malesdata lm(wage ~ school + exper,
data = .) %>%
tidy()
# A tibble: 3 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -0.161 0.224 -0.718 4.73e- 1
2 school 0.108 0.0161 6.73 4.22e-11
3 exper 0.0923 0.0170 5.43 8.66e- 8
%>%
malesdata lm(wage ~ school + exper,
data = .) %>%
glance() %>%
select(r.squared)
# A tibble: 1 × 1
r.squared
<dbl>
1 0.0827
13.2.2 モデルを変えてみる
<- readr::read_csv("Males.csv")
malesdata
<- malesdata %>%
malesdata mutate(black = 1 * (ethn == "black"),
industry = as.factor(industry),
industry = relevel(industry, ref = "Manufacturing"),
occupation = as.factor(occupation),
occupation = relevel(occupation, ref = "Clerical_and_kindred"))
%>%
malesdata filter(year == 1980) %>%
lm(wage ~ school + exper + black + health + married +
+ industry + occupation,
union data = .) %>%
tidy() %>%
filter(p.value < 0.01)
# A tibble: 8 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 school 0.0847 0.0166 5.11 4.43e-7
2 exper 0.0625 0.0170 3.67 2.69e-4
3 marriedyes 0.158 0.0596 2.66 8.08e-3
4 unionyes 0.210 0.0543 3.87 1.24e-4
5 industryEntertainment -0.698 0.204 -3.42 6.74e-4
6 industryMining -0.577 0.216 -2.68 7.66e-3
7 industryProfessional_and_Related Service -0.356 0.101 -3.53 4.44e-4
8 industryTrade -0.217 0.0708 -3.07 2.29e-3
%>%
malesdata filter(year == 1980) %>%
lm(wage ~ school + exper + black + health + married +
+ industry + occupation,
union data = .) %>%
glance() %>%
select(r.squared)
# A tibble: 1 × 1
r.squared
<dbl>
1 0.210
13.2.3 データを変えてみる
%>%
malesdata feols(wage ~ school + exper + black + health + married +
+ industry + occupation,
union data = .) %>%
tidy() %>%
filter(p.value < 0.01)
# A tibble: 15 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.418 0.0695 6.02 1.93e- 9
2 school 0.0845 0.00471 17.9 2.31e-69
3 exper 0.0434 0.00281 15.5 1.37e-52
4 black -0.127 0.0224 -5.66 1.65e- 8
5 marriedyes 0.0838 0.0152 5.52 3.55e- 8
6 unionyes 0.182 0.0172 10.6 6.40e-26
7 industryAgricultural -0.378 0.0524 -7.22 6.01e-13
8 industryBusiness_and_Repair_Service -0.143 0.0294 -4.87 1.13e- 6
9 industryConstruction -0.0898 0.0297 -3.02 2.51e- 3
10 industryEntertainment -0.522 0.0597 -8.74 3.23e-18
11 industryPersonal_Service -0.150 0.0571 -2.63 8.57e- 3
12 industryProfessional_and_Related Servi… -0.319 0.0312 -10.2 3.50e-24
13 industryTrade -0.225 0.0210 -10.8 1.16e-26
14 occupationManagers, Officials_and_Prop… 0.136 0.0318 4.28 1.94e- 5
15 occupationProfessional, Technical_and_… 0.158 0.0317 4.97 6.95e- 7
13.2.4 推定手法を変えてみる
%>%
malesdata feols(wage ~ school + exper + black + health + married +
+ industry + occupation | nr + year) %>%
union tidy() %>%
filter(p.value < 0.01)
The variables 'school', 'exper' and 'black' have been removed because of collinearity (see $collin.var).
# A tibble: 4 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 unionyes 0.0828 0.0221 3.75 1.95e-4
2 industryEntertainment -0.223 0.0685 -3.25 1.23e-3
3 industryTrade -0.106 0.0274 -3.87 1.23e-4
4 occupationManagers, Officials_and_Propri… 0.0896 0.0320 2.79 5.38e-3
補足:テーブルの作成方法
記述統計表
<- readr::read_csv("Males.csv")
malesdata
%>%
malesdata datasummary_skim(histogram = FALSE)
Unique (#) | Missing (%) | Mean | SD | Min | Median | Max | |
---|---|---|---|---|---|---|---|
...1 | 4360 | 0 | 2180.5 | 1258.8 | 1.0 | 2180.5 | 4360.0 |
nr | 545 | 0 | 5262.1 | 3496.1 | 13.0 | 4569.0 | 12548.0 |
year | 8 | 0 | 1983.5 | 2.3 | 1980.0 | 1983.5 | 1987.0 |
school | 13 | 0 | 11.8 | 1.7 | 3.0 | 12.0 | 16.0 |
exper | 19 | 0 | 6.5 | 2.8 | 0.0 | 6.0 | 18.0 |
wage | 3631 | 0 | 1.6 | 0.5 | −3.6 | 1.7 | 4.1 |
<- malesdata %>%
malesdata mutate(union = 1 * (union == "yes"),
black = 1 * (ethn == "black"),
married = 1 * (married == "yes"),
health = 1 * (health == "yes"))
%>%
malesdata select(wage, married, union, health,
%>%
school, exper, black) datasummary_skim(histogram = FALSE,
fmt = "%.2f")
Unique (#) | Missing (%) | Mean | SD | Min | Median | Max | |
---|---|---|---|---|---|---|---|
wage | 3631 | 0 | 1.65 | 0.53 | −3.58 | 1.67 | 4.05 |
married | 2 | 0 | 0.44 | 0.50 | 0.00 | 0.00 | 1.00 |
union | 2 | 0 | 0.24 | 0.43 | 0.00 | 0.00 | 1.00 |
health | 2 | 0 | 0.02 | 0.13 | 0.00 | 0.00 | 1.00 |
school | 13 | 0 | 11.77 | 1.75 | 3.00 | 12.00 | 16.00 |
exper | 19 | 0 | 6.51 | 2.83 | 0.00 | 6.00 | 18.00 |
black | 2 | 0 | 0.12 | 0.32 | 0.00 | 0.00 | 1.00 |
推定結果表
<- malesdata %>%
m1 feols(wage ~ married + union + health +
+ exper + black)
school
<- malesdata %>%
m2 feols(wage ~ married + union + health +
+ exper + black + industry + occupation)
school
<- malesdata %>%
m3 feols(wage ~ married + union + health +
+ exper + black + industry + occupation | nr + year) school
The variables 'school', 'exper' and 'black' have been removed because of collinearity (see $collin.var).
list(m1, m2, m3) %>%
modelsummary()
Model 1 | Model 2 | Model 3 | |
---|---|---|---|
(Intercept) | 0.034 | 0.040 | |
(0.062) | (0.084) | ||
married | 0.112 | 0.084 | 0.051 |
(0.016) | (0.015) | (0.021) | |
union | 0.184 | 0.182 | 0.083 |
(0.017) | (0.017) | (0.022) | |
health | −0.052 | −0.033 | −0.009 |
(0.057) | (0.054) | (0.048) | |
school | 0.103 | 0.084 | |
(0.004) | (0.005) | ||
exper | 0.050 | 0.043 | |
(0.003) | (0.003) | ||
black | −0.145 | −0.127 | |
(0.023) | (0.022) | ||
industryBusiness_and_Repair_Service | 0.235 | 0.016 | |
(0.057) | (0.072) | ||
industryConstruction | 0.288 | −0.005 | |
(0.057) | (0.079) | ||
industryEntertainment | −0.143 | −0.148 | |
(0.077) | (0.088) | ||
industryFinance | 0.410 | 0.204 | |
(0.064) | (0.087) | ||
industryManufacturing | 0.378 | 0.074 | |
(0.052) | (0.065) | ||
industryMining | 0.479 | 0.043 | |
(0.075) | (0.113) | ||
industryPersonal_Service | 0.228 | 0.064 | |
(0.075) | (0.082) | ||
industryProfessional_and_Related Service | 0.060 | −0.015 | |
(0.058) | (0.076) | ||
industryPublic_Administration | 0.292 | 0.056 | |
(0.062) | (0.076) | ||
industryTrade | 0.153 | −0.032 | |
(0.053) | (0.065) | ||
industryTransportation | 0.411 | 0.045 | |
(0.058) | (0.070) | ||
occupationCraftsmen, Foremen_and_kindred | 0.031 | 0.046 | |
(0.028) | (0.031) | ||
occupationFarm_Laborers_and_Foreman | −0.059 | 0.055 | |
(0.078) | (0.074) | ||
occupationLaborers_and_farmers | −0.067 | 0.040 | |
(0.032) | (0.032) | ||
occupationManagers, Officials_and_Proprietors | 0.136 | 0.090 | |
(0.032) | (0.032) | ||
occupationOperatives_and_kindred | −0.059 | 0.039 | |
(0.028) | (0.027) | ||
occupationProfessional, Technical_and_kindred | 0.158 | 0.092 | |
(0.032) | (0.036) | ||
occupationSales_Workers | 0.079 | 0.027 | |
(0.037) | (0.053) | ||
occupationService_Workers | −0.077 | 0.043 | |
(0.030) | (0.039) | ||
Num.Obs. | 4360 | 4360 | 4360 |
R2 | 0.184 | 0.261 | 0.622 |
R2 Adj. | 0.183 | 0.257 | 0.565 |
R2 Within | 0.024 | ||
R2 Within Adj. | 0.018 | ||
AIC | 6010.2 | 5615.2 | 2684.9 |
BIC | 6061.2 | 5787.5 | 2831.6 |
RMSE | 0.48 | 0.46 | 0.33 |
Std.Errors | IID | IID | by: nr |
FE: nr | X | ||
FE: year | X |
<- tribble(~term, ~`Model 1`, ~`Model 2`, ~`Model 3`,
rows "Industry", "No", "Yes", "Yes",
"Occupation", "No", "Yes", "Yes",
"Individual FE", "No", "No", "Yes",
"Year FE", "No", "No", "Yes")
attr(rows, "position") <- 15:18
msummary(list(m1, m2, m3),
coef_omit = "industry|occupation",
coef_rename = c("(Intercept)" = "切片"),
gof_map = "nobs",
add_rows = rows,
stars = TRUE,
notes = "備考:対数賃金を被説明変数として線形回帰モデルを最小二乗法により推定して得られた推定値。丸括弧内は標準誤差。")
Model 1 | Model 2 | Model 3 | |||||
---|---|---|---|---|---|---|---|
切片 | 0.034 | 0.040 | |||||
(0.062) | (0.084) | ||||||
married | 0.112*** | 0.084*** | 0.051* | ||||
(0.016) | (0.015) | (0.021) | |||||
union | 0.184*** | 0.182*** | 0.083*** | ||||
(0.017) | (0.017) | (0.022) | |||||
health | −0.052 | −0.033 | −0.009 | ||||
(0.057) | (0.054) | (0.048) | |||||
school | 0.103*** | 0.084*** | |||||
(0.004) | (0.005) | ||||||
exper | 0.050*** | 0.043*** | |||||
(0.003) | (0.003) | ||||||
black | −0.145*** | −0.127*** | |||||
(0.023) | (0.022) | ||||||
Industry | No | Yes | Yes | ||||
Occupation | No | Yes | Yes | ||||
Individual FE | No | No | Yes | ||||
Year FE | No | No | Yes | ||||
Num.Obs. | 4360 | 4360 | 4360 | ||||
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | |||||||
備考:対数賃金を被説明変数として線形回帰 モデルを最小二乗法により推定して得られた 推定値。丸括弧内は標準誤差。 |