set.seed(2022)
第3章のRコード
第3章 確率論の基礎
乱数の設定
3.1.2 試行と事象
<- c("Head", "Tail")
coin sample(coin, 100, replace = TRUE)
[1] "Tail" "Head" "Tail" "Head" "Head" "Tail" "Tail" "Head" "Tail" "Tail"
[11] "Head" "Head" "Tail" "Tail" "Head" "Head" "Tail" "Head" "Tail" "Tail"
[21] "Head" "Head" "Head" "Tail" "Head" "Tail" "Tail" "Head" "Head" "Tail"
[31] "Head" "Tail" "Tail" "Tail" "Head" "Head" "Tail" "Head" "Head" "Tail"
[41] "Head" "Tail" "Tail" "Tail" "Head" "Tail" "Head" "Tail" "Tail" "Tail"
[51] "Head" "Tail" "Head" "Head" "Tail" "Head" "Head" "Head" "Head" "Tail"
[61] "Head" "Tail" "Head" "Head" "Tail" "Tail" "Head" "Head" "Tail" "Head"
[71] "Tail" "Head" "Tail" "Tail" "Tail" "Head" "Tail" "Head" "Tail" "Tail"
[81] "Tail" "Tail" "Head" "Head" "Head" "Tail" "Head" "Tail" "Tail" "Head"
[91] "Tail" "Tail" "Tail" "Head" "Tail" "Head" "Tail" "Tail" "Head" "Head"
3.1.3 コイン投げのシミュレーション
factorial(100) / (factorial(50) ^ 2) / (2 ^ 100)
[1] 0.07958924
<- c(1, 0)
coin <- sample(coin, 100, replace = TRUE)
z
sum(z)
[1] 55
<- 100000
S <- numeric(S)
rec <- c(1, 0)
coin
for(i in 1:S){
<- sample(coin, 100, replace = 100)
z <- sum(z)
rec[i]
}
hist(rec)
summary(rec)
Min. 1st Qu. Median Mean 3rd Qu. Max.
27.00 47.00 50.00 49.99 53.00 72.00
3.1.4 論理演算によるカウントの方法
2 > 1
[1] TRUE
2 > 1000
[1] FALSE
200 == 100 * 2
[1] TRUE
TRUE + TRUE
[1] 2
TRUE * FALSE
[1] 0
<- (rec == 50)
count head(count)
[1] FALSE FALSE FALSE FALSE TRUE FALSE
sum(count)
[1] 7966
mean(count)
[1] 0.07966
3.2.3 独立ではない例
# 独立性の確認
<- 100000 # シミュレーション回数
S <- rnorm(S, 50, 10) # Xを抽出
X <- rnorm(S, 50, 10) # Yを抽出
Y <- X + Y # Zを構成 Z
# Pr(X > 70) * Pr(Z > 100)
mean(X > 70) * mean(Z > 100)
[1] 0.01119444
# Pr(X > 70 かつ Z > 100)
mean((X > 70) * (Z > 100))
[1] 0.02207
3.2.4 独立性と相関係数
<- rnorm(100000, 50, 10)
X <- rnorm(100000, 50, 10)
Y
cor(X, Y)
[1] 0.003516287
<- - ((X - 50) ^ 2) / 10
Z cor(X, Z)
[1] 0.01343314
plot(X, Z)
3.3.1 分布関数
curve(pnorm(x, 50, 10), 0, 100)
pnorm(60, 50, 10) - pnorm(40, 50, 10)
[1] 0.6826895
3.3.2 確率密度関数
pnorm(50 + 0.1, 50, 10) - pnorm(50, 50, 10)
[1] 0.003989356
dnorm(50, 50, 10)
[1] 0.03989423
dnorm(80, 50, 10)
[1] 0.0004431848
curve(dnorm(x, 50, 10), 0, 100)
3.3.7 データによる条件付期待値の推定
<- read.csv("wage.csv")
malesdata
head(malesdata)
educ exper wage
1 7 16 548
2 12 9 481
3 12 16 721
4 11 10 250
5 12 16 729
6 12 8 500
summary(malesdata)
educ exper wage
Min. : 1.00 Min. : 0.000 Min. : 100.0
1st Qu.:12.00 1st Qu.: 6.000 1st Qu.: 394.2
Median :13.00 Median : 8.000 Median : 537.5
Mean :13.26 Mean : 8.856 Mean : 577.3
3rd Qu.:16.00 3rd Qu.:11.000 3rd Qu.: 708.8
Max. :18.00 Max. :23.000 Max. :2404.0
plot(malesdata)
<- malesdata[malesdata$school == 12, ] # 高校卒業者の抜き出し
sch12 <- malesdata[malesdata$school == 16, ] # 学部卒業者の抜き出し sch16
exp(mean(sch12$wage)) # 高校卒業者の平均賃金
[1] NaN
exp(mean(sch16$wage)) # 学部卒業者の平均賃金
[1] NaN
<- malesdata[malesdata$school <= 11, ] # 高卒未満の抜き出し
sch11 <- malesdata[malesdata$school >= 12, ] # 高卒以上の抜き出し sch12
exp(mean(sch11$wage)) # 高卒未満の平均賃金
[1] NaN
exp(mean(sch12$wage)) # 高卒以上の平均賃金
[1] NaN
3.4.2 中心極限定理のシミュレーション
<- 10000
S <- 10000
n <- numeric(S)
Zn
for(i in 1:S){
<- rnorm(n, 50, 10)
X <- mean(X)
Xbar <- var(X)
Sn <- sqrt(n) * (Xbar - 50) / sqrt(Sn)
Zn[i]
}
hist(Zn)
summary(Zn)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-3.786927 -0.685862 0.004751 0.000495 0.683271 3.361441
3.4.3 信頼区間のシミュレーション
# 信頼区間シミュレーション
<- 10000 # シミュレーション回数
S <- 10000 # 標本の大きさ
n <- numeric(S) # 結果記録用のベクトル
rec
for(i in 1:S){ # 繰り返し開始
<- rnorm(n, 50, 10) # N(50,10^2) から標本抽出
X <- mean(X)
Xbar <- var(X)
Sn <- (Xbar - 1.96 * sqrt(Sn / n) < 50) * (50 < Xbar + 1.96 * sqrt(Sn / n))
rec[i]
}
mean(rec) # 確率を計算
[1] 0.944
3.4.4 信頼区間の導出
pnorm(69.6,50,10) - pnorm(30.4,50,10)
[1] 0.9500042