Figure 3.18
prestige.data <- read.table("./Duncan.txt")
pairs(prestige.data[, c("income", "education", "prestige")])
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
prestige.df <- tibble(prestige.data)
ggpairs(data = prestige.df, columns = c("income", "education", "prestige"))
Construct X, X’X, etc
X <- as.matrix(cbind(1, prestige.data[, c(2, 3)]))
t(X) %*% X
## 1 income education
## 1 45 1884 2365
## income 1884 105148 122197
## education 2365 122197 163265
solve(t(X) %*% X)
## 1 income education
## 1 0.1021058996 -8.495732e-04 -8.432006e-04
## income -0.0008495732 8.012198e-05 -4.766131e-05
## education -0.0008432006 -4.766131e-05 5.401182e-05
Y <- as.matrix(prestige.data[, "prestige"])
beta.estimate <- solve(crossprod(X), crossprod(X, Y))
beta.estimate
## [,1]
## 1 -6.0646629
## income 0.5987328
## education 0.5458339
sse <- crossprod(Y - X %*% beta.estimate)
sse
## [,1]
## [1,] 7506.699
mse <- sse / (45 - 2 - 1)
mse
## [,1]
## [1,] 178.7309
sigma.matrix <- as.numeric(mse) * solve(t(X) %*% X)
sigma.matrix
## 1 income education
## 1 18.249481 -0.151845008 -0.150706025
## income -0.151845 0.014320275 -0.008518551
## education -0.150706 -0.008518551 0.009653582
t(0.025, 42) critical value:
qt(1.0 - 0.025, 42) #take a look at the documentation of "qt"
## [1] 2.018082
# t statistic
beta.estimate[2] / sqrt(sigma.matrix[2, 2])
## [1] 5.00331
Mean prestige estimate given income = 42 and education = 84:
mean.prestige.est <- c(1, 42, 84) %*% beta.estimate