Figure 3.18

prestige.data <- read.table("./Duncan.txt")

pairs(prestige.data[, c("income", "education", "prestige")])

library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
prestige.df <- tibble(prestige.data)
ggpairs(data = prestige.df, columns = c("income", "education", "prestige"))

Construct X, X’X, etc

X <- as.matrix(cbind(1, prestige.data[, c(2, 3)]))
t(X) %*% X
##              1 income education
## 1           45   1884      2365
## income    1884 105148    122197
## education 2365 122197    163265
solve(t(X) %*% X)
##                       1        income     education
## 1          0.1021058996 -8.495732e-04 -8.432006e-04
## income    -0.0008495732  8.012198e-05 -4.766131e-05
## education -0.0008432006 -4.766131e-05  5.401182e-05
Y <- as.matrix(prestige.data[, "prestige"])
beta.estimate <- solve(crossprod(X), crossprod(X, Y))
beta.estimate
##                 [,1]
## 1         -6.0646629
## income     0.5987328
## education  0.5458339
sse <- crossprod(Y - X %*% beta.estimate)
sse
##          [,1]
## [1,] 7506.699
mse <- sse / (45 - 2 - 1)
mse
##          [,1]
## [1,] 178.7309
sigma.matrix <- as.numeric(mse) * solve(t(X) %*% X)
sigma.matrix
##                   1       income    education
## 1         18.249481 -0.151845008 -0.150706025
## income    -0.151845  0.014320275 -0.008518551
## education -0.150706 -0.008518551  0.009653582

t(0.025, 42) critical value:

qt(1.0 - 0.025, 42) #take a look at the documentation of "qt"
## [1] 2.018082
# t statistic
beta.estimate[2] / sqrt(sigma.matrix[2, 2])
## [1] 5.00331

Mean prestige estimate given income = 42 and education = 84:

mean.prestige.est <- c(1, 42, 84) %*% beta.estimate