# Chapter 29 Multiple linear regression
# Source: "Анализ данных в R. Множественная линейная регрессия"
# (Data Analysis in R: Multiple Linear Regression)
# Dataset swiss
# Open the help page for the built-in `swiss` dataset.
?swiss
# Take a working copy of the dataset under the same name (a later section
# adds a column to it), then inspect its structure.
swiss <- data.frame(swiss)
str(swiss)
# Histogram of fertility
hist(swiss$Fertility, col = "red")
# Numeric predictors for Fertility prediction
# Additive model: Fertility explained by Examination and Catholic.
fit <- lm(Fertility ~ Examination + Catholic, data = swiss)
summary(fit)
# The principal predictor is Examination, with a negative coefficient
# (Fertility decreases as Examination increases).
# Interaction of the variables Examination and Catholic.
# In a model formula `a * b` expands to `a + b + a:b`, i.e. both main
# effects plus their interaction term.
fit2 <- lm(Fertility ~ Examination * Catholic, data = swiss)
summary(fit2)
# Confidence intervals for the coefficients of fit2.
confint(fit2)
# Categorical predictors
# The histogram of Catholic visibly falls into two parts, so the variable
# can be split into a two-level factor.
hist(swiss$Catholic, col = "red")
# Split Catholic into two groups: "Lots" (Catholic > 60) and "Few"
# (otherwise), stored as a factor in the new column `religious`.
swiss$religious <- factor(ifelse(swiss$Catholic > 60, "Lots", "Few"))

# Additive model with one numeric and one categorical predictor.
fit3 <- lm(Fertility ~ Examination + religious, data = swiss)
summary(fit3)

# Interaction of variables: main effects of religious and Examination plus
# their interaction term.
fit4 <- lm(Fertility ~ religious * Examination, data = swiss)
summary(fit4)
# plots
# NOTE(review): ggplot(), aes(), geom_point() and geom_smooth() come from
# ggplot2, which is not attached anywhere in this section -- confirm that
# library(ggplot2) runs earlier.

# Fertility against Examination for all rows: points only, points with the
# default smoother, and points with a linear-model smoother.
scatter_all <- ggplot(swiss, aes(x = Examination, y = Fertility)) +
  geom_point()
scatter_all
scatter_all + geom_smooth()
scatter_all + geom_smooth(method = "lm")

# The same three plots, coloured by the `religious` column of `swiss`.
scatter_by_group <- ggplot(
  swiss,
  aes(x = Examination, y = Fertility, col = religious)
) +
  geom_point()
scatter_by_group
scatter_by_group + geom_smooth()
scatter_by_group + geom_smooth(method = "lm")
# Three-way interaction of religious, Infant.Mortality and Examination:
# `a * b * c` expands to all main effects, all pairwise interactions and the
# three-way interaction term.
# Requires the `religious` column to be present in `swiss`.
fit5 <- lm(
  Fertility ~ religious * Infant.Mortality * Examination,
  data = swiss
)
summary(fit5)
# model comparison
# Reset `swiss` to a fresh copy of the built-in dataset, discarding any
# columns added to the working copy. Reading from datasets::swiss directly
# avoids the warning that rm(swiss) raises when no global copy exists.
swiss <- data.frame(datasets::swiss)

# Full model: Fertility explained by every other column of `swiss`.
fit_full <- lm(Fertility ~ ., data = swiss)
summary(fit_full)

# Reduced model 1: the full model without Agriculture.
fit_reduced1 <- lm(
  Fertility ~ Infant.Mortality + Examination + Catholic + Education,
  data = swiss
)
summary(fit_reduced1)
# Compare the nested models.
anova(fit_full, fit_reduced1)

# Reduced model 2: the full model without Examination.
fit_reduced2 <- lm(
  Fertility ~ Infant.Mortality + Education + Catholic + Agriculture,
  data = swiss
)
summary(fit_reduced2)
anova(fit_full, fit_reduced2)
# model selection
# Backward stepwise selection starting from the full model `fit_full`;
# step() prints its trace and returns the selected model.
optimal_fit <- step(fit_full, direction = "backward")
summary(optimal_fit)