# Chapter 29: Multiple linear regression
# Source: "Анализ данных в R. Множественная линейная регрессия"
# (Data Analysis in R. Multiple Linear Regression)
# Swiss fertility data ----
# Open the help page for the `swiss` dataset.
help("swiss")
# Work on a data-frame copy stored under the same name.
swiss <- data.frame(swiss)
# Show the structure of the data: columns and their types.
str(swiss)
# Distribution of the response variable, Fertility.
hist(swiss$Fertility, col = "red")
# Numeric predictors ----
# Additive model: Fertility explained by Examination and Catholic.
fit <- lm(
  formula = Fertility ~ Examination + Catholic,
  data = swiss
)
summary(fit)
# According to the original notes, Examination is the principal predictor and
# its relationship with Fertility is negative.
# Same predictors plus their interaction: `*` expands to both main effects
# and the Examination:Catholic term.
fit2 <- lm(
  formula = Fertility ~ Examination * Catholic,
  data = swiss
)
summary(fit2)
# Confidence intervals for the coefficients of the interaction model.
confint(fit2)
# Categorical predictors ----
# The histogram of Catholic apparently falls into two parts, so the variable
# can be recoded as a two-level factor.
hist(swiss$Catholic, col = "red")
# Recode Catholic into two groups: "Lots" (above 60) and "Few" (otherwise).
swiss$religious <- factor(ifelse(swiss$Catholic > 60, "Lots", "Few"))
# Additive model using the new factor alongside Examination.
fit3 <- lm(formula = Fertility ~ Examination + religious, data = swiss)
summary(fit3)
# Interaction between the factor and Examination.
fit4 <- lm(formula = Fertility ~ religious * Examination, data = swiss)
summary(fit4)
# Plots ----
# ggplot2 supplies ggplot(), aes(), geom_point() and geom_smooth(). It is
# attached here because the script calls them without loading the package
# anywhere above.
library(ggplot2)

# Fertility against Examination for all observations.
exam_plot <- ggplot(swiss, aes(x = Examination, y = Fertility))
# Scatter plot only.
exam_plot +
  geom_point()
# Scatter plot with the default smoother.
exam_plot +
  geom_point() +
  geom_smooth()
# Scatter plot with a linear-model fit.
exam_plot +
  geom_point() +
  geom_smooth(method = "lm")

# Same three plots, coloured by the `religious` factor so that each group
# gets its own points and its own smoother.
exam_by_group_plot <- ggplot(
  swiss,
  aes(x = Examination, y = Fertility, col = religious)
)
exam_by_group_plot +
  geom_point()
exam_by_group_plot +
  geom_point() +
  geom_smooth()
exam_by_group_plot +
  geom_point() +
  geom_smooth(method = "lm")
# Three-way interaction ----
# Model Fertility with religious, Infant.Mortality and Examination together
# with all of their two-way and three-way interactions.
fit5 <- lm(
  formula = Fertility ~ religious * Infant.Mortality * Examination,
  data = swiss
)
summary(fit5)
# Model comparison ----
# Restore the original dataset explicitly. `Fertility ~ .` uses every other
# column as a predictor, so the derived `religious` column must not be
# present. Reading from the datasets namespace avoids relying on rm() plus
# search-path fallback, and works whether or not a global `swiss` exists.
swiss <- data.frame(datasets::swiss)

# Full model: all remaining columns as predictors.
fit_full <- lm(Fertility ~ ., data = swiss)
summary(fit_full)

# Reduced model 1: the full model without Agriculture.
fit_reduced1 <- lm(
  Fertility ~ Infant.Mortality + Examination + Catholic + Education,
  data = swiss
)
summary(fit_reduced1)
# F-test comparing the full model with reduced model 1.
anova(fit_full, fit_reduced1)

# Reduced model 2: the full model without Examination.
fit_reduced2 <- lm(
  Fertility ~ Infant.Mortality + Education + Catholic + Agriculture,
  data = swiss
)
summary(fit_reduced2)
# F-test comparing the full model with reduced model 2.
anova(fit_full, fit_reduced2)
# Model selection ----
# Backward stepwise selection starting from the full model.
optimal_fit <- step(object = fit_full, direction = "backward")
summary(optimal_fit)