# Fit a 200-tree random forest that classifies the iris species (column 5)
# from the remaining four columns, then print the fitted model.
# randomForest has to be attached before its first use; previously it was only
# loaded further down, so this call failed in a fresh R session.
library(randomForest)
data <- iris[, -5]
fit <- randomForest(iris[, 5] ~ ., data = data, ntree = 200)
fit
# Load the packages used below. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script fail
# later at the first use of the package.
library(randomForest)
library(MASS)
# Quick look at the Boston data set from MASS. The data frame is referenced by
# name and deliberately not attach()ed: attaching puts its column names on the
# search path, where they can silently mask or be masked by other objects.
head(Boston)
?Boston
str(Boston)
# Random forest classification of the PimaIndiansDiabetes data (mlbench):
# 80/20 train/test split, fit, out-of-bag and test-set evaluation.
library(mlbench)
data(PimaIndiansDiabetes)
str(PimaIndiansDiabetes)
df <- PimaIndiansDiabetes
# Fix the seed so the random split and the forest are reproducible.
set.seed(123)
# split data: 80% of the rows go to training, the rest to testing.
# floor() makes the (otherwise fractional) sample size explicit.
inTrain <- sample(nrow(df), floor(nrow(df) * 0.8))
train <- df[inTrain, ]
test <- df[-inTrain, ]
fit <- randomForest(diabetes ~ ., data = train, ntree = 100, proximity = TRUE)
fit
# predict() without newdata returns the out-of-bag predictions for the
# training rows, so this is the OOB confusion matrix.
table(predict(fit), train$diabetes)
plot(fit)
importance(fit)
varImpPlot(fit)
# Confusion matrix on the held-out test rows.
pred <- predict(fit, newdata = test)
table(pred, test$diabetes)
# The margins are computed from the votes stored in the fitted forest, i.e.
# for the training rows, so the observed labels must be the training response.
# (The previous argument, test$Species, is a column that does not exist in
# this data set and evaluated to NULL.)
plot(margin(fit, train$diabetes))
# Tune Random Forest: let tuneRF() search over mtry for the iris classifier
# (predictors = all columns but the 5th, response = the 5th column) and print
# what it returns.
tune.rf <- tuneRF(
  x = iris[, -5],
  y = iris[, 5],
  stepFactor = 0.5
)
print(tune.rf)
# Random Forest
# Methods of improving classifiers:
# 1. Stacking
# 2. Bagging (bootstrap aggregation)
# 3. Boosting
# Random Forest = bootstrap aggregation (bagging) of decision trees, with a random subset of features tried at each split
# N samples, M characteristics
# Install randomForest only when it is not available yet, so that re-running
# the script does not re-download and reinstall the package every time.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)
# Read data: whitespace-separated wine table with a header row.
zzz <- read.table("~/DataAnalysis/R_data_analysis/DATA/Wine.txt",
                  header = TRUE, sep = "", dec = ".")
zzz
# The names below assume exactly 14 columns (13 predictors + the wine type);
# fail early instead of mislabelling columns if the file has another layout.
stopifnot(ncol(zzz) == 14L)
names(zzz) <- c(
  "Alcohol", "Malic_acid", "Ash", "Alcalinity_of_ash", "Magnesium",
  "Total_phenols", "Flavanoids", "Nonflavanoid_phenols", "Proanthocyanins",
  "Color_intensity", "Hue", "OD280_OD315_of_diluted_wines", "Proline",
  "Wine_type"
)
zzz
# Predictors: the first 13 columns of the wine table
x <- zzz[, seq_len(13)]
# Response: column 14, kept both as read (y) and as a factor (y.1)
y <- zzz[[14]]
y.1 <- as.factor(y)
# Number of observations per response value
table(y)
# Fit a classification forest on the wine predictors x with factor response
# y.1. The seed is fixed so the fit is reproducible.
set.seed(123)
ntree.1 <- 500 # number of trees in the forest
nodesize.1 <- 1 # minimum size of terminal nodes
keep.forest.1 <- TRUE # results include forest
rf.res <- randomForest(
  x,
  y = y.1,
  ntree = ntree.1,
  # Number of candidate variables per split: sqrt of the number of
  # predictors. Use ncol(x), not ncol(zzz), because zzz also contains the
  # response column.
  mtry = floor(sqrt(ncol(x))),
  replace = FALSE,
  nodesize = nodesize.1,
  importance = TRUE,
  localImp = FALSE,
  proximity = FALSE,
  norm.votes = TRUE,
  # Print progress every tenth of the forest.
  do.trace = ntree.1 / 10,
  keep.forest = keep.forest.1,
  corr.bias = FALSE,
  keep.inbag = FALSE
)
# Predict the wine type for the same rows the forest was trained on.
zzz.predict <- predict(rf.res, newdata = x)
# The preferable variant (class probabilities instead of labels):
# zzz.predict <- predict(rf.res, newdata = x, type = "prob")
# Assess the quality of the result.
# NOTE: this table scores the forest on its own training rows, so it is an
# optimistic (resubstitution) estimate -- hence the perfect diagonal below.
table(y, zzz.predict)
# zzz.predict
# y 0 1 2
# 0 59 0 0
# 1 0 48 0
# 2 0 0 71
# Out-of-bag confusion matrix: predict() without newdata returns the OOB
# predictions, which give an honest error estimate for the training rows.
table(y, predict(rf.res))
# import.wine <- importance(rf.res, type=NULL, class=1, scale=TRUE)
# import.wine.2 <- as.data.frame(import.wine)
varImpPlot(rf.res, sort = FALSE)
varUsed(rf.res, by.tree = FALSE, count = TRUE)