# Classification analysis: random forest.
# Install randomForest only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

# Fit a random forest on iris with the package defaults.
# Bootstrap sampling and split-variable selection are random, so fix the seed
# to make the figures printed below reproducible.
set.seed(1234)
model <- randomForest(Species ~ ., data = iris)
model
# The printed summary shows the number of trees and the OOB (out-of-bag)
# estimate of the error rate. The original author's unseeded run reported
# 500 trees and an OOB error rate of 4.67%; exact values depend on the seed.

# OOB accuracy = correctly classified rows (diagonal of the confusion matrix)
# divided by the number of rows. The counts are read from the fitted model
# rather than typed in by hand, because they change from run to run.
# model$confusion also carries a trailing "class.error" column, so keep only
# the per-class count columns before taking the diagonal.
sum(diag(model$confusion[, levels(iris$Species)])) / nrow(iris)

# Refit with tuned parameters: 300 trees and mtry = 4, i.e. all four iris
# predictors are candidates at every split. na.action = na.omit drops rows
# that contain missing values before fitting.
# The seed is fixed so the OOB figures are reproducible.
set.seed(1234)
model2 <- randomForest(
  Species ~ .,
  data = iris,
  ntree = 300,
  mtry = 4,
  na.action = na.omit
)
model2
# The original author's unseeded run reported an OOB error rate of 4%
# (accuracy 0.9533), slightly better than the default model; exact values
# depend on the seed.

# OOB accuracy computed from the fitted model's confusion matrix instead of
# hard-coded counts ("class.error" column excluded).
sum(diag(model2$confusion[, levels(iris$Species)])) / nrow(iris)

# Inspect variable importance.
# importance = TRUE additionally computes the permutation-based importance
# (mean decrease in accuracy) next to the Gini-based measure. TRUE is spelled
# out because T is an ordinary variable that can be reassigned.
# The seed is fixed so the importance ranking is reproducible.
set.seed(1234)
model3 <- randomForest(
  Species ~ .,
  data = iris,
  importance = TRUE,
  ntree = 300,
  mtry = 4,
  na.action = na.omit  # drop rows with missing values before fitting
)
# A larger MeanDecreaseGini means a more important variable. In the original
# author's run Petal.Width ranked highest.
importance(model3)

# Dot chart of the importance measures for each predictor.
varImpPlot(model3)

# Search for the best parameter combination (ntree, mtry).
ntree <- c(400, 500, 600)
mtry <- 2:4
# expand.grid() builds every ntree x mtry combination. data.frame() would only
# pair the two vectors element-wise and fails when their lengths differ.
param <- expand.grid(n = ntree, m = mtry)
param

# Fit one forest per combination, print it, and record its final OOB error
# rate so the combinations can be compared afterwards.
# The seed is fixed once so the whole search is reproducible.
set.seed(1234)
param$oob_error <- NA_real_
for (i in ntree) {
  cat("ntree = ", i, "\n")
  for (j in mtry) {
    cat("mtry = ", j, "\n")
    model_iris <- randomForest(
      Species ~ .,
      data = iris,
      ntree = i,
      mtry = j,
      na.action = na.omit
    )
    print(model_iris)
    # err.rate has one row per tree; the last row is the OOB error of the
    # complete forest.
    param$oob_error[param$n == i & param$m == j] <-
      model_iris$err.rate[model_iris$ntree, "OOB"]
  }
}

# Combinations ordered from lowest to highest OOB error; the first row is the
# best setting found.
param[order(param$oob_error), ]