# 종속변수(범주형)와 독립변수(연속형) 간의 관계를 통해 예측 모델을 작성
# iris의 Species 중에서 setosa와 versicolor 대상으로 연습
# Inspect the built-in iris data set before filtering it.
str(iris)
tail(iris) # tail() shows the last rows; head() shows the first rows
# The outcome to be predicted is either versicolor or setosa.

# Keep only the two species used for this binary classification exercise.
d <- iris[iris$Species %in% c("setosa", "versicolor"), ]
d
str(d)
# Drop the now-unused third level so Species is a two-level factor.
d$Species <- droplevels(d$Species)
str(d)
nrow(d) # d has 100 rows

# Build the model.
# glm() fits a Generalized Linear Model; family = "binomial" is used here to
# model the two-level Species factor from all remaining columns of d.
# NOTE(review): the accuracy computed below is 100%, which suggests the two
# classes are perfectly separable; glm() may emit convergence or
# fitted-probability warnings here -- confirm when running.
(m <- glm(Species ~ ., data = d, family = "binomial"))
# Slope coefficients: -9.879        -7.418        19.054        25.033

# Fitted values of the model, first for a few rows (1-5 and 51-55), then all.
f <- fitted(m)
f[c(1:5, 51:55)]
f

# Integer codes of the factor: the first 50 rows are setosa (1), the last 50
# are versicolor (2).
as.numeric(d$Species)

# A fitted value above 0.5 is classified as 1 (versicolor), otherwise as 0
# (setosa). Subtracting 1 from the factor codes puts the actual classes on the
# same 0/1 scale, so the comparison is TRUE wherever the prediction is right.
is_correct <- ifelse(f > 0.5, 1, 0) == as.numeric(d$Species) - 1
is_correct

# Count the TRUE values and turn the count into an accuracy.
sum(is_correct) # 100
sum(is_correct) / nrow(d) # accuracy is 100%

# Prediction for selected observations: rows 1, 10 and 55 are taken from d.
d[1, ]
d[10, ]
d[55, ]
pred <- predict(
  m,
  newdata = d[c(1, 10, 55), ],
  type = "response" # predictions on the response (probability) scale
)
pred
# Threshold at 0.5: below it the class is 0, otherwise 1.
result <- ifelse(pred < 0.5, 0, 1)
result # row 1 = setosa, row 10 = setosa, row 55 = versicolor

# Create new data and predict its class.
# The first three rows of d serve as a template for the new observations.
my <- d[seq_len(3), ]
# edit() opens a data editor and needs a user, so it is only called in an
# interactive session; otherwise the template rows are used unchanged.
if (interactive()) {
  my <- edit(my) # change the values by hand
}
my

# Predict on the edited rows. The earlier version passed d[c(1, 10, 55), ]
# here, so the values entered in the editor were never used.
mypred <- predict(m, newdata = my, type = "response")
mypred
# Threshold at 0.5: 0 = setosa, 1 = versicolor.
myresult <- ifelse(mypred >= 0.5, 1, 0)
myresult # depends on the values entered above