|
               |
               |
               |
            |
library(MASS) # MASS ライブラリィーの呼び出し Cushings # Cushings データ cush2 <- log(Cushings[1:16,1:2]) tp2 <- factor(Cushings$Type[1:16]) # カテゴリー変数化 cush2.lda <- lda(cush2, tp2) # 線形判別 cush2.qda <- qda(cush2, tp2) # 2次判別 Tetrahydrocortisone <- cush2[,1] Pregnanetriol <- cush2[,2] # ロジスティック回帰 cush2.glm <- glm(tp2 ~ Tetrahydrocortisone + Pregnanetriol, family=binomial) # # 散布図上の判別線表示 len <- 100 plot(cush2, type="n", xlim=c(0,3), ylim=c(-4,3), xlab="log(Tetrahydrocortisone)", ylab="log(Pregnanetriol)") text(cush2, as.character(tp2), col=as.numeric(tp2)+1) title(main="2群の判別") xp <- seq(0, 3, length = len) yp <- seq(-4, 3, length = len) # 100 × 100 のグリッドの定義 cushT <- expand.grid(Tetrahydrocortisone=xp, Pregnanetriol=yp) Z <- predict(cush2.lda, cushT) # 線形判別でのグリッド上の事後確率 zp <- Z$posterior[,1] - Z$posterior[,2] # a の事後確率 − b の事後確率 contour(xp, yp, matrix(zp, len), add=T, levels=0) # 等事後確率線 # Z <- predict(cush2.qda, cushT) # 2次判別でのグリッド上の事後確率 zp <- Z$posterior[,1] - Z$posterior[,2] contour(xp, yp, matrix(zp, len), add=T, levels=0, col="blue") # Z <- predict(cush2.glm, cushT, type="response") # a の確率 contour(xp, yp, matrix(Z, len), add=T, levels=0.5, col="purple") # a が 0.5 になる線 legend("bottomright", legend=c("線形判別","2次判別","ロジスティック判別"), lty=1, col=c("black","blue","purple"), cex=0.8)
cush <- log(Cushings[1:21,1:2]) tp <- factor(Cushings$Type[1:21]) len <- 100 xp <- seq(0, 4.5, length = len) yp <- seq(-4, 3, length = len) cushT <- expand.grid(Tetrahydrocortisone=xp, Pregnanetriol=yp) # # 判別線表示関数定義 predplot <- function(pred, main=""){ plot(cush, type="n", xlim=c(0,4.5), ylim=c(-4,3), xlab="log(Tetrahydrocortisone)", ylab="log(Pregnanetriol)", main=main) text(cush, as.character(tp), col=as.numeric(tp)+1) zp <- pred[,3] - pmax(pred[,2], pred[,1]) contour(xp, yp, matrix(zp, len), add=T, levels=0) zp <- pred[,1] - pmax(pred[,2], pred[,3]) contour(xp, yp, matrix(zp, len), add=T, levels=0) } # # # 線形判別 cush.lda <- lda(cush, tp) # 線形判別関数 Z <- predict(cush.lda, cushT) main <- "線形判別" predplot(Z$posterior, main=main) # # 2次判別 cush.qda <- qda(cush, tp) Z <- predict(cush.qda, cushT) main <- "2次判別" predplot(Z$posterior, main=main) # # 多項ロジスティック判別 Tetrahydrocortisone <- cush[,1] Pregnanetriol <- cush[,2] library(nnet) cush.multinom <- multinom(tp ~ Tetrahydrocortisone + Pregnanetriol, maxit=250) zz <- predict(cush.multinom, cushT, type="probs") main <- "多項ロジスティック判別" predplot(zz, main=main) # # 決定木,分類木 library(rpart) cush.tree <- rpart(tp ~ Tetrahydrocortisone + Pregnanetriol) plot(cush.tree); text(cush.tree) zz <- predict(cush.tree, cushT, type="prob") main <- "分類木" predplot(zz, main=main) # # library(e1071) # ナイーブベイズ(naive bayes) cush.bayes <- naiveBayes(cush, tp) zz <- predict(cush.bayes, cushT, type="raw") main <- "ナイーブベイズ" predplot(zz, main=main) # # サポートベクトルマシン(SVM) cush.svm <- svm(tp ~ Tetrahydrocortisone + Pregnanetriol, probability = TRUE) zz <- predict(cush.svm, cushT, decision.values = TRUE, probability = TRUE) zz.dec <- attr(zz, "decision.values") zz.prob <- attr(zz, "probabilities") main <- "サポートベクトルマシン" predplot(zz.prob, main=main) # # ランダムフォレスト library(randomForest) set.seed(71) cush.rf <- randomForest(cush, tp) zz <- predict(cush.rf, cushT, type="prob") main <- "ランダムフォレスト" predplot(zz, main=main)
               |
               |
library(MASS) head(fgl) nf <- which(fgl$type == "WinF") nn <- which(fgl$type == "WinNF") nv <- which(fgl$type == "Ven") nc <- which(fgl$type == "Con") nt <- which(fgl$type == "Tabl") nh <- which(fgl$type == "Head") type2 <- rep("", nrow(fgl)) type2[nf] <- "F" type2[nn] <- "N" type2[nv] <- "V" type2[nc] <- "C" type2[nt] <- "T" type2[nh] <- "H" typecol <- rep(0, nrow(fgl)) typecol[nf] <- 1 typecol[nn] <- 2 typecol[nv] <- 3 typecol[nc] <- 4 typecol[nt] <- 5 typecol[nh] <- 6 fgl.pca <- prcomp(scale(fgl[,-10])) summary(fgl.pca) plot(fgl.pca$x[,1:2], type="n") text(fgl.pca$x[,1:2], type2, col=typecol+1, cex=0.7) title(main="主成分スコア") fgl.lda <- lda(type ~ ., data=fgl) fgl.lda fgl.pred <- predict(fgl.lda) # # xp <- seq(-3.6,7.4, length=len) yp <- seq(-8,3.8, length=len) fglT <- expand.grid(LD1=xp, LD2=yp) mld1 <- tapply(fgl.pred$x[,1], fgl$type, mean) mld2 <- tapply(fgl.pred$x[,2], fgl$type, mean) pred <- NULL for(i in 1:nrow(fglT)){ d1 <- dnorm(fglT[i,1],mean=mld1, sd=1) d2 <- dnorm(fglT[i,2],mean=mld2, sd=1) pred <- rbind(pred, fgl.lda$prior*d1*d2/sum(fgl.lda$prior*d1*d2)) } # # plot(fgl.pred$x[,1:2], type="n") text(fgl.pred$x[,1:2], type2, col=typecol+1, cex=0.7) title(main="正準判別スコアと判別境界") # # 判別境界表示 zp <- pred[,1] - pmax(pred[,2], pred[,3], pred[,4], pred[,5], pred[,6]) contour(xp, yp, matrix(zp, len), add=T, levels=0) zp <- pred[,2] - pmax(pred[,1], pred[,3], pred[,4], pred[,5], pred[,6]) contour(xp, yp, matrix(zp, len), add=T, levels=0) zp <- pred[,3] - pmax(pred[,1], pred[,2], pred[,4], pred[,5], pred[,6]) contour(xp, yp, matrix(zp, len), add=T, levels=0) zp <- pred[,4] - pmax(pred[,1], pred[,2], pred[,3], pred[,5], pred[,6]) contour(xp, yp, matrix(zp, len), add=T, levels=0) zp <- pred[,5] - pmax(pred[,1], pred[,2], pred[,3], pred[,4], pred[,6]) contour(xp, yp, matrix(zp, len), add=T, levels=0) # # データ,判別表 fgltab.lda <- table(fgl$type, fgl.pred$class) fgltab.lda 1 - sum(diag(fgltab.lda))/sum(fgltab.lda) # # # force predict to return class labels only mypredict.lda <- function(object, newdata) predict(object, newdata = newdata)$class # 10-fold CV 10 回 library(ipred) set.seed(41) error.lda <- numeric(10) for(i in 1:10) error.lda[i] <- errorest(type ~ ., data = fgl, model = lda, predict= mypredict.lda)$error summary(error.lda) # # # SVM library(e1071) fgl.svm <- svm(type ~ ., data = fgl) # データ,判別表 fgltab.svm <- table(fgl$type, fitted(fgl.svm)) fgltab.svm 1 - sum(diag(fgltab.svm))/sum(fgltab.svm) # 10-fold CV 10 回 set.seed(563) error.SVM <- numeric(10) for (i in 1:10) error.SVM[i] <- errorest(type ~ ., data = fgl, model = svm)$error summary(error.SVM) # # library(randomForest) set.seed(17) fgl.rf <- randomForest(type ~ ., data = fgl, mtry = 2, importance = TRUE, do.trace = 100) print(fgl.rf) fgltab.rf <- table(fgl$type, fgl.rf$predicted) fgltab.rf 1 - sum(diag(fgltab.rf))/sum(fgltab.rf) # # 10-fold CV 10 回 library(ipred) set.seed(131) error.RF <- numeric(10) for(i in 1:10) error.RF[i] <- errorest(type ~ ., data = fgl, model = randomForest, mtry = 2)$error summary(error.RF) #
正準判別 WinF WinNF Veh Con Tabl Head WinF 52 15 3 0 0 0 WinNF 17 54 0 3 2 0 Veh 11 6 0 0 0 0 Con 0 5 0 7 0 1 Tabl 1 2 0 0 6 0 Head 1 2 0 1 0 25 判別誤差:0.3271,予測誤差:0.3818 SVM WinF WinNF Veh Con Tabl Head WinF 59 11 0 0 0 0 WinNF 15 61 0 0 0 0 Veh 10 7 0 0 0 0 Con 0 0 0 13 0 0 Tabl 0 1 0 0 8 0 Head 1 0 0 0 0 28 判別誤差:0.2103,予測誤差:0.2935 ランダムフォレスト WinF WinNF Veh Con Tabl Head WinF 63 6 1 0 0 0 WinNF 11 61 1 1 2 0 Veh 7 4 6 0 0 0 Con 0 3 0 9 0 1 Tabl 0 1 0 0 8 0 Head 1 3 0 0 0 25 判別誤差:0.1916,予測誤差:0.2065