# Working notes for the ada / sylva / nova data sets: package setup, a
# random-forest baseline, fastICA plots and fastICA feature export.
# Packages are installed into, and loaded from, the current working directory.

# Setup ----

# install.packages() arguments used below:
#   lib     = directory the packages are installed into
#   destdir = directory the downloaded archives are stored in
required_pkgs <- c("randomForest", "fastICA", "SparseM", "e1071")
missing_pkgs <- setdiff(
  required_pkgs,
  rownames(installed.packages(lib.loc = "."))
)
if (length(missing_pkgs) > 0) {
  # Only fetch what is not already present, so re-running is cheap.
  install.packages(missing_pkgs, lib = ".", destdir = ".")
}

# Needed in every new session so that library() finds the local install.
.libPaths(path.expand("."))
library(randomForest)
library(e1071)
library(SparseM)
library(fastICA)

# Helpers ----

# Project new observations onto an ICA basis estimated by fastICA().
#
# ica      : list returned by fastICA(); only its K and W matrices are used.
# new_data : numeric matrix with the same columns as the data `ica` was
#            fitted on.
# Returns the matrix of estimated sources for `new_data`.
#
# `new_data` is centred on its OWN column means. That equals the `$X`
# component of fastICA(new_data), which is what the earlier workflow
# multiplied by K and W, but it avoids running a whole ICA just to centre.
# NOTE(review): centring with the training means may be statistically
# preferable -- confirm before changing, as it alters the exported features.
project_onto_ica <- function(ica, new_data) {
  centred <- scale(new_data, center = TRUE, scale = FALSE)
  centred %*% ica$K %*% ica$W
}

# Fit fastICA on the training matrix and export train/validation features in
# sparse (SVM-light style) format through e1071::write.matrix.csr().
#
# train_x, valid_x       : numeric matrices (rows = observations).
# train_y, valid_y       : labels, one per row of the matching matrix.
# n_comp                 : number of independent components to extract.
# train_file, valid_file : output paths.
# Returns the fastICA fit invisibly.
#
# fastICA() is not seeded here, so the components may differ between runs.
write_ica_features <- function(train_x, train_y, valid_x, valid_y, n_comp,
                               train_file, valid_file) {
  ica <- fastICA(train_x, n.comp = n_comp)
  valid_sources <- project_onto_ica(ica, valid_x)
  write.matrix.csr(as.matrix.csr(ica$S), y = train_y, file = train_file)
  write.matrix.csr(as.matrix.csr(valid_sources), y = valid_y, file = valid_file)
  invisible(ica)
}

# randomForest usage ----

train <- read.table("ada_train.data")
label <- read.table("ada_train.labels")
valid <- read.table("ada_valid.data")
train_x <- as.matrix(train)
train_y <- as.matrix(label)

# Call set.seed(1) immediately before every randomForest() call.
# With seed = 1, mtry = 8, classwt = c(1, 2.62) and ntree in 495..499 or 501,
# this setup scored 0.1577 in the WCCI contest.
set.seed(1)
model <- randomForest(
  train_x,
  as.factor(train_y),
  mtry = 8,
  ntree = 499,
  classwt = c(1, 2.62)
)
print(model)

valid_pred <- predict(model, as.matrix(valid))
print(valid_pred)
# write()/cat() on a factor would emit its internal integer codes, five per
# line; convert to the label text and write one prediction per line.
write(as.character(valid_pred), "ada_valid.resu", ncolumns = 1)

# Plotting the ICA components ----

ada <- read.matrix.csr("train.csv.out.scale")
ada_data <- as.matrix(ada$x)
# One fill colour per observation, picked by its class label.
class_fill <- rainbow(4)[ada$y]

ada_ica <- fastICA(ada_data, n.comp = 40)
plot(ada_ica$S, pch = 21, bg = class_fill)
plot(ada_ica$S[, 1:2], pch = 21, bg = class_fill)

ada_ica <- fastICA(ada_data, n.comp = 5)
plot(ada_ica$S[, 1:2], pch = 21, bg = class_fill)

# fastICA on SVM-format (sparse) data ----

# NOTE(review): the validation set was used here without ever being loaded.
# The path below is an assumption modelled on the training file name --
# TODO confirm the real validation file before running.
ada_valid_file <- "valid.csv.out.scale"
stopifnot(file.exists(ada_valid_file))
ada_valid <- read.matrix.csr(ada_valid_file)
ada_valid_data <- as.matrix(ada_valid$x)

write_ica_features(
  ada_data, ada$y, ada_valid_data, ada_valid$y,
  n_comp = 30,
  train_file = "ada.ica30.train",
  valid_file = "ada.ica30.val"
)
write_ica_features(
  ada_data, ada$y, ada_valid_data, ada_valid$y,
  n_comp = 25,
  train_file = "ada.ica25.train",
  valid_file = "ada.ica25.val"
)

# Variant: the same decomposition using fastICA's C implementation.
ada_ica <- fastICA(ada_data, n.comp = 30, method = "C")

# fastICA on raw (dense table) data ----

ls()   # list the objects currently in the workspace
dir()  # list the files in the working directory
# The workspace is deliberately not wiped with rm(list = ls()) at this point:
# the helper functions defined above must stay available.

sylva_train <- as.matrix(read.table("sylva_train.data"))
sylva_valid <- as.matrix(read.table("sylva_valid.data"))
sylva_train_label <- as.matrix(read.table("sylva_train.labels"))
sylva_valid_label <- as.matrix(read.table("sylva_valid.labels"))
write_ica_features(
  sylva_train, sylva_train_label, sylva_valid, sylva_valid_label,
  n_comp = 30,
  train_file = "sylva.ica30.train",
  valid_file = "sylva.ica30.valid"
)

# The .new files have rarely used dimensions removed (e.g. a dimension that
# occurs fewer than five times is dropped), which reduces the data to a
# little over four thousand dimensions.
nova_train <- as.matrix(read.table("nova_train.new"))
nova_valid <- as.matrix(read.table("nova_valid.new"))
nova_train_label <- as.matrix(read.table("nova_train.labels"))
nova_valid_label <- as.matrix(read.table("nova_valid.labels"))
write_ica_features(
  nova_train, nova_train_label, nova_valid, nova_valid_label,
  n_comp = 30,
  train_file = "nova.ica30.train",
  valid_file = "nova.ica30.valid"
)

# How fig. 1 was generated ----

x1 <- rnorm(5000, mean = 1, sd = 1.2)
x2 <- rnorm(5000, mean = 10, sd = 1.2)
x3 <- rnorm(5000, mean = 5)
plot(
  density(x1),
  xlim = c(-2, 12), ylim = c(0, 0.4),
  col = "RED", xlab = "", ylab = "", main = "", lwd = 3
)
lines(density(x2), col = "RED", lwd = 3)
lines(density(x3), col = "Blue", lwd = 3)

# Minimal randomForest template ----
# Other packages used in these notes: e1071, fastICA.
# Kept as comments because `ur_data` is a placeholder for your own data and
# running it would overwrite the tuned ada_valid.resu written above:
#
#   library(randomForest)
#   model <- randomForest(train_x, as.factor(train_y))
#   predict(model, ur_data)
#   write(as.character(predict(model, valid)), "ada_valid.resu", ncolumns = 1)