install.packages("KoNLP")
install.packages("wordcloud")
install.packages("RcolorBrewer")
library(KoNLP)
library(wordcloud)
library(RColorBrewer)
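# Note: extractNoun() below uses KoNLP's dictionary. Loading one explicitly with
# useSejongDic() or useNIADic() (both KoNLP functions) is a common first step,
# though whether it is strictly required can depend on the KoNLP version installed.
# useSejongDic()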
src_dir <- c("C:/Users/User/Downloads/용인고자체평가/수지구/txt")
src_file <- list.files(src_dir) # character vector of file names in the source directory
src_file_cnt <- length(src_file)
for(i in 1:src_file_cnt) {
  # read each txt file one by one, automatically, using the loop
  dataset <- read.table(
    paste(src_dir, "/", src_file[i], sep=""),
    sep=",", header=F, stringsAsFactors = F, fill = TRUE)
  # export the dataset with the 'append = TRUE' option, filename = dataset_all.txt
  write.table(dataset,
              paste(src_dir, "/", "dataset_all.txt", sep=""),
              sep = ",",
              row.names = FALSE,
              col.names = FALSE,
              quote = FALSE,
              append = TRUE) # appending dataset (stacking)
  # delete the separate dataset
  rm(dataset)
  # print the loop index to the console to check loop status
  print(i)
}
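# The merge loop above is rerun once per district directory (기흥구, 수지구, 처인구) by
# editing src_dir by hand. A minimal sketch of a helper that wraps the same steps
# (merge_txt() and its argument are illustrative, not from the original post):
merge_txt <- function(dir) {
  files <- list.files(dir, pattern = "\\.txt$")
  files <- files[files != "dataset_all.txt"]   # skip an already-merged output file
  for (fname in files) {
    dataset <- read.table(paste(dir, "/", fname, sep=""),
                          sep=",", header=F, stringsAsFactors=F, fill=TRUE)
    write.table(dataset, paste(dir, "/", "dataset_all.txt", sep=""),
                sep=",", row.names=FALSE, col.names=FALSE, quote=FALSE, append=TRUE)
  }
}
# e.g. merge_txt("C:/Users/User/Downloads/용인고자체평가/기흥구/txt")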
f = file("C:/Users/User/Downloads/용인고자체평가/기흥구/txt/dataset_all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/ki.csv")
#wordcount1
#pal <- brewer.pal(9, "Pastel1")
#wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
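# The read -> extractNoun -> filter -> count pipeline above is repeated below for the
# other districts and for the combined file. A minimal sketch that wraps it into one
# reusable function (count_nouns() and its arguments are illustrative, not from the
# original post):
count_nouns <- function(path, min_count = 5) {
  con    <- file(path, blocking = FALSE)
  lines  <- readLines(con)
  nouns  <- unlist(sapply(lines, extractNoun, USE.NAMES = FALSE))
  nouns  <- Filter(function(x) nchar(x) >= 2, nouns)                # 2+ character nouns only
  nouns  <- Filter(function(x) length(grep("&", x)) < 1, nouns)     # drop words with '&'
  nouns  <- Filter(function(x) length(grep("[0-9]", x)) < 1, nouns) # drop words with digits
  counts <- table(nouns)
  sort(counts[counts >= min_count], decreasing = TRUE)
}
# illustrative usage (output path is hypothetical):
# write.csv(head(count_nouns("C:/R/all.txt"), 30), file = "c:/R/all_check.csv")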
f = file("C:/Users/User/Downloads/용인고자체평가/수지구/txt/dataset_all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/su.csv")
#wordcount1
#pal <- brewer.pal(9, "Pastel1")
#wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
f = file("C:/Users/User/Downloads/용인고자체평가/처인구/txt/dataset_all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/chu.csv")
#wordcount1
#pal <- brewer.pal(9, "Pastel1")
#wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
f = file("C:/R/all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/all.csv")
pal <- brewer.pal(9, "Pastel1")
wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
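# To keep the cloud as an image file instead of only drawing it in the plot window,
# the same call can be wrapped in a graphics device (the output path here is illustrative):
png("c:/R/wordcloud_all.png", width = 800, height = 800)
wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25,
          min.freq=1, random.order=F, random.color=T, colors=pal)
dev.off()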