install.packages("KoNLP")
install.packages("wordcloud")
install.packages("RcolorBrewer")
library(KoNLP)
library(wordcloud)
library(RColorBrewer)
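# Note: extractNoun() below uses KoNLP's dictionary. Loading one explicitly with
# useSejongDic() or useNIADic() (both KoNLP functions) is a common first step,
# though whether it is strictly required can depend on the KoNLP version installed.
# useSejongDic()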
src_dir <- c("C:/Users/User/Downloads/용인고자체평가/수지구/txt")
src_file <- list.files(src_dir) # character vector of file names in the source directory
src_file_cnt <- length(src_file)
for(i in 1:src_file_cnt) {
  # read each txt file one by one, automatically, using the loop
  dataset <- read.table(
    paste(src_dir, "/", src_file[i], sep=""),
    sep=",", header=F, stringsAsFactors = F, fill = TRUE)
  # export the dataset with the 'append = TRUE' option, filename = dataset_all.txt
  write.table(dataset,
              paste(src_dir, "/", "dataset_all.txt", sep=""),
              sep = ",",
              row.names = FALSE,
              col.names = FALSE,
              quote = FALSE,
              append = TRUE) # appending dataset (stacking)
  # delete the separate dataset
  rm(dataset)
  # print the loop index to the console to check loop status
  print(i)
}
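# The merge loop above is rerun once per district directory (기흥구, 수지구, 처인구) by
# editing src_dir by hand. A minimal sketch of a helper that wraps the same steps
# (merge_txt() and its argument are illustrative, not from the original post):
merge_txt <- function(dir) {
  files <- list.files(dir, pattern = "\\.txt$")
  files <- files[files != "dataset_all.txt"]   # skip an already-merged output file
  for (fname in files) {
    dataset <- read.table(paste(dir, "/", fname, sep=""),
                          sep=",", header=F, stringsAsFactors=F, fill=TRUE)
    write.table(dataset, paste(dir, "/", "dataset_all.txt", sep=""),
                sep=",", row.names=FALSE, col.names=FALSE, quote=FALSE, append=TRUE)
  }
}
# e.g. merge_txt("C:/Users/User/Downloads/용인고자체평가/기흥구/txt")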
f = file("C:/Users/User/Downloads/용인고자체평가/기흥구/txt/dataset_all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/ki.csv")
#wordcount1
#pal <- brewer.pal(9, "Pastel1")
#wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
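# The read -> extractNoun -> filter -> count pipeline above is repeated below for the
# other districts and for the combined file. A minimal sketch that wraps it into one
# reusable function (count_nouns() and its arguments are illustrative, not from the
# original post):
count_nouns <- function(path, min_count = 5) {
  con    <- file(path, blocking = FALSE)
  lines  <- readLines(con)
  nouns  <- unlist(sapply(lines, extractNoun, USE.NAMES = FALSE))
  nouns  <- Filter(function(x) nchar(x) >= 2, nouns)                # 2+ character nouns only
  nouns  <- Filter(function(x) length(grep("&", x)) < 1, nouns)     # drop words with '&'
  nouns  <- Filter(function(x) length(grep("[0-9]", x)) < 1, nouns) # drop words with digits
  counts <- table(nouns)
  sort(counts[counts >= min_count], decreasing = TRUE)
}
# illustrative usage (output path is hypothetical):
# write.csv(head(count_nouns("C:/R/all.txt"), 30), file = "c:/R/all_check.csv")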
f = file("C:/Users/User/Downloads/용인고자체평가/수지구/txt/dataset_all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/su.csv")
#wordcount1
#pal <- brewer.pal(9, "Pastel1")
#wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
f = file("C:/Users/User/Downloads/용인고자체평가/처인구/txt/dataset_all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/chu.csv")
#wordcount1
#pal <- brewer.pal(9, "Pastel1")
#wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
f = file("C:/R/all.txt", blocking=F )
txtLines = readLines(f)
txtLines = cbind(txtLines , readLines(f) )
nouns = sapply(txtLines , extractNoun , USE.NAMES = F)
undata=unlist(nouns)
data= Filter(function(x){nchar(x)>=2}, undata)
data= Filter(function(x){ length(grep("&", x)) < 1 }, data)
data= Filter(function(x){ length(grep("[0-9]", x)) < 1 }, data)
wordcount1 = table(data)
wordcount1 = wordcount1[wordcount1>4]
wordcount2 = sort(wordcount1, decreasing = T)
head(wordcount2,30)
write.csv(head(wordcount2,30),file="c:/R/all.csv")
pal <- brewer.pal(9, "Pastel1")
wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25, min.freq=1, random.order=F, random.color=T, colors=pal)
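# To keep the cloud as an image file instead of only drawing it in the plot window,
# the same call can be wrapped in a graphics device (the output path here is illustrative):
png("c:/R/wordcloud_all.png", width = 800, height = 800)
wordcloud(names(wordcount2), freq = wordcount2, scale=c(7,1), rot.per=0.25,
          min.freq=1, random.order=F, random.color=T, colors=pal)
dev.off()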