티스토리 뷰

빅데이터/R

NLP 이용한 주요 워드카운트

Mac's analystics 2017. 2. 1. 11:06

# Install the packages this script needs, but only the ones that are not
# already present, so re-running the script does not reinstall everything.
# Package names are case-sensitive: the palette package is "RColorBrewer".
required_pkgs <- c("KoNLP", "wordcloud", "RColorBrewer")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}




library(KoNLP)

library(wordcloud)

library(RColorBrewer)



# Stack every comma-separated text file found in src_dir into one combined
# file, dataset_all.txt, written into the same directory.
src_dir <- "C:/Users/User/Downloads/용인고자체평가/수지구/txt"
out_name <- "dataset_all.txt"
out_path <- file.path(src_dir, out_name)

# write.table() below appends, so a combined file left over from an earlier
# run would get every row stacked onto it a second time. Start clean instead.
if (file.exists(out_path)) {
  file.remove(out_path)
}

# list.files() returns a character vector of file names. The combined output
# lives in the same directory, so exclude it explicitly: it must never be read
# back in as one of its own inputs.
src_file <- setdiff(list.files(src_dir), out_name)
src_file_cnt <- length(src_file)

# seq_along() gives an empty sequence when there are no input files, whereas
# 1:src_file_cnt would iterate over c(1, 0) and fail on a missing file name.
for (i in seq_along(src_file)) {
  # Read one source file; fill = TRUE pads rows that have fewer fields.
  dataset <- read.table(
    file.path(src_dir, src_file[i]),
    sep = ",", header = FALSE, stringsAsFactors = FALSE, fill = TRUE
  )

  # Append this file's rows to the combined output (stacking).
  write.table(
    dataset,
    out_path,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE,
    append = TRUE
  )

  # Drop the per-file data before reading the next one.
  rm(dataset)

  # Print the loop index to the console to show progress.
  print(i)
}







#' Count the most frequent nouns in a text file and export the top entries.
#'
#' Reads `txt_path` line by line, extracts nouns from each line with
#' `extract_fn`, drops tokens that are shorter than two characters or that
#' contain "&" or a digit, tabulates the remainder, keeps the words that occur
#' more than `min_count` times, prints the `top_n` most frequent ones and
#' writes those same rows to `csv_path`.
#'
#' @param txt_path Path of the text file to analyse.
#' @param csv_path Path of the CSV file that receives the top `top_n` words.
#' @param min_count A word is kept only if it occurs more than this many times.
#' @param top_n Number of most frequent words to print and export.
#' @param extract_fn Function turning one line of text into a character vector
#'   of nouns; defaults to `extractNoun` (KoNLP, loaded at the top of the file).
#' @return The complete frequency table sorted by decreasing count, invisibly.
count_top_nouns <- function(txt_path, csv_path, min_count = 4, top_n = 30,
                            extract_fn = extractNoun) {
  # Read the file exactly once. Reading it a second time and cbind()-ing the
  # two copies together makes every line get analysed twice, which doubles
  # every word count and defeats the min_count threshold.
  txt_lines <- readLines(txt_path)

  # lapply() always returns a list, so unlist() reliably yields a flat vector
  # even when every line happens to produce the same number of nouns.
  words <- as.character(
    unlist(lapply(txt_lines, extract_fn), use.names = FALSE)
  )
  words <- words[!is.na(words)]

  # Keep tokens of at least two characters without "&" and without digits.
  keep <- nchar(words) >= 2 &
    !grepl("&", words, fixed = TRUE) &
    !grepl("[0-9]", words)
  words <- words[keep]

  # Naming the table dimension "data" keeps the exported CSV column header
  # the same as before.
  word_counts <- table(data = words)
  word_counts <- word_counts[word_counts > min_count]
  word_counts <- sort(word_counts, decreasing = TRUE)

  top_words <- head(word_counts, top_n)
  print(top_words)
  write.csv(top_words, file = csv_path)

  invisible(word_counts)
}

# Per-district word counts. Each call prints the top 30 nouns and saves them.
# To draw a cloud for a single district, capture the returned table and pass
# it to wordcloud() the same way the final step below does.

# Giheung-gu
count_top_nouns(
  "C:/Users/User/Downloads/용인고자체평가/기흥구/txt/dataset_all.txt",
  "c:/R/ki.csv"
)

# Suji-gu
count_top_nouns(
  "C:/Users/User/Downloads/용인고자체평가/수지구/txt/dataset_all.txt",
  "c:/R/su.csv"
)

# Cheoin-gu
count_top_nouns(
  "C:/Users/User/Downloads/용인고자체평가/처인구/txt/dataset_all.txt",
  "c:/R/chu.csv"
)

# C:/R/all.txt is presumably the text of all districts combined; verify
# against however that file was produced.
wordcount2 <- count_top_nouns("C:/R/all.txt", "c:/R/all.csv")

# Word cloud of the combined counts: names and frequencies come from the same
# sorted table so each word is sized by its own count.
pal <- brewer.pal(9, "Pastel1")

wordcloud(
  names(wordcount2),
  freq = wordcount2,
  scale = c(7, 1),
  rot.per = 0.25,
  min.freq = 1,
  random.order = FALSE,
  random.color = TRUE,
  colors = pal
)



'빅데이터 > R' 카테고리의 다른 글

R Markdown 문법  (0) 2017.01.09
R정복 2일차  (0) 2017.01.09
R정복 1일차  (0) 2017.01.05
댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
TAG
more
«   2024/03   »
1 2
3 4 5 6 7 8 9
10 11 12 13 14 15 16
17 18 19 20 21 22 23
24 25 26 27 28 29 30
31
글 보관함