R科研作图学习小组

http://group.keyangou.com/RGraph
组长: 管理员:
  • 访问次数:22641
  • 小组等级:9
  • 话题:593
  • 回答:39
  • 签到:408
  • 小组排名:
  • R2-30第二阶段第三次作业

    土貉 发布于:2018.03.27

    一、

    library(RMySQL)

    # Build a word-frequency table from the abstracts of all finished articles
    # (rows of `article` with isdone = 1).
    #
    # Result: `words`, a data frame with one row per distinct word, in order of
    # first appearance, with columns
    #   word - the lower-cased word with punctuation stripped
    #   freq - how many times that word occurs across all fetched abstracts

    # killDbConnections() is not defined in this snippet; presumably it closes
    # connections left open by earlier runs - confirm against its definition.
    killDbConnections()

    con <- dbConnect(
      MySQL(),
      host = "localhost", dbname = "rdb", user = "root", password = ""
    )

    # SET NAMES produces no rows, so its result handle is released immediately
    # instead of being left open on the connection.
    invisible(dbClearResult(dbSendQuery(con, "SET NAMES utf8")))

    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")

    # Fetch 10 rows at a time and tokenise each batch in one vectorised step.
    # The `finally` clause releases the result set and the connection even if
    # fetching or tokenising fails part-way through.
    token_chunks <- tryCatch(
      {
        chunks <- list()
        while (!dbHasCompleted(rs)) {
          chunk <- dbFetch(rs, 10)
          chunk$abstract <- iconv(chunk$abstract, "WINDOWS-1252", "UTF-8")
          # NOTE(review): the original read the text by position (column 5)
          # right after converting `abstract` by name; this assumes column 5
          # is `abstract` - confirm against the table schema.
          cleaned <- gsub("[[:punct:]]", "", tolower(chunk$abstract))
          # Split on runs of whitespace so repeated spaces, tabs and newlines
          # do not yield extra tokens.
          chunks[[length(chunks) + 1L]] <-
            unlist(strsplit(cleaned, split = "[[:space:]]+"))
        }
        chunks
      },
      finally = {
        dbClearResult(rs)
        dbDisconnect(con)
      }
    )

    tokens <- as.character(unlist(token_chunks, use.names = FALSE))
    # Drop NA tokens (missing or unconvertible abstracts) and empty strings
    # (leading whitespace); neither is a word.
    tokens <- tokens[!is.na(tokens) & nzchar(tokens)]

    # Count every distinct word once, keeping first-appearance order, instead
    # of growing the data frame one row at a time.
    vocab <- unique(tokens)
    words <- data.frame(
      word = vocab,
      freq = tabulate(match(tokens, vocab), nbins = length(vocab)),
      stringsAsFactors = FALSE
    )

    head(words)

    01.png

    二、

    # Install wordcloud2 only when it is missing, so re-running the script does
    # not reinstall the package every time.
    if (!requireNamespace("wordcloud2", quietly = TRUE)) {
      install.packages("wordcloud2")
    }

    library(wordcloud2)

    # Draw a word cloud from at most the first 1000 rows of `words`.
    # head() stops at the last available row, whereas `words[0:1000, ]` pads
    # the result with all-NA rows when fewer than 1000 words exist.
    wordcloud2(head(words, 1000))

    02.png


    三、

    # Rank all words from most to least frequent and preview the top rows.
    # TRUE is spelled out because `T` is an ordinary variable that can be
    # reassigned.
    new_words <- words[order(words$freq, decreasing = TRUE), ]
    head(new_words)

    03.png


    # Stop words to exclude from the frequency table.
    # NOTE(review): 'buy' looks like a typo for 'by' (which is also listed);
    # it is kept so the filtering result is unchanged - confirm and remove.
    del_word <- c(
      "of", "the", "and", "in", "to", "a", "that", "is", "for", "buy",
      "with", "we", "are", "an", "this", "these", "as", "from", "which",
      "at", "their", "have", "or", "our", "its", "but", "how", "be",
      "here", "on", "can", "into", "data", "between", "both", "also", "by"
    )

    # Keep only the rows whose word is not a stop word, then rank the
    # remainder from most to least frequent and preview the top rows.
    words2 <- words[!(words$word %in% del_word), ]
    words2 <- words2[order(words2$freq, decreasing = TRUE), ]
    head(words2)

    04.png

    # Draw the word cloud for the stop-word-filtered, frequency-sorted table.
    wordcloud2(data = words2)

    05.png

     
    0条评论 90浏览 邀请回答
    沙发空缺中~

    小组告示

    科研狗 2012-2016 京ICP备16006621 科研好助手,专业的科研社交共享平台