R科研作图学习小组

http://group.keyangou.com/RGraph
组长: 管理员:
  • 访问次数:27206
  • 小组等级:9
  • 话题:593
  • 回答:39
  • 签到:437
  • 小组排名:
  • R2-09 第2阶段第三次

    Giant Panda 发布于:2018.03.30

    library("DBI")

    library("RMySQL")

    # Close every MySQL connection currently registered with the RMySQL driver.
    #
    # Prints the list of open connections, disconnects each of them, then
    # prints a message stating how many connections were closed.
    # Takes no arguments; the value of the function is that of the final
    # print() call (the message string).
    killDbConnections <- function () {
      all_cons <- dbListConnections(MySQL())
      print(all_cons)
      for (con in all_cons) {
        dbDisconnect(con)
      }
      print(paste(length(all_cons), " connections killed."))
    }

    killDbConnections() # close connections left over from earlier runs

    # Open a fresh connection to the local "rdb" database.
    con <- dbConnect(MySQL(), host = "localhost", dbname = "rdb",
                     user = "root", password = "")

    # Switch the session character set to UTF-8 and release the result
    # handle immediately, since nothing is fetched from it.
    dbClearResult(dbSendQuery(con, 'SET NAMES utf8'))

    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=1")

    # Tokens of each fetched chunk; counting is done once after the loop.
    token_chunks <- list()

    # DBI needs the result object itself here, not the string "rs".
    while (!dbHasCompleted(rs)) {
      chunk <- dbFetch(rs, 10) # fetch the next 10 rows

      # Re-encode the abstracts from WINDOWS-1252 to UTF-8.
      chunk$abstract <- iconv(chunk$abstract, "WINDOWS-1252", "UTF-8")

      # Lower-case the text and strip all punctuation.
      # NOTE(review): column 5 is presumably `abstract` -- confirm against
      # the `article` table schema.
      cleaned <- gsub("[[:punct:]]", "", tolower(chunk[[5]]))

      # Split on single spaces and flatten into one character vector.
      token_chunks[[length(token_chunks) + 1L]] <-
        unlist(strsplit(cleaned, split = " "))
    }

    # Release the result set and the connection once all rows are read.
    dbClearResult(rs)
    dbDisconnect(con)

    all_tokens <- as.character(unlist(token_chunks))
    # Drop NA (missing or unconvertible abstracts) and empty tokens
    # (produced by consecutive spaces) so they are not counted as words.
    all_tokens <- all_tokens[!is.na(all_tokens) & nzchar(all_tokens)]

    # Word/frequency table, rows in order of first appearance.
    vocab <- unique(all_tokens)
    words <- data.frame(
      word = vocab,
      freq = tabulate(match(all_tokens, vocab), nbins = length(vocab)),
      stringsAsFactors = FALSE
    )

    head(words)


     1.png


    # Install the plotting dependencies only when they are missing, so that
    # re-running the script does not reinstall them every time.
    for (pkg in c("wordcloud2", "jsonlite")) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
    }

    library(wordcloud2)

    # Plot at most the first 1000 rows of `words`. head() stops at
    # nrow(words), so no all-NA padding rows are passed to wordcloud2()
    # when fewer than 1000 words exist.
    wordcloud2(head(words, 1000))

    2.png


    # Sort the word table by descending frequency.
    new_words <- words[order(words$freq, decreasing = TRUE), ]

    # Common function words to exclude from the cloud.
    # NOTE(review): 'buy' is possibly a typo for 'by' (which is also listed);
    # kept as-is -- confirm intent.
    del_word <- c('of', 'the', 'and', 'in', 'to', 'a', 'that', 'is', 'for',
                  'buy', 'with', 'we', 'are', 'an', 'this', 'these', 'as',
                  'from', 'which', 'at', 'their', 'have', 'or', 'our', 'its',
                  'but', 'how', 'be', 'here', 'on', 'can', 'into', 'data',
                  'between', 'both', 'also', 'by', 'yet', 'than', 'well',
                  'it', 'not')

    # Filter the sorted table, so the frequency ordering computed above is
    # the one that reaches the plot.
    words2 <- new_words[!(new_words$word %in% del_word), ]

    wordcloud2(words2, shape = 'triangle', size = 0.5)

    444.png

     
    1条评论 154浏览 邀请回答

    德先森 回答于:2018年04月08日 20:11:29 1楼

    北京 中国农业科学院 生物化学硕士 

    图好看啊,还有小玩意


    小组告示

    科研狗 2012-2016 京ICP备16006621 科研好助手,专业的科研社交共享平台