R科研作图学习小组

http://group.keyangou.com/RGraph
组长: 管理员:
  • 访问次数:19806
  • 小组等级:9
  • 话题:593
  • 回答:39
  • 签到:393
  • 小组排名:
  • R2-35 第二次

    德先森 发布于:2018.02.13

    第一题

    安装的黄色变不绿的为问题,主要是80的端口占用的问题,具体建议看

    http://blog.csdn.net/it429/article/details/51143836

    install.packages("RMySQL")

    library(RMySQL)

    help(package="RMySQL")

    con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")

    dbSendQuery(con,'SET NAMES utf8')

    killDbConnections <- function () {

      all_cons <- dbListConnections(MySQL())

      print(all_cons)

      for(con in all_cons)

        +  dbDisconnect(con)

      print(paste(length(all_cons), " connections killed."))

    }

    dbGetInfo(con) 

    dbListTables(con) 

    dbRemoveTable(con,"test")

    > dbGetInfo(con) 

    $host

    [1] "localhost"

    $user

    [1] "root"

    $dbname

    [1] "rdb"

    $conType

    [1] "localhost via TCP/IP"

    $serverVersion

    [1] "5.7.19"

    $protocolVersion

    [1] 10

    $threadId

    [1] 45

    $rsId

    $rsId[[1]]

    <MySQLResult:0,0,0>

    >

    > dbListTables(con) 

    [1] "article" "word"  

    >

    > dbRemoveTable(con,"test")

    [1] FALSE

    第二题

    killDbConnections()

    install.packages("httr")

    library(DBI)

    library(RMySQL)

    con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")

    dbSendQuery(con,'SET NAMES utf8')

    library(httr)

    totalNum=562

    pageSize=10

    totalPage=ceiling(totalNum/pageSize)

    currentPage=1

    term='(cell[TA]) AND 2017[DP]'

    usehistory='Y'

    querykey=''

    webenv=''

    postSearchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'

    while(currentPage<=totalPage){

      retstart=(currentPage-1)*pageSize

      r <- POST(postSearchUrl,

           

                body = list(

                  db='pubmed',

                  term=term,

                  retmode='json',

                  retstart=retstart,

                  retmax=pageSize,

                  usehistory=usehistory,

                  rettype='uilist'

                )

               

      )

      stop_for_status(r)

      data=content(r, "parsed", "application/json")

      esearchresult=data$esearchresult

      querykey=esearchresult$querykey

      webenv=esearchresult$webenv

      idlist =esearchresult$idlist

      n = length(idlist)

      pmid=c()

      i = 1

      while(i<=n){

        pmid=c(pmid, as.character(idlist[i][1]))

        i = i+1

      }

      article=data.frame('pmid'=pmid)

      dbWriteTable(con,"article",article,append=TRUE)

      currentPage = currentPage + 1

    }

    dbDisconnect(con)

    killDbConnections()

    install.packages("httr")

    library(DBI)

    library(RMySQL)

    con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")

    dbSendQuery(con,'SET NAMES utf8')

    library(httr)

    totalNum=562

    pageSize=10

    totalPage=ceiling(totalNum/pageSize)

    currentPage=1

    term='(cell[TA]) AND 2017[DP]'

    usehistory='Y'

    querykey=''

    webenv=''

    postSearchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'

    while(currentPage<=totalPage){

      retstart=(currentPage-1)*pageSize

      r <- POST(postSearchUrl,

           

                body = list(

                  db='pubmed',

                  term=term,

                  retmode='json',

                  retstart=retstart,

                  retmax=pageSize,

                  usehistory=usehistory,

                  rettype='uilist'

                )

               

      )

      stop_for_status(r)

      data=content(r, "parsed", "application/json")

      esearchresult=data$esearchresult

      querykey=esearchresult$querykey

      webenv=esearchresult$webenv

      idlist =esearchresult$idlist

      n = length(idlist)

      pmid=c()

      i = 1

      while(i<=n){

        pmid=c(pmid, as.character(idlist[i][1]))

        i = i+1

      }

      article=data.frame('pmid'=pmid)

      dbWriteTable(con,"article",article,append=TRUE)

      currentPage = currentPage + 1

    }

    dbDisconnect(con)

    第三题

    install.packages("xml2")
    library(RMySQL)
    library(xml2)
    library(httr)

    con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
    dbSendQuery(con,'SET NAMES utf8')
    on.exit(dbDisconnect(con))

    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")
    while (!dbHasCompleted(rs)) {
      chunk <- dbFetch(rs, 10)
      #mode(chunk)
      #print(chunk)
      #chunk[x,3] 第3列为获取到的pmid
      pmidStr=""
      i=1
      n=nrow(chunk) #获得总行数,和上面设置的10一致,最后的时候是3
      while (i<=n){ #循环将各个pmid之间用逗号连接起来
        pmidStr = paste(pmidStr,chunk[i,3],sep=",")
        i = i + 1
      }
      #pmid=",29195067,29195066,29195065,29195064,29153837,29153836,29153835,29153834,29153833,29153832"
      #去掉第一个逗号,从第2位起,到100000位,上面字符串没有这么多字符,因此到末尾
      pmidStr=substr(pmidStr,2,100000)
      #上面字符串就是我们post到pubmed上面的字符串,用于获取title和abstract
     
      #下面就是第一次作业里面获取title和abstract
      postFetchUrl='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
      r2 <- POST(postFetchUrl,
                 body = list(
                   db='pubmed',
                   id=pmidStr,
                   retmode='xml'
                 )
      )
      stop_for_status(r2) #clear http status
      data2=content(r2, "parsed", "application/xml")
      article=xml_children(data2)
      #xml_length(article)为里面文章的数量
      count=length(article)
      cnt=1
      while(cnt<=count){
        #下面的xml_text和xml_find_first均为XML2包里面的函数
        title=xml_text(xml_find_first(article[cnt],".//ArticleTitle")) #找到第一个ArticleTitle节点
        abstract=xml_text(xml_find_first(article[cnt],".//AbstractText"))
        pmid=xml_text(xml_find_first(article[cnt],".//PMID"))
        title = gsub("'","",title)
        abstract = gsub("'","",abstract)
        sql=paste("UPDATE article SET title='",title,"',abstract='",abstract,"',isdone=1"," where pmid='",pmid,"'",sep="")
        con2 <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
        dbSendQuery(con2,'SET NAMES utf8')
        dbSendQuery(con2,sql)
        dbDisconnect(con2)
        cnt = cnt + 1
        Sys.sleep(1)
      }
    }

    在row6 开始报错,还在想办法中,先提交这些结果吧

    1.PNG

     
    0条评论 146浏览 邀请回答
    沙发空缺中~

    小组告示

    科研狗 2012-2016 京ICP备16006621 科研好助手,专业的科研社交共享平台