R科研作图学习小组

http://group.keyangou.com/RGraph
组长: 管理员:
  • 访问次数:28546
  • 小组等级:9
  • 话题:593
  • 回答:39
  • 签到:439
  • 小组排名:
  • R2-30-第二阶段第二次作业

    土貉 发布于:2018.02.08

    一、

    # Part 1: smoke-test the MySQL connection by writing a small data frame
    # to a scratch table named "test".
    library(DBI)
    library(RMySQL)

    # Local development database: root user with an empty password.
    con <- dbConnect(
      MySQL(),
      host = "localhost", dbname = "rdb", user = "root", password = ""
    )

    # dbExecute() runs the statement and releases its result set right away;
    # a bare dbSendQuery() leaves the result open until dbClearResult().
    invisible(dbExecute(con, "SET NAMES utf8"))

    # Show connection details and the tables currently in the database.
    dbGetInfo(con)
    dbListTables(con)

    # Remove a "test" table left over from an earlier run, if any.
    dbRemoveTable(con, "test")

    # Ten rows: an integer index, a letter, and a random normal draw.
    test <- data.frame(a = 1:10, b = letters[1:10], c = rnorm(10))
    test

    dbWriteTable(con, "test", test, overwrite = TRUE)

    01.png

    # Clean up: drop the scratch "test" table again.
    dbRemoveTable(con,"test")

     

    数据库连接删除函数

    # Disconnect every MySQL connection currently open in this R session.
    #
    # Takes no arguments. Prints the list of open connections, closes each
    # one, then prints how many connections were found.
    killDbConnections <- function() {
      all_cons <- dbListConnections(MySQL())
      print(all_cons)

      # Braced body with a plain call: no console continuation "+" in front
      # of dbDisconnect().
      for (con in all_cons) {
        dbDisconnect(con)
      }

      print(paste(length(all_cons), " connections killed."))
    }


    二、

    # Part 2: page through a PubMed esearch query and append the returned
    # PMIDs to the "article" table, one page per request.
    killDbConnections()

    con <- dbConnect(
      MySQL(),
      host = "localhost", dbname = "rdb", user = "root", password = ""
    )
    # dbExecute() closes the statement's result set; dbSendQuery() would not.
    invisible(dbExecute(con, "SET NAMES utf8"))

    library(httr)

    # Paging setup. totalNum is the hard-coded number of hits to walk through.
    totalNum <- 563
    pageSize <- 10
    totalPage <- ceiling(totalNum / pageSize)
    currentPage <- 1

    # Search parameters sent to esearch.
    term <- "(cell[TA]) AND 2017[DP]"
    usehistory <- "Y"
    querykey <- ""
    webenv <- ""
    postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    while (currentPage <= totalPage) {
      # Zero-based offset of the first record on this page.
      retstart <- (currentPage - 1) * pageSize

      r <- POST(
        postSearchUrl,
        body = list(
          db = "pubmed",
          term = term,
          retmode = "json",
          retstart = retstart,
          retmax = pageSize,
          usehistory = usehistory,
          rettype = "uilist"
        )
      )
      stop_for_status(r)

      data <- content(r, "parsed", "application/json")
      esearchresult <- data$esearchresult

      # Remember the history-server handles from the latest response.
      querykey <- esearchresult$querykey
      webenv <- esearchresult$webenv

      # idlist is parsed as a list of ids; flatten it in one step instead of
      # growing a vector element by element.
      idlist <- esearchresult$idlist
      pmid <- as.character(unlist(idlist))

      # An empty page means there is nothing more to store; stop rather than
      # handing dbWriteTable() a data frame with no columns.
      if (length(pmid) == 0) {
        break
      }

      article <- data.frame(pmid = pmid, stringsAsFactors = FALSE)
      dbWriteTable(con, "article", article, append = TRUE)

      currentPage <- currentPage + 1
    }

    dbDisconnect(con)

    02.png



    三、

    # Part 3: for every row of "article" with isdone=0, fetch the PubMed
    # record via efetch and store its title and abstract, marking the row
    # done (isdone=1).
    library(RMySQL)
    library(xml2)
    library(httr)

    killDbConnections()

    # Read connection: streams the pending rows in chunks.
    con <- dbConnect(
      MySQL(),
      host = "localhost", dbname = "rdb", user = "root", password = ""
    )
    invisible(dbExecute(con, "SET NAMES utf8"))

    # Write connection: opened once and reused for every UPDATE. A second
    # connection is used because `con` still has the SELECT result open
    # while rows are being fetched.
    con2 <- dbConnect(
      MySQL(),
      host = "localhost", dbname = "rdb", user = "root", password = ""
    )
    invisible(dbExecute(con2, "SET NAMES utf8"))

    postFetchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

    # Turn one value into a quoted, escaped MySQL string literal. Missing
    # values become the empty string instead of the text "NA".
    quoteSqlString <- function(x) {
      if (is.na(x)) {
        x <- ""
      }
      paste0("'", dbEscapeStrings(con2, as.character(x)), "'")
    }

    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")

    tryCatch(
      {
        while (!dbHasCompleted(rs)) {
          chunk <- dbFetch(rs, 10)

          # A final fetch can come back empty; there is nothing to request.
          if (nrow(chunk) == 0) {
            break
          }
          print(chunk)

          # Comma-separated id list for efetch, read from the pmid column by
          # name (the UPDATE below keys on the same column).
          pmidStr <- paste(chunk[["pmid"]], collapse = ",")

          r2 <- POST(
            postFetchUrl,
            body = list(
              db = "pubmed",
              id = pmidStr,
              retmode = "xml"
            )
          )
          stop_for_status(r2)

          data2 <- content(r2, "parsed", "application/xml")

          # One child node of the document root per returned article.
          article <- xml_children(data2)

          for (cnt in seq_along(article)) {
            node <- article[[cnt]]
            title <- xml_text(xml_find_first(node, ".//ArticleTitle"))
            abstract <- xml_text(xml_find_first(node, ".//AbstractText"))
            pmid <- xml_text(xml_find_first(node, ".//PMID"))

            # Without a PMID there is no row to match.
            if (is.na(pmid)) {
              next
            }

            # Values are escaped, not stripped of apostrophes, so the stored
            # text matches what PubMed returned.
            sql <- paste0(
              "UPDATE article SET title=", quoteSqlString(title),
              ",abstract=", quoteSqlString(abstract),
              ",isdone=1 where pmid=", quoteSqlString(pmid)
            )
            dbExecute(con2, sql)
          }

          # Pause once per efetch request; the database updates are local.
          Sys.sleep(1)
        }
      },
      finally = {
        # Release the result set and both connections even when a request
        # or an update fails part-way through.
        dbClearResult(rs)
        dbDisconnect(con2)
        dbDisconnect(con)
      }
    )

    03.png

     
    0条评论 178浏览 邀请回答
    沙发空缺中~

    小组告示

    科研狗 2012-2016 京ICP备16006621 科研好助手,专业的科研社交共享平台