R科研作图学习小组

http://group.keyangou.com/RGraph
组长: 管理员:
  • 访问次数:15626
  • 小组等级:8
  • 话题:567
  • 回答:34
  • 签到:357
  • 小组排名:
  • R2-39 第二期,第二次作业

    redpanda 发布于:2018.02.14


    Task1:

    # Task1: open a MySQL connection that the following steps reuse as `con`.

    # Install the MySQL driver and the DBI interface only when they are missing,
    # so re-running the script does not reinstall them every time.
    for (pkg in c("RMySQL", "DBI")) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        install.packages(pkg)
      }
    }

    library(RMySQL)

    # Connect to the local database "rdb" as root with an empty password.
    con <- dbConnect(MySQL(), host = "localhost", dbname = "rdb",
                     user = "root", password = "")

    # Switch the session character set to UTF-8. The statement returns no rows,
    # so its result object is cleared immediately instead of being left open.
    # (The original line closed the string with a typographic quote, which made
    # the whole script unparseable.)
    dbClearResult(dbSendQuery(con, "SET NAMES utf8"))

    # Print the connection metadata to confirm the connection works.
    dbGetInfo(con)

    Task2:

    # Task2: page through the PubMed esearch results for `term` and append the
    # PMIDs of every page to the MySQL table "article".
    # Uses the open connection `con` created in Task1 and closes it at the end.
    library(httr)

    totalNum <- 563    # total hit count obtained in the previous assignment
    pageSize <- 10     # records per request
    totalPage <- ceiling(totalNum / pageSize)  # number of pages to request
    term <- "(cell[TA]) AND 2017[DP]"
    usehistory <- "Y"  # ask the server to keep the search on its history server
    querykey <- ""
    webenv <- ""
    postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    for (currentPage in seq_len(totalPage)) {
      # Zero-based offset of the first record on this page.
      retstart <- (currentPage - 1) * pageSize

      r <- POST(
        postSearchUrl,
        body = list(
          db = "pubmed",
          term = term,
          retmode = "json",
          retstart = retstart,
          retmax = pageSize,
          usehistory = usehistory,
          rettype = "uilist"
        )
      )
      stop_for_status(r)  # abort with an R error on any HTTP error status

      data <- content(r, "parsed", "application/json")
      esearchresult <- data$esearchresult

      # Kept for later requests that want to reuse the server-side history.
      querykey <- esearchresult$querykey
      webenv <- esearchresult$webenv

      # idlist is a list with one PMID string per hit; flatten it in a single
      # step instead of growing a vector element by element.
      idlist <- esearchresult$idlist
      pmid <- as.character(unlist(idlist))

      # An empty page would yield a data frame without any column, which
      # cannot be written to the table; nothing is left to fetch in that case.
      if (length(pmid) == 0) {
        break
      }

      # append = TRUE adds this page's rows to the table instead of trying to
      # create it again on every iteration.
      article <- data.frame(pmid = pmid)
      dbWriteTable(con, "article", article, append = TRUE)
    }

    dbDisconnect(con)

    Task3:

    # Task3: for every row of "article" that is not processed yet (isdone = 0),
    # download the record from PubMed efetch in batches of 10 and store its
    # title and abstract, marking the row as done.
    library(RMySQL)
    library(xml2)
    library(httr)

    # killDbConnections() is a helper that is not defined in this post; call it
    # only when the session provides it, so the script also runs without it.
    if (exists("killDbConnections", mode = "function")) {
      killDbConnections()
    }

    # `con` streams the SELECT result. The UPDATE statements go through a
    # second connection because `con` still has that result set pending.
    # It is opened once here instead of once per article.
    con <- dbConnect(MySQL(), host = "localhost", dbname = "rdb",
                     user = "root", password = "")
    dbClearResult(dbSendQuery(con, "SET NAMES utf8"))
    con2 <- dbConnect(MySQL(), host = "localhost", dbname = "rdb",
                      user = "root", password = "")
    dbClearResult(dbSendQuery(con2, "SET NAMES utf8"))

    # Turn one string into a quoted SQL literal. Quotes and backslashes are
    # escaped by the driver instead of being deleted from the text; a missing
    # value becomes an empty string rather than the literal text "NA".
    sqlLiteral <- function(conn, x) {
      if (is.na(x)) {
        x <- ""
      }
      paste0("'", dbEscapeStrings(conn, x), "'")
    }

    postFetchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")

    while (!dbHasCompleted(rs)) {
      chunk <- dbFetch(rs, 10)
      if (nrow(chunk) == 0) {
        break  # nothing fetched: avoid an efetch request with an empty id list
      }

      # Comma-separated PMIDs of this batch, addressed by column name rather
      # than by column position.
      pmidStr <- paste(chunk[["pmid"]], collapse = ",")

      r2 <- POST(
        postFetchUrl,
        body = list(
          db = "pubmed",
          id = pmidStr,
          retmode = "xml"
        )
      )
      stop_for_status(r2)  # abort with an R error on any HTTP error status

      data2 <- content(r2, "parsed", "application/xml")
      article <- xml_children(data2)  # one child node per returned record

      for (cnt in seq_along(article)) {
        node <- article[[cnt]]
        pmid <- xml_text(xml_find_first(node, ".//PMID"))
        if (is.na(pmid)) {
          next  # no PMID in this record, so there is no row to update
        }
        title <- xml_text(xml_find_first(node, ".//ArticleTitle"))
        # Structured abstracts consist of several <AbstractText> sections;
        # join all of them. A record without abstract yields "".
        abstract <- paste(
          xml_text(xml_find_all(node, ".//AbstractText")),
          collapse = " "
        )

        sql <- paste0(
          "UPDATE article SET title=", sqlLiteral(con2, title),
          ",abstract=", sqlLiteral(con2, abstract),
          ",isdone=1 where pmid=", sqlLiteral(con2, pmid)
        )
        dbClearResult(dbSendQuery(con2, sql))
      }

      # Pause once per HTTP request (not once per article) to stay polite
      # towards the NCBI servers.
      Sys.sleep(1)
    }

    # Explicit clean-up: on.exit() only applies inside a function, so it cannot
    # be relied on to close the connections of a top-level script.
    dbClearResult(rs)
    dbDisconnect(con2)
    dbDisconnect(con)


     

     
  • Snip20180213_1.png(大小:29.7 KB;下载:0)
  • Snip20180213_2.png(大小:66.9 KB;下载:0)
  • Snip20180213_3.png(大小:93.6 KB;下载:0)
  • 0条评论 31浏览 邀请回答
    沙发空缺中~

    小组告示

    科研狗 2012-2016 京ICP备16006621 科研好助手,专业的科研社交共享平台