R科研作图学习小组

http://group.keyangou.com/RGraph
组长: 管理员:
  • 访问次数:24156
  • 小组等级:9
  • 话题:593
  • 回答:39
  • 签到:419
  • 小组排名:
  • R2-09-第二期-2 R读取pubmed存入mysql数据库

    Giant Panda 发布于:2018.02.07

    任务一:

    111.png


    2222.png


    任务二:获取并存储 Cell 杂志 2017 年发表文章的 PubMed ID(PMID)

    # Task 2: page through PubMed ESearch results for articles published in the
    # journal Cell in 2017 and append every page of PMIDs to the MySQL table
    # `article`.
    library("RMySQL")
    library("httr")

    # killDbConnections() is defined elsewhere (not visible in this snippet).
    killDbConnections()

    # dbWriteTable() and dbDisconnect() below need an open connection; the
    # original snippet used `con` without creating one. The parameters are the
    # same ones the task 3 script uses for its connection.
    con <- dbConnect(MySQL(), host = "localhost", dbname = "rdb",
                     user = "root", password = "")

    # Initial estimate of the hit count; it is replaced by the count that
    # ESearch reports, so the loop still covers every hit if PubMed changes.
    totalNum <- 562
    pageSize <- 10
    totalPage <- ceiling(totalNum / pageSize)
    currentPage <- 1
    term <- "(cell[TA]) AND 2017[DP]"

    usehistory <- "Y"
    # Filled from each response; nothing in this block reads them afterwards.
    querykey <- " "
    webenv <- " "

    postSearchUrl <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    tryCatch({
      while (currentPage <= totalPage) {
        retstart <- (currentPage - 1) * pageSize
        r <- POST(
          postSearchUrl,
          body = list(
            db = "pubmed",
            term = term,
            retmode = "json",
            retstart = retstart,
            retmax = pageSize,
            usehistory = usehistory,
            rettype = "uilist"
          )
        )

        # Turn an HTTP error status into an R error instead of parsing garbage.
        stop_for_status(r)
        data <- content(r, "parsed", "application/json")

        # Fields used from esearchresult: idlist, count, querykey, webenv.
        esearchresult <- data$esearchresult
        querykey <- esearchresult$querykey
        webenv <- esearchresult$webenv

        # Trust the server-side hit count over the hard-coded estimate.
        reportedCount <- suppressWarnings(as.integer(esearchresult$count))
        if (length(reportedCount) == 1 && !is.na(reportedCount)) {
          totalNum <- reportedCount
          totalPage <- ceiling(totalNum / pageSize)
        }

        # idlist is parsed as a list of id strings; flatten it in one step
        # rather than growing a vector element by element.
        idlist <- esearchresult$idlist
        pmid <- as.character(unlist(idlist))

        # An empty page means there is nothing left to store; writing a
        # zero-column data frame to the table would be an error at best.
        if (length(pmid) == 0) {
          break
        }

        article <- data.frame("pmid" = pmid)
        dbWriteTable(con, "article", article, append = TRUE)
        currentPage <- currentPage + 1

        # Stay under NCBI's limit of three requests per second without API key.
        Sys.sleep(0.34)
      }
    }, finally = {
      # Release the connection even when a request or a write fails.
      dbDisconnect(con)
    })


    任务三:获取并存储 Cell 杂志 2017 年发表文章的标题(title)和摘要(abstract)

    # Task 3: for every row of table `article` with isdone=0, fetch the PubMed
    # record through EFetch (in batches of up to 10 rows), pull out the title
    # and abstract, and write them back with an UPDATE that also sets isdone=1.
    library("RMySQL")
    library("xml2")
    library("httr")
    # killDbConnections() is defined elsewhere (not visible in this snippet).
    killDbConnections()  
    con <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
    dbSendQuery(con,'SET NAMES utf8')
    # NOTE(review): on.exit() is meant for use inside a function; at the top
    # level of a script it may not defer the disconnect as intended - confirm.
    on.exit(dbDisconnect(con))
    # The result set stays open on `con` while the loop below fetches from it.
    rs <- dbSendQuery(con, "SELECT * FROM article WHERE isdone=0")
    while (!dbHasCompleted(rs)) {
      # Pull the next batch of at most 10 pending rows.
      chunk <- dbFetch(rs, 10)
      #mode(chunk)
      #print(chunk)
      #chunk[x,3] 
      # Build the comma-separated id list for EFetch from column 3 of the batch
      # (presumably the pmid column - verify against the table schema).
      # NOTE(review): pmidStr starts as "" and every paste() uses sep=",", so
      # the resulting string begins with a comma (",id1,id2,...").
      pmidStr=""
      i=1
      n=nrow(chunk) 
      while (i<=n){
        pmidStr = paste(pmidStr,chunk[i,3],sep=",")
        i = i + 1
      }
      postFetchUrl="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
      r2 <- POST(postFetchUrl, 
                 body = list(
                   db="pubmed",
                   id=pmidStr,
                   retmode="xml"
                 )
      )
      stop_for_status(r2) # raise an R error if the HTTP request failed
      data2=content(r2, "parsed", "application/xml")
      # Each child of the parsed document root is handled as one article.
      article=xml_children(data2)
      #xml_length(article)
      count=length(article)
      cnt=1
      while(cnt<=count){
        # xml_find_first() returns only the first matching node, so only the
        # first AbstractText element of a record is read.
        title=xml_text(xml_find_first(article[cnt],".//ArticleTitle")) 
        abstract=xml_text(xml_find_first(article[cnt],".//AbstractText"))
        pmid=xml_text(xml_find_first(article[cnt],".//PMID"))
    
        # Drop single quotes so they cannot terminate the quoted SQL literals
        # built below. NOTE(review): this changes the stored text and is not
        # real escaping - consider proper escaping or a parameterized query.
        title = gsub("'","",title)
        abstract = gsub("'","",abstract)
        
        sql=paste("UPDATE article SET title='",title,"',abstract='",abstract,"',isdone=1"," where pmid='",pmid,"'",sep="")
    
        # A second connection is opened and closed for every article;
        # presumably because `con` still has the pending result set `rs` - confirm.
        # NOTE(review): the results of these dbSendQuery() calls are not
        # cleared before the connection is closed.
        con2 <- dbConnect(MySQL(),host="localhost",dbname="rdb",user="root",password="")
        dbSendQuery(con2,'SET NAMES utf8')
        dbSendQuery(con2,sql)
        dbDisconnect(con2)
        cnt = cnt + 1
        # NOTE(review): the snippet ends here; the closing braces of both while
        # loops are missing from the post.

    7777.png

     
    0条评论 159浏览 邀请回答
    沙发空缺中~

    小组告示

    科研狗 2012-2016 京ICP备16006621 科研好助手,专业的科研社交共享平台