Posts Tagged ‘R’

Human Disease Network

Published by chengjun on September 5th, 2011

This is the result; the code is attached below.

# Disease co-occurrence network
# Data: http://barabasilab.neu.edu/projects/hudine/resource/data/data.html
# chengjun @ common room 2011/9/4

# Load data ----
# AllNet3.txt is whitespace-delimited with no header row; decimals use ".".
dis <- read.table(
  "D:/NKS & SFI/AllNet3.txt",
  header = FALSE,
  sep = "",
  dec = "."
)

# Column description (HuDiNe data documentation) ----
#  1  ICD-9 code disease 1
#  2  ICD-9 code disease 2
#  3  Prevalence disease 1
#  4  Prevalence disease 2
#  5  Co-occurrence between diseases 1 and 2
#  6  Relative Risk
#  7  Relative Risk 99% Conf. Interval (left)
#  8  Relative Risk 99% Conf. Interval (right)
#  9  Phi-correlation
# 10  t-test value
colnames(dis) <- c(
  "dis1", "dis2", "prevalence_dis1", "prevalence_dis2", "co_ocurrence",
  "risk", "riskleft", "riskright", "phi", "t"
)
# Degree distribution ----
# dis1: for each disease in column 1, the number of distinct diseases it
# co-occurs with (its degree in the co-occurrence network).
dis1 <- as.data.frame(table(dis[, 1]))
names(dis1) <- c("disease",
                 "Numbers of combinations of coocurrence with other diseases")
plot(dis1)  # number of co-occurrence combinations per disease
plot(as.data.frame(table(dis1[, 2])))  # distribution of those counts

popp <- as.data.frame(dis1)

plot(popp[, 1], popp[, 2], xlab = "In-degree", ylab = "Frequency",
     type = "p", col = "black", lwd = 2, main = "")

# Fit a power law by OLS on the log-log scale.
# popp[, 1] is a factor; the idiomatic factor-to-numeric conversion is
# as.numeric(levels(f))[f].
# BUG FIX: the original line had unbalanced brackets
# (`popp[,1)`) and did not parse.
powerfit <- lm(log(popp[, 2]) ~ log(as.numeric(levels(popp[, 1]))[popp[, 1]]))
summary(powerfit)

# Log-log scatter of the degree distribution.
# BUG FIX: the original plot() call was missing a closing parenthesis on the
# first log() argument and did not parse.
plot(log(as.numeric(levels(popp[, 1]))[popp[, 1]]), log(popp[, 2]),
     xlab = "In-degree", ylab = "Frequency", type = "p", col = "black",
     lwd = 2, main = "")

abline(powerfit, col = "grey", lwd = 3)
 
# Compute the degree distribution using igraph ----
library(igraph)  # install.packages("igraph")

# Build an undirected graph from the disease-pair edge list (columns 1-2).
disease_graph <- graph.data.frame(dis[, 1:2], directed = FALSE, vertices = NULL)
class(disease_graph)

deg <- degree(disease_graph, mode = "in")
cum_deg_dist <- degree.distribution(disease_graph, mode = "in",
                                    cumulative = TRUE)

# Maximum-likelihood power-law fit to the degree sequence (tail x >= 20).
pl_fit <- power.law.fit(deg, xmin = 20)
pl_fit

plot(cum_deg_dist, log = "xy", xlab = "degree", ylab = "cumulative frequency",
     col = 1, main = "Nonlinear preferential attachment")
# Overlay the fitted power law on the cumulative plot.
lines(10:500, 10 * (10:500)^(-coef(pl_fit) + 1))

# Save the plot ----
setwd("D:/NKS & SFI/")
savePlot(filename = "Nonlinear preferential attachment",
         type = c("png"),
         device = dev.cur(),
         restoreConsole = TRUE)
#####################################################
#
#                               plot the disease graph
#
######################################################
#~~~~~~~~~~~~~~~~subset data~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Keep only strongly co-occurring disease pairs (column 5 = co-occurrence
# count >= 100000) as the network "backbone".
backbone<-subset(dis, dis[5]>=100000)
dim(backbone)
 
library(igraph)# install.packages("igraph")
# Undirected graph from the backbone edge list (disease 1, disease 2).
g<-graph.data.frame(backbone[,1:2], directed=FALSE, vertices=NULL)
summary(g)
#------the size of nodes denotes the prevalence of events~~~~~~~~~~~~~~~~~~~~~~~~~~~#
 
# Stack (disease, prevalence) pairs for both endpoints of every edge, then
# de-duplicate to get one prevalence value per disease.
prevalence<-data.frame(rbind(cbind(backbone[,1], backbone[,3]), cbind(backbone[,2], backbone[,4])) )
prevalencen<-unique(prevalence, fromLast = F)  ## extract unique elements
 
# NOTE(review): this assumes row order of prevalencen matches the vertex
# order of V(g) (first appearance in the edge list). That alignment is not
# guaranteed by construction here -- verify before trusting node sizes.
V(g)$size <- log(prevalencen[,2])-5
 
#------the color of nodes denotes the popularity of events~~~~~~~~~~~~~~~~~~~~------#
# NOTE(review): log(prevalence+5) is used as an index into a 20-color
# palette; non-integer indices are truncated and values > 20 yield NA
# colors -- confirm the prevalence range keeps indices in 1..20.
V(g)$color <- rainbow(20)[log(prevalencen[,2]+5)]
# V(g)$color <-heat.colors(20, alpha = 1)[log(prevalencen[,2])]
#-------the width of links denotes the volume of co-occurrence on the link------#
E(g)$weight <- log(backbone[,5])/5
 
plot(g, vertex.label= NA,edge.arrow.size=0.2,layout=layout.fruchterman.reingold,edge.width=E(g)$weight+1 )
 
# Save the rendered network plot (savePlot is Windows-only).
setwd("D:/NKS & SFI/")
savePlot(filename = "Disease Coocurrence plot 100000 prevalence_97nodes_674",
         type = c( "png"),
         device = dev.cur(),
         restoreConsole = TRUE)

My evaluation of twitteR Package

Published by chengjun on July 24th, 2011
# @author Chengjun WANG
# @date July 22, 2011
# Mining twitter with R ----
# http://jeffreybreen.wordpress.com/2011/07/21/one-liners-twitter/
library(twitteR)
sessionInfo() # list the packages (and versions) in use
# update.packages() # press enter to skip, and press 'y' to choose twitteR
# Only twitteR_0.99.9 can run it.

# Twitter search ----
tweets <- searchTwitter("#rstats", n=1500)
# The n=1500 specifies the maximum number of tweets supported by the Search API
# head(tweets) # return first 6 tweets searched
# class(tweets[[1]])
length(tweets)
class(tweets)
tweet <- tweets[[1]]
name <- tweet$getScreenName()
# BUG FIX: getScreenName() returns a plain character string, so the original
# `name$getLocation()` failed ($ is not valid on a character vector).
# Look the user up first, then ask that user object for its location.
getUser(name)$getLocation()
tweet$getText()
# Using plyr ----
library(plyr)
# ldply: split a list, apply a function, return the results in a data frame.
tweets.df = ldply(tweets, function(tw) tw$toDataFrame())
# str(tweets.df)
tweets.text = laply(tweets, function(tw) tw$getText())
tweets.name1 = laply(tweets, function(tw) tw$getScreenName())
# Compare ldply (data frame result) with laply (array/vector result):
tweets.name2 = ldply(tweets, function(tw) tw$getScreenName())
head(tweets.name1, 2)
head(tweets.name2, 10)
#~~~~~~~~~~~~~~~~~~~~~~~~~publicTimeline~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Fetch the public timeline: a list of status (S4 reference) objects.
publicTweets <- publicTimeline()
length(publicTweets)
publicTweets[1:5]
publicTweets[[1]]$getScreenName()   # isS4(publicTweets[[1]])
#~~~~~~~~~~~~~~~~~~~~~~control the period, place~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# searchTwitter supports date windows (since/until, YYYY-MM-DD) and a
# geocode filter of the form "lat,long,radius".
searchTwitter('charlie sheen', since='2011-03-01', until='2011-07-12', n=100)
searchTwitter('charlie sheen', since='2011-03-01',n=10)
searchTwitter('patriots', geocode='42.375,-71.1061111,10mi')
searchTwitter("#beer", n=100)
# Rtweets is a convenience wrapper for searching the #rstats hashtag.
Rtweets(n=37)
# Authentication with OAuth ----
# ONLY R2.13 CAN INSTALL RJSONIO
library(ROAuth)
# SECURITY FIX: the original post published a live consumer key/secret pair.
# Never embed real API credentials in code or blog posts -- read them from
# environment variables (or a file kept out of version control) instead.
# Also switched the OAuth endpoints to https: Twitter requires TLS for its
# API endpoints.
cred <- OAuthFactory$new(consumerKey = Sys.getenv("TWITTER_CONSUMER_KEY"),
                         consumerSecret = Sys.getenv("TWITTER_CONSUMER_SECRET"),
                         requestURL = 'https://api.twitter.com/oauth/request_token',
                         accessURL = 'https://api.twitter.com/oauth/access_token',
                         authURL = 'https://api.twitter.com/oauth/authorize')
# cred$handshake()
# The OAuth object, once the handshake is complete, can be saved and reused.
# You should not ever have to redo the handshake.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# This example is run, but likely not how you want to do things
us <- userFactory$new(screenName="test", name="Joe Smith")
us$getScreenName()
us$getName()

# Trends for today, yesterday, and last week.
curTrends <- getTrends("current")
yesterdayTrends <- getTrends("daily", date = as.character(Sys.Date()-1))
lastWeekTrends <- getTrends("weekly", date = as.character(Sys.Date()-7))
#length(lastWeekTrends)

#~~~~~~~~~~~~~~~~get the information source of tweets~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# getStatusSource() returns HTML like
#   <a href="http://twitter.com/download/iphone">Twitter for iPhone</a>
sources <- sapply(publicTweets, function(x) x$getStatusSource())
# BUG FIX: the original `gsub("", "", sources)` was a no-op -- the "</a>"
# pattern was swallowed when the code was pasted into the blog's HTML.
# Strip the closing tag, then split on ">" to isolate the client name.
sources_1 <- gsub("</a>", "", sources)
sources_2 <- strsplit(sources_1, ">")
# Plain "web" sources carry no markup, so fall back to x[1] when no split.
sources_3 <- sapply(sources_2, function(x) ifelse(length(x) > 1, x[2], x[1]))
pie(table(sources_3))

# Flatten all public tweets into one data frame.
df <- do.call("rbind", lapply(publicTweets, as.data.frame))
dim(df)

# User look-ups and timelines.
crantastic <- getUser("crantastic")
ChengjunWANG <- getUser("ChengjunWANG")
# a particular user's timeline
cranTweetsLarge <- userTimeline("cranatic", n = 100)
cjwTweets <- userTimeline("ChengjunWANG", n = 100)
# Error in .self$twFromJSON(out) : Error: Not authorized

Jeff Gentry has done quite a good job with this package, which makes great progress in version 0.99.9; however, there still seems to be a long way to go. The potential is great, and I will pay close attention to its future improvements.


You can find the full evaluation by the following link here.


0 visitors online now
0 guests, 0 bots, 0 members