Purpose: Explore how a rodents place in the network based on closeness centrality can be used to explain Hantavirus host status.
Load packages
library(forcats)
library(ggplot2)
library(fmsb)
Calculate quantiles for closeness centrality and assign them to each rodent in a new column.
closcen<- read.csv("ClosenessCentralityHanta.csv", header=TRUE) ## From Gephi
closcen<- subset(closcen, closnesscentrality>0) # get rid of rodents without connections
sorted<-closcen[order(closcen$closnesscentrality ),]
quan<-quantile(closcen$closnesscentrality)
genrod<- subset(closcen, closnesscentrality> quan[4])
midrod<- subset(closcen, closnesscentrality<= quan[4] & closnesscentrality> quan[3])
lowrod <- subset(closcen, closnesscentrality<= quan[3] & closnesscentrality> quan[2])
specrod<-subset(closcen, closnesscentrality<= quan[2] & closnesscentrality>= quan[1])
genrod$Quantile<- c(1,1,1,1,1,1,1,1,1,1,1,1)
midrod$Quantile<-c(2,2,2,2,2,2,2,2,2,2,2)
lowrod$Quantile <- c(3,3,3,3,3,3,3,3,3,3,3,3)
specrod$Quantile<- c(4,4,4,4,4,4,4,4,4,4,4,4,4,4)
grod <- genrod[,c("Id", "Quantile", "closnesscentrality")]
mrod<- midrod[,c("Id", "Quantile", "closnesscentrality")]
lrod<- lowrod[,c("Id", "Quantile", "closnesscentrality")]
srod<- specrod[,c("Id", "Quantile", "closnesscentrality")]
netrod<-rbind(grod, mrod, lrod, srod)
Plot the closeness centrality quantiles.
ggplot(netrod, aes(x = fct_reorder(Id, closnesscentrality, .desc=TRUE ), y = closnesscentrality, group=Quantile)) +
geom_line(aes(colour= Quantile)) +
geom_area(data=netrod[netrod$Quantile=="1",],mapping=aes(fill=1), alpha=.6, show.legend = F)+
geom_area(data=netrod[netrod$Quantile=="2",],mapping=aes(fill=2), alpha=.6, show.legend = F)+
geom_area(data=netrod[netrod$Quantile=="3",],mapping=aes(fill=3), alpha=.6, show.legend = F)+
geom_area(data=netrod[netrod$Quantile=="4",],mapping=aes(fill= 4), alpha=.6, show.legend = F)+
geom_point(aes(colour=Quantile), show.legend = FALSE)+
theme_minimal()+
theme(axis.text.x=element_text(angle=90, hjust=0.95,vjust=1, face="italic")) + xlab("Rodent") +ylab("Closeness Centrality")
Explore relationships between range size with closeness centrality, total parasites, and total relationships
cc<- read.csv("AllDataConsol.csv", header=TRUE) #file has rodent names, range size, total parasites, total relationships, and closeness centrality
par(mfrow=c(1,3))
closecen<-ggplot(cc, aes(x=Range, y=Closeness.Centrality))+
geom_point()+
stat_smooth(method="glm")
closecen
trela<-ggplot(cc, aes(x=Range, y=Total.Relationships))+
geom_point()+
stat_smooth(method="glm")
trela
tpara<-ggplot(cc, aes(x=Range, y= Total.Parasites))+
geom_point()+
stat_smooth(method="glm")
tpara
cclm<- glm( Range ~ Closeness.Centrality, data=cc)
p_cc<-coef(summary(cclm))[2,4]
tplm<- glm( Range ~Total.Parasites , data=cc)
p_tp<-coef(summary(tplm))[2,4]
trlm<- glm( Range ~ Total.Relationships, data=cc)
p_tr<- coef(summary(trlm))[2,4]
Range has a significant relationship with closeness centrality (p= 0.0065876), total relationships (p= 1.5202329^{-5}), and total parasites (p= 5.5458519^{-6}).
Compare total parasites and total relationships per rodent on the same plot
ratio<- median(cc$Total.Relationships)/ median(cc$Total.Parasites)
n<-4
ggplot(cc, aes(x=fct_reorder(Rodent, Total.Parasites)))+
geom_line(aes(y=Total.Parasites, group=F), color = 'black') +
geom_point(aes(y=Total.Relationships/n, group= F))+ theme_minimal() +
theme(axis.text.x=element_text(angle=90, hjust=0.95,vjust=1, face="italic") )+
xlab("Rodents") +
scale_y_continuous(
name = "Total Parasites",
sec.axis = sec_axis( trans=~.*4, name="Total Relationships")
)
On average, an ectoparasite connects a rodent to 4.2857143 other rodents. In the plot above, when the dot (total relationships) falls above the line (total parasites) for a rodent, it means that rodents parasites connect it to more rodents than the average. When it falls below the line, the parasites connect it to less rodents on average.
Next, use a binomial logistic regression to test how well closeness centrality predicts hantavirus hosts
cchanta<- read.csv("ClosenessCentralityHanta.csv", header=T) # file with CC and hantavirus status 0/1 for neg/pos
boxplot(closnesscentrality~Hanta, data=cchanta, xlab = "Hantavirus Status", ylab= "Closeness Centrality")
Based on the boxplot, it seems as though known Hantavirus hosts have a higher closeness centrality. Now we will use a binomial logistic regression model to test this.
log<-glm(X ~ closnesscentrality, family= 'binomial', data= cchanta ) #X is a column with 0 and 1 for negative and positive hantavirus status
R2<- NagelkerkeR2(log)
p_hanta<- coef(summary(log))[2,4]
summary(log)
##
## Call:
## glm(formula = X ~ closnesscentrality, family = "binomial", data = cchanta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.14401 -0.11964 -0.00706 -0.00334 2.35134
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -33.27 13.73 -2.423 0.0154 *
## closnesscentrality 48.40 20.24 2.391 0.0168 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 40.496 on 49 degrees of freedom
## Residual deviance: 12.234 on 48 degrees of freedom
## AIC: 16.234
##
## Number of Fisher Scoring iterations: 9
In the binomial logistic regression model, closeness centrality explains 78% of the variance in Hantavirus status among rodents (p= 0.0168109, R2= 0.7778293)
Next, we will use a Phylogenetic logistic regression for binary dependent variables to see how strong phylogentic signal is.
Load packages
library(picante)
library(ape)
library(phylotools)
library(phytools)
library(geotax)
library(reshape2)
library(ggplot2)
library(phylolm)
library(ggtree)
load files
phy<- read.nexus("tree/output.nex")
consensus<-consensus.edges(phy, consensus.tree=consensus(phy,p=0.5)) #ape for majority rule consensus
plot(consensus)
cc<- read.csv("ClosenessCentralityHanta.csv", header=T, row.names = 1)
row.names(cc)<- sub(" ", "_", row.names(cc))
Fit the model and plot the curve
tre<- consensus
cc<-cc[order(match(row.names(cc),tre$tip.label)),]
set.seed(123456)
class(tre)
## [1] "phylo"
summary(tre)
##
## Phylogenetic tree: tre
##
## Number of tips: 48
## Number of nodes: 46
## Branch lengths:
## mean: 6.125721
## variance: 55.60643
## distribution summary:
## Min. 1st Qu. Median 3rd Qu. Max.
## 0.000000 1.586229 2.868166 7.292117 34.591102
## No root edge.
## First ten tip labels: Cavia_porcellus
## Octodontomys_gliroides
## Octodon_degus
## Octodon_lunatus
## Octodon_bridgesi
## Spalacopus_cyanus
## Aconaemys_porteri
## Aconaemys_sagei
## Aconaemys_fuscus
## Octomys_mimax
## First ten node labels: 1
## 1
## 1
## 1
## 1
## 1
## 1
## 1
## 1
## 1
fit = phyloglm(X ~ closnesscentrality, tre, data=cc, boot=100)
## Warning in phyloglm(X ~ closnesscentrality, tre, data = cc, boot = 100): 2 taxa
## not in the tree: their data will be ignored
## Warning in phyloglm(X ~ closnesscentrality, tre, data = cc, boot = 100): The estimated coefficients in the absence of phylogenetic signal lead
## to some linear predictors beyond 'btol'. Increase btol?
## Starting from beta=0 other than intercept.
## Warning in phyloglm(X ~ closnesscentrality, tre, data = cc, boot = 100): the estimate of 'alpha' (0.000462573319703894) reached the lower bound (0.000453731978536918).
## This may reflect a flat likelihood at low alpha values near the lower bound,
## meaning that the phylogenetic correlation is estimated to be maximal
## under the model in Ives and Garland (2010).
summary(fit)
##
## Call:
## phyloglm(formula = X ~ closnesscentrality, data = cc, phy = tre,
## boot = 100)
## AIC logLik Pen.logLik
## 23.295 -8.648 -9.666
##
## Method: logistic_MPLE
## Mean tip height: 40.36665
## Parameter estimate(s):
## alpha: 0.0004625733
## bootstrap mean: 0.002489067 (on log scale, then back transformed)
## so possible upward bias.
## bootstrap 95% CI: (0.0004543538,0.9567479)
##
## Coefficients:
## Estimate StdErr z.value lowerbootCI upperbootCI p.value
## (Intercept) -6.2937 2.3342 -2.6962 -13.8907 -3.4264 0.007013
## closnesscentrality 9.9426 3.1622 3.1442 5.9304 22.6024 0.001666
##
## (Intercept) **
## closnesscentrality **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Note: Wald-type p-values for coefficients, conditional on alpha=0.0004625733
## Parametric bootstrap results based on 100 fitted replicates
par(las=1,bty="l") ## cosmetic
plot(cc$closnesscentrality,jitter(cc$X,factor=0,amount=0.02),
xlab="Closeness Centrality",ylab="Hantavirus",xlim=c(0,1))
cf <- coef(fit)
x=cc$closnesscentrality
curve(plogis(cf[1]+cf[2]*x),col="red",add=TRUE)