Purpose: Explore how a rodents place in the network based on closeness centrality can be used to explain Hantavirus host status.

Load packages

library(forcats)
library(ggplot2)
library(fmsb)

Quantiles

Calculate quantiles for closeness centrality and assign them to each rodent in a new column.

closcen<- read.csv("ClosenessCentralityHanta.csv", header=TRUE) ## From Gephi

closcen<- subset(closcen, closnesscentrality>0) # get rid of rodents without connections
sorted<-closcen[order(closcen$closnesscentrality ),] 
quan<-quantile(closcen$closnesscentrality)

genrod<- subset(closcen, closnesscentrality> quan[4])
midrod<- subset(closcen, closnesscentrality<= quan[4] & closnesscentrality> quan[3])
lowrod <- subset(closcen, closnesscentrality<= quan[3] & closnesscentrality> quan[2])
specrod<-subset(closcen, closnesscentrality<= quan[2] & closnesscentrality>= quan[1])

genrod$Quantile<- c(1,1,1,1,1,1,1,1,1,1,1,1)
midrod$Quantile<-c(2,2,2,2,2,2,2,2,2,2,2)
lowrod$Quantile <- c(3,3,3,3,3,3,3,3,3,3,3,3)
specrod$Quantile<- c(4,4,4,4,4,4,4,4,4,4,4,4,4,4)

grod <- genrod[,c("Id", "Quantile", "closnesscentrality")]
mrod<- midrod[,c("Id", "Quantile", "closnesscentrality")]
lrod<- lowrod[,c("Id", "Quantile", "closnesscentrality")]
srod<- specrod[,c("Id", "Quantile", "closnesscentrality")]
netrod<-rbind(grod, mrod, lrod, srod)

Plot the closeness centrality quantiles.

ggplot(netrod, aes(x = fct_reorder(Id, closnesscentrality, .desc=TRUE ), y = closnesscentrality, group=Quantile)) + 
  geom_line(aes(colour= Quantile)) + 
  geom_area(data=netrod[netrod$Quantile=="1",],mapping=aes(fill=1), alpha=.6, show.legend = F)+
  geom_area(data=netrod[netrod$Quantile=="2",],mapping=aes(fill=2), alpha=.6, show.legend = F)+
  geom_area(data=netrod[netrod$Quantile=="3",],mapping=aes(fill=3), alpha=.6, show.legend = F)+
  geom_area(data=netrod[netrod$Quantile=="4",],mapping=aes(fill= 4), alpha=.6, show.legend = F)+
  geom_point(aes(colour=Quantile), show.legend = FALSE)+
  theme_minimal()+
  theme(axis.text.x=element_text(angle=90, hjust=0.95,vjust=1, face="italic")) + xlab("Rodent") +ylab("Closeness Centrality")

Range, Closeness Centrality, and Total Relationships and Parasites

Explore relationships between range size with closeness centrality, total parasites, and total relationships

cc<- read.csv("AllDataConsol.csv", header=TRUE) #file has rodent names, range size, total parasites, total relationships, and closeness centrality
par(mfrow=c(1,3))
closecen<-ggplot(cc, aes(x=Range, y=Closeness.Centrality))+ 
  geom_point()+
  stat_smooth(method="glm")
closecen

trela<-ggplot(cc, aes(x=Range, y=Total.Relationships))+ 
  geom_point()+
  stat_smooth(method="glm")
trela

tpara<-ggplot(cc, aes(x=Range, y= Total.Parasites))+ 
  geom_point()+
  stat_smooth(method="glm")
tpara

cclm<- glm( Range ~ Closeness.Centrality, data=cc)
p_cc<-coef(summary(cclm))[2,4]
tplm<- glm( Range ~Total.Parasites , data=cc)
p_tp<-coef(summary(tplm))[2,4]
trlm<- glm( Range ~ Total.Relationships, data=cc)
p_tr<- coef(summary(trlm))[2,4]

Range has a significant relationship with closeness centrality (p= 0.0065876), total relationships (p= 1.5202329^{-5}), and total parasites (p= 5.5458519^{-6}).

Compare total parasites and total relationships per rodent on the same plot

ratio<- median(cc$Total.Relationships)/ median(cc$Total.Parasites)
n<-4
ggplot(cc, aes(x=fct_reorder(Rodent, Total.Parasites)))+ 
  geom_line(aes(y=Total.Parasites, group=F), color = 'black') +
  geom_point(aes(y=Total.Relationships/n, group= F))+ theme_minimal() +
  theme(axis.text.x=element_text(angle=90, hjust=0.95,vjust=1, face="italic") )+
  xlab("Rodents") +
  scale_y_continuous(
    name = "Total Parasites",
    sec.axis = sec_axis( trans=~.*4, name="Total Relationships")
  ) 

On average, an ectoparasite connects a rodent to 4.2857143 other rodents. In the plot above, when the dot (total relationships) falls above the line (total parasites) for a rodent, it means that rodents parasites connect it to more rodents than the average. When it falls below the line, the parasites connect it to less rodents on average.

Hantavirus

Next, use a binomial logistic regression to test how well closeness centrality predicts hantavirus hosts

cchanta<- read.csv("ClosenessCentralityHanta.csv", header=T) # file with CC and hantavirus status 0/1 for neg/pos 

boxplot(closnesscentrality~Hanta, data=cchanta, xlab = "Hantavirus Status", ylab= "Closeness Centrality")

Based on the boxplot, it seems as though known Hantavirus hosts have a higher closeness centrality. Now we will use a binomial logistic regression model to test this.

log<-glm(X ~ closnesscentrality, family= 'binomial', data= cchanta ) #X is a column with 0 and 1 for negative and positive hantavirus status
R2<- NagelkerkeR2(log)
p_hanta<- coef(summary(log))[2,4]
summary(log)
## 
## Call:
## glm(formula = X ~ closnesscentrality, family = "binomial", data = cchanta)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.14401  -0.11964  -0.00706  -0.00334   2.35134  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          -33.27      13.73  -2.423   0.0154 *
## closnesscentrality    48.40      20.24   2.391   0.0168 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 40.496  on 49  degrees of freedom
## Residual deviance: 12.234  on 48  degrees of freedom
## AIC: 16.234
## 
## Number of Fisher Scoring iterations: 9

In the binomial logistic regression model, closeness centrality explains 78% of the variance in Hantavirus status among rodents (p= 0.0168109, R2= 0.7778293)

Next, we will use a Phylogenetic logistic regression for binary dependent variables to see how strong phylogentic signal is.

Load packages

library(picante)
library(ape)
library(phylotools)
library(phytools)
library(geotax)
library(reshape2)
library(ggplot2)
library(phylolm)
library(ggtree)

load files

phy<- read.nexus("tree/output.nex")
consensus<-consensus.edges(phy, consensus.tree=consensus(phy,p=0.5))  #ape for majority rule consensus
plot(consensus)

cc<- read.csv("ClosenessCentralityHanta.csv", header=T, row.names = 1)
row.names(cc)<- sub(" ", "_", row.names(cc))

Fit the model and plot the curve

tre<- consensus
cc<-cc[order(match(row.names(cc),tre$tip.label)),]
set.seed(123456)

class(tre)
## [1] "phylo"
summary(tre)
## 
## Phylogenetic tree: tre 
## 
##   Number of tips: 48 
##   Number of nodes: 46 
##   Branch lengths:
##     mean: 6.125721 
##     variance: 55.60643 
##     distribution summary:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
##  0.000000  1.586229  2.868166  7.292117 34.591102 
##   No root edge.
##   First ten tip labels: Cavia_porcellus 
##                         Octodontomys_gliroides
##                         Octodon_degus
##                         Octodon_lunatus
##                         Octodon_bridgesi
##                         Spalacopus_cyanus
##                         Aconaemys_porteri
##                         Aconaemys_sagei
##                         Aconaemys_fuscus
##                         Octomys_mimax
##   First ten node labels: 1 
##                          1
##                          1
##                          1
##                          1
##                          1
##                          1
##                          1
##                          1
##                          1
fit = phyloglm(X ~ closnesscentrality, tre, data=cc, boot=100)
## Warning in phyloglm(X ~ closnesscentrality, tre, data = cc, boot = 100): 2 taxa
## not in the tree: their data will be ignored
## Warning in phyloglm(X ~ closnesscentrality, tre, data = cc, boot = 100): The estimated coefficients in the absence of phylogenetic signal lead
##   to some linear predictors beyond 'btol'. Increase btol?
##   Starting from beta=0 other than intercept.
## Warning in phyloglm(X ~ closnesscentrality, tre, data = cc, boot = 100): the estimate of 'alpha' (0.000462573319703894) reached the lower bound (0.000453731978536918).
##  This may reflect a flat likelihood at low alpha values near the lower bound,
##  meaning that the phylogenetic correlation is estimated to be maximal
##  under the model in Ives and Garland (2010).
summary(fit)
## 
## Call:
## phyloglm(formula = X ~ closnesscentrality, data = cc, phy = tre, 
##     boot = 100)
##        AIC     logLik Pen.logLik 
##     23.295     -8.648     -9.666 
## 
## Method: logistic_MPLE
## Mean tip height: 40.36665
## Parameter estimate(s):
## alpha: 0.0004625733 
##       bootstrap mean: 0.002489067 (on log scale, then back transformed)
##       so possible upward bias.
##       bootstrap 95% CI: (0.0004543538,0.9567479)
## 
## Coefficients:
##                    Estimate   StdErr  z.value lowerbootCI upperbootCI  p.value
## (Intercept)         -6.2937   2.3342  -2.6962    -13.8907     -3.4264 0.007013
## closnesscentrality   9.9426   3.1622   3.1442      5.9304     22.6024 0.001666
##                      
## (Intercept)        **
## closnesscentrality **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Note: Wald-type p-values for coefficients, conditional on alpha=0.0004625733
##       Parametric bootstrap results based on 100 fitted replicates
par(las=1,bty="l") ## cosmetic
plot(cc$closnesscentrality,jitter(cc$X,factor=0,amount=0.02),
     xlab="Closeness Centrality",ylab="Hantavirus",xlim=c(0,1))
cf <- coef(fit)
x=cc$closnesscentrality
curve(plogis(cf[1]+cf[2]*x),col="red",add=TRUE)