# Vincent Schubert Malbas ®: R code

# Report the working directory: the data file below is read relative to it.
getwd()

# Machine-specific location of the course datasets (left disabled).
# setwd("C:/Users/aseandi/My Library/COURSERA/Passion-Driven Statistics/datasets")

getwd()

# One-time download of the NESARC extract (left disabled).
# fileurlcsv <- "http://spark-public.s3.amazonaws.com/pdstatistics/data_sets/nesarc_pds.csv"
# download.file(fileurlcsv, destfile="./nesarc_pds.csv")
# list.files(".")
# dateDownloaded <-date()
# dateDownloaded

# Load the survey extract into `nesarc` and print its structure.
nesarc <- read.csv(file = "./nesarc_pds.csv")
str(object = nesarc)

# Optional diagnostics (left disabled).
# summary(nesarc)
# sapply(nesarc[1, ], class)
# sum(is.na(nesarc))
# table(is.na(nesarc))

library(Hmisc)
library(RColorBrewer)

library(reshape2)
library(stringr)
library(plyr)

# # --------------------------------------------------------------------------------------------------------
# # # DEFINING THE VARIABLES UNDER STUDY # # #

# study variables: sex; usual frequency and average daily quantity of cigarettes smoked and alcohol consumed in the past 12 months; and use of sedatives, tranquilizers, opioids, amphetamines, cannabis, cocaine, hallucinogens, inhalants, and heroin
# codes (same order): SEX, S3AQ3B1, S3AQ3C1, S2AQ8A, S2AQ8B, S3BQ1A1, S3BQ1A2, S3BQ1A3, S3BQ1A4, S3BQ1A5, S3BQ1A6, S3BQ1A7, S3BQ1A8, S3BQ1A9A

# S2AQ8B -- quantity of alcohol consumed in the past 12 months:
# missing vs. present, distribution of recorded values, count of non-missing.
table(is.na(nesarc[["S2AQ8B"]]))
table(nesarc[["S2AQ8B"]])
sum(!is.na(nesarc[["S2AQ8B"]]))

# S3AQ3C1 -- usual quantity of cigarettes smoked: same three checks.
table(is.na(nesarc[["S3AQ3C1"]]))
table(nesarc[["S3AQ3C1"]])
sum(!is.na(nesarc[["S3AQ3C1"]]))

# # --------------------------------------------------------------------------------------------------------

# Baseline fit on the raw codes, before any recoding. The result is hard to
# interpret: the yes/no items are still coded 1/2 and "unknown" is still the
# numeric value 9. It is also a plain linear regression, so the binomial
# nature of the response and of several predictors is ignored.
lmNoAdjust <- lm(
  MAJORDEPLIFE ~ SEX + S2AQ8B + S3AQ3C1 +
    S3BQ1A1 + S3BQ1A2 + S3BQ1A3 + S3BQ1A4 + S3BQ1A5 +
    S3BQ1A6 + S3BQ1A7 + S3BQ1A8 + S3BQ1A9A,
  data = nesarc
)
summary(lmNoAdjust)

# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)    
# (Intercept)  0.0347409  0.0223817   1.552 0.120639    
# SEX          0.1533657  0.0072438  21.172  < 2e-16 ***
# S2AQ8B       0.0001574  0.0003837   0.410 0.681605    
# S3AQ3C1      0.0010271  0.0002495   4.117 3.86e-05 ***
# S3BQ1A1     -0.0788230  0.0155627  -5.065 4.14e-07 ***
# S3BQ1A2     -0.0113460  0.0163401  -0.694 0.487463    
# S3BQ1A3     -0.0418769  0.0148434  -2.821 0.004791 ** 
# S3BQ1A4     -0.0030821  0.0137318  -0.224 0.822409    
# S3BQ1A5     -0.0815162  0.0080903 -10.076  < 2e-16 ***
# S3BQ1A6     -0.0115252  0.0133963  -0.860 0.389627    
# S3BQ1A7     -0.0105396  0.0134567  -0.783 0.433512    
# S3BQ1A8      0.0517945  0.0150253   3.447 0.000568 ***
# S3BQ1A9A     0.1440555  0.0179928   8.006 1.28e-15 ***

# # --------------------------------------------------------------------------------------------------------
# # # CREATE NEW DATAFRAME WITH FEWER VARIABLES # # #
# Build the reduced analysis data frame `mddold`.
#
# Each NESARC column code is paired with the short name used for the rest of
# the script. Keeping code and name side by side means the selection and the
# renaming cannot drift out of step, which two separate positional lists can.
mdd_columns <- c(
  MAJORDEPLIFE = "mdd",
  SEX          = "sex",
  S3AQ3B1      = "smokefreq",
  S3AQ3C1      = "smoke",
  S2AQ8A       = "drinkfreq",
  S2AQ8B       = "alcohol",
  S3BQ1A1      = "sedatives",
  S3BQ1A2      = "tranquilizers",
  S3BQ1A3      = "opioids",
  S3BQ1A4      = "amphetamines",
  S3BQ1A5      = "cannabis",
  S3BQ1A6      = "cocaine",
  S3BQ1A7      = "hallucinogens",
  S3BQ1A8      = "inhalants",
  S3BQ1A9A     = "heroine"
)

# Selecting by name with `[` stops with "undefined columns selected" if a code
# is misspelled or absent, whereas nesarc$TYPO would silently yield NULL.
mddold <- nesarc[names(mdd_columns)]
str(mddold) # columns still carry their NESARC codes here

names(mddold) <- unname(mdd_columns)
str(mddold) # same data under the short analysis names

# # --------------------------------------------------------------------------------------------------------
# # # DATA MUNGING # # #
# Yes/no items ("sex" and the nine drug-use flags):
#   recode [1,2] --> [1,0]
# Drug-use flags only:
#   recode [9] --> [NA]
# Quantity / frequency items:
#   "alcohol", "smoke", "drinkfreq": recode [99] --> [NA]
#   "smokefreq":                     recode [9]  --> [NA]

mddold$sex[mddold$sex == 2] <- 0 # sex=0 female; sex=1 male

# Apply the same two recodes to every drug-use flag in one pass.
drug_flags <- c(
  "sedatives", "tranquilizers", "cannabis", "opioids", "amphetamines",
  "cocaine", "heroine", "hallucinogens", "inhalants"
)
mddold[drug_flags] <- lapply(mddold[drug_flags], function(flag) {
  flag[flag == 2] <- 0
  flag[flag == 9] <- NA
  flag
})

# Sentinel "unknown" codes on the quantity / frequency items become NA.
mddold$alcohol[mddold$alcohol %in% 99] <- NA
mddold$smoke[mddold$smoke %in% 99] <- NA
mddold$smokefreq[mddold$smokefreq %in% 9] <- NA
mddold$drinkfreq[mddold$drinkfreq %in% 99] <- NA

# # # ADD NEW VARIABLES: PACKYEARS and DRINKYEARS # # #
#
# Translate the ordinal frequency codes into a yearly multiplier, then multiply
# by the usual quantity to estimate cigarettes / drinks per year.
#
# Position i in each lookup vector holds the multiplier for frequency code i.
# NOTE(review): the multipliers are the script author's choices, presumably
# occasions per year -- verify against the NESARC codebook.
smoke_days_per_year <- c(364, 286, 182, 78, 30, 1)
drink_days_per_year <- c(364, 286, 182, 104, 52, 30, 12, 9, 4.5, 1.5)

# match() returns NA for a missing or unrecognised code, so those rows come out
# NA. The previous `x[freq == NA] <- NA` lines could never select a row,
# because any comparison with NA is itself NA.
mddold$smokefreqyr <- smoke_days_per_year[
  match(mddold$smokefreq, seq_along(smoke_days_per_year))
]
mddold$alcoholfreqyr <- drink_days_per_year[
  match(mddold$drinkfreq, seq_along(drink_days_per_year))
]

# Yearly totals; NA in either factor propagates to the product.
mddold$cigsperyear <- mddold$smokefreqyr * mddold$smoke
mddold$swigsperyear <- mddold$alcoholfreqyr * mddold$alcohol

summary(mddold$smokefreqyr)
summary(mddold$alcoholfreqyr)
summary(mddold$cigsperyear)
summary(mddold$swigsperyear)

table(mddold$smokefreqyr)
table(mddold$alcoholfreqyr)
table(mddold$cigsperyear)
table(mddold$swigsperyear)

# `mddnew` is the data frame used by all later tables, plots and models.
mddnew <- mddold
str(mddnew)

# # --------------------------------------------------------------------------------------------------------
# FREQUENCY TABLES
# ("mdd", "sex", "drinkyears", "packyears", "sedatives", "tranquilizers", "cannabis", "opioids", "amphetamines", "cocaine", "heroine", "hallucinogens", "inhalants")

# Two-way tables of the depression indicator (rows) against each predictor
# (columns). CrossTable() comes from gmodels.
# NOTE(review): CrossTable appears to label its output from the call
# expressions themselves, so keep the calls literal rather than looping --
# confirm before refactoring.
library(gmodels)
CrossTable(mddnew$mdd, mddnew$sex)
CrossTable(mddnew$mdd, mddnew$sedatives)
CrossTable(mddnew$mdd, mddnew$tranquilizers)
CrossTable(mddnew$mdd, mddnew$cannabis)
CrossTable(mddnew$mdd, mddnew$opioids)
CrossTable(mddnew$mdd, mddnew$amphetamines)
CrossTable(mddnew$mdd, mddnew$cocaine)
CrossTable(mddnew$mdd, mddnew$heroine)
CrossTable(mddnew$mdd, mddnew$hallucinogens)
CrossTable(mddnew$mdd, mddnew$inhalants)

# The continuous yearly totals are binned with Hmisc::cut2(g = 4) before
# tabulating; the bins are stored as new factor columns g4cigs / g4swigs,
# which the plotting and modelling sections reuse.
library(Hmisc)
mddnew$g4cigs <- cut2(mddnew$cigsperyear, g = 4)
CrossTable(mddnew$mdd, mddnew$g4cigs)
# intervals produced for cigarettes per year (recorded from the author's run)
# [   1, 2002) | [2002, 4550) | [4550, 7644) | [7644,35672]
  
mddnew$g4swigs <- cut2(mddnew$swigsperyear, g = 4)
CrossTable(mddnew$mdd, mddnew$g4swigs)
# intervals produced for drinks per year (recorded from the author's run)
# [  1.5,   10.5) | [ 10.5,   63.0) | [ 63.0,  360.0) | [360.0,35672.0] 

# # --------------------------------------------------------------------------------------------------------
# # # MULTIVARIATE GRAPHS FOR EXPLORATORY ANALYSIS # # #

library(RColorBrewer)

# Set up a compact multi-panel plotting layout and a ColorBrewer palette.
#
# Arguments:
#   a, b         rows and columns of the panel grid (used as par(mfrow = c(a, b)))
#   brewer.n     number of colours requested from brewer.pal()
#   brewer.name  ColorBrewer scheme name passed to brewer.pal()
#   ...          further graphical parameters forwarded to par(); if `mfrow`
#                is supplied here it takes precedence over a and b
#
# Side effects: changes par() (mar, mgp, mfrow, anything in ...) and the
# session palette(). Returns whatever palette() returns, invisibly.
mypar <- function(a = 1, b = 1, brewer.n = 4, brewer.name = "RdYlGn", ...) {
  extra <- list(...)

  # Callers in this script write mypar(mfrow = c(r, c)). Passing that straight
  # through would hand par() two `mfrow` arguments and depend on the later
  # duplicate winning, so resolve the layout here instead.
  layout_dims <- c(a, b)
  if (!is.null(extra[["mfrow"]])) {
    layout_dims <- extra[["mfrow"]]
    extra[["mfrow"]] <- NULL
  }

  par(mar = c(2.5, 2.5, 1.6, 1.1), mgp = c(1.5, 0.5, 0))
  do.call(par, c(list(mfrow = layout_dims), extra))
  palette(brewer.pal(brewer.n, brewer.name))
}

# Colour ramps used by the stacked bar charts. Each `cols*` vector holds the
# anchor colours of one ColorBrewer scheme; each `pal*` is the interpolating
# function built from it (call as pal(n) to get n colours).

# Sequential schemes, five anchor colours each.
cols4v1 <- brewer.pal(n = 5, name = "YlGnBu")
pal4v1 <- colorRampPalette(colors = cols4v1)

cols4v2 <- brewer.pal(n = 5, name = "YlOrBr")
pal4v2 <- colorRampPalette(colors = cols4v2)

cols4v3 <- brewer.pal(n = 5, name = "PuRd")
pal4v3 <- colorRampPalette(colors = cols4v3)

# Qualitative "Set3" scheme at two sizes.
cols8 <- brewer.pal(n = 8, name = "Set3")
pal8 <- colorRampPalette(colors = cols8)

cols12 <- brewer.pal(n = 12, name = "Set3")
pal12 <- colorRampPalette(colors = cols12)

# Share of respondents in each category of `group`, split by diagnosis.
#
# Arguments:
#   outcome  diagnosis indicator with two observed values (here mddnew$mdd)
#   group    categorical predictor of the same length
#
# Returns a 2 x k numeric matrix with one column per category of `group`.
# Row 1 is the share falling in the SECOND outcome level, row 2 the share in
# the first -- the rows are deliberately swapped so that barplot() draws the
# second level at the bottom of each stacked bar. Shares are counts divided by
# the number of non-missing observations in that category of `group`.
mdd_share_by <- function(outcome, group) {
  counts <- table(outcome, group)
  totals <- as.vector(table(group))
  rbind(counts[2, ] / totals, counts[1, ] / totals)
}

# mdd vs cigarettes-per-year category
mddvg4cigs <- mdd_share_by(mddnew$mdd, mddnew$g4cigs)
str(mddvg4cigs)

# mdd vs drinks-per-year category
mddvg4swigs <- mdd_share_by(mddnew$mdd, mddnew$g4swigs)
str(mddvg4swigs)

# mdd vs sex
mddvsex <- mdd_share_by(mddnew$mdd, mddnew$sex)
str(mddvsex)

# mdd vs sedatives
mddvsedatives <- mdd_share_by(mddnew$mdd, mddnew$sedatives)
str(mddvsedatives)

# mdd vs cannabis
mddvcannabis <- mdd_share_by(mddnew$mdd, mddnew$cannabis)
str(mddvcannabis)

# mdd vs opioids
mddvopioids <- mdd_share_by(mddnew$mdd, mddnew$opioids)
str(mddvopioids)

# mdd vs tranquilizers
mddvtranquilizers <- mdd_share_by(mddnew$mdd, mddnew$tranquilizers)
str(mddvtranquilizers)

# mdd vs amphetamines
mddvamphetamines <- mdd_share_by(mddnew$mdd, mddnew$amphetamines)
str(mddvamphetamines)

# mdd vs cocaine
mddvcocaine <- mdd_share_by(mddnew$mdd, mddnew$cocaine)
str(mddvcocaine)

# mdd vs heroine
mddvheroine <- mdd_share_by(mddnew$mdd, mddnew$heroine)
str(mddvheroine)

# mdd vs hallucinogens
mddvhallucinogens <- mdd_share_by(mddnew$mdd, mddnew$hallucinogens)
str(mddvhallucinogens)

# mdd vs inhalants
mddvinhalants <- mdd_share_by(mddnew$mdd, mddnew$inhalants)
str(mddvinhalants)

# --- Figure 1: depression vs. yearly cigarette / alcohol category (2 x 2) ---
# mypar() is called positionally (rows, cols) so the layout does not depend on
# how a duplicated `mfrow` argument is resolved inside par().
#
# Bar colours are palette indices in category order. The earlier
# col = unique(<factor>) took colours in order of first appearance in the
# data and could include NA (a bar drawn without fill).
mypar(2, 2)

# Share diagnosed, by estimated cigarettes per year (one bar per category)
bp_mddvg4cigs <- barplot(
  mddvg4cigs[1, ],
  col = seq_along(mddvg4cigs[1, ]),
  xlab = "cigarettes per year", ylab = "diagnosed depression", cex.axis = 0.8
)
# Same, stacked: diagnosed share at the bottom, remainder on top
bp_mddvg4cigs <- barplot(
  mddvg4cigs,
  col = seq_len(nrow(mddvg4cigs)),
  xlab = "cigarettes per year", ylab = "diagnosed depression", cex.axis = 0.8
)
# Share diagnosed, by estimated alcohol consumed per year
bp_mddvg4swigs <- barplot(
  mddvg4swigs[1, ],
  col = seq_along(mddvg4swigs[1, ]),
  xlab = "alcohol per year", ylab = "diagnosed depression", cex.axis = 0.8
)
# Same, stacked
bp_mddvg4swigs <- barplot(
  mddvg4swigs,
  col = seq_len(nrow(mddvg4swigs)),
  xlab = "alcohol per year", ylab = "diagnosed depression", cex.axis = 0.8
)
#dev.copy2pdf(file="mdd_cigarettes_alcohol.pdf", height =8, width = 11)

# --- Figure 2: depression vs. sex and each substance (2 x 5, stacked) ---
# In every panel MDD diagnosis is the response and the x variable the
# explanatory factor, among all adults in the NESARC study.
mypar(2, 5)
bp_mddvsex <- barplot(mddvsex, col = pal4v1(4),
  xlab = "biological sex, 0 - female, 1 - male", ylab = "diagnosed depression")
bp_mddvsedatives <- barplot(mddvsedatives, col = pal4v2(4),
  xlab = "sedative use", ylab = "diagnosed depression")
bp_mddvcannabis <- barplot(mddvcannabis, col = pal4v3(4),
  xlab = "cannabis use", ylab = "diagnosed depression")
bp_mddvtranquilizers <- barplot(mddvtranquilizers, col = pal8(4),
  xlab = "tranquilizers use", ylab = "diagnosed depression")
bp_mddvopioids <- barplot(mddvopioids, col = pal12(4),
  xlab = "opioids use", ylab = "diagnosed depression")
bp_mddvcocaine <- barplot(mddvcocaine, col = pal12(4),
  xlab = "cocaine use", ylab = "diagnosed depression")
bp_mddvamphetamines <- barplot(mddvamphetamines, col = pal8(4),
  xlab = "amphetamines use", ylab = "diagnosed depression")
bp_mddvheroine <- barplot(mddvheroine, col = pal4v3(4),
  xlab = "heroine use", ylab = "diagnosed depression")
bp_mddvhallucinogens <- barplot(mddvhallucinogens, col = pal4v2(4),
  xlab = "hallucinogens use", ylab = "diagnosed depression")
bp_mddvinhalants <- barplot(mddvinhalants, col = pal4v1(4),
  xlab = "inhalants use", ylab = "diagnosed depression")
#dev.copy2pdf(file="mdd_sex_substances.pdf", height =8, width = 11)

# --- Figure 3: distribution of the yearly totals (alcohol on a log axis) ---
mypar(1, 2)
boxplot(mddnew$cigsperyear, col = "#FE9929",
  xlab = "# of cigarettes smoked per year", cex = 1.5)
boxplot(mddnew$swigsperyear, log = "y", col = "#41B6C4",
  xlab = "# of alcohol consumed per year", cex = 1.5)
#dev.copy2pdf(file="boxplots cigs and swigs.pdf", height =4, width = 7)

# # --------------------------------------------------------------------------------------------------------
# # MODELING # #

# SEPARATE MODELS FOR FEMALES AND MALES
# Row masks are built from the named `sex` column (recoded earlier to
# 1 = male, 0 = female) rather than from its position in the data frame.
males <- mddnew$sex == 1
females <- mddnew$sex == 0
mddnewmales <- mddnew[males, ]
mddnewfemales <- mddnew[females, ]
str(mddnewmales)
str(mddnewfemales)

# Logistic regressions of the depression indicator on the binned yearly
# alcohol / cigarette totals and the nine substance-use flags: once on
# everyone, then on each sex. The formula is written out in each call so the
# printed "Call:" shows it in full.
logsubstancesall <- glm(
  mdd ~ as.factor(g4swigs) + as.factor(g4cigs) + sedatives + tranquilizers +
    cannabis + opioids + amphetamines + cocaine + heroine + hallucinogens +
    inhalants,
  data = mddnew, family = "binomial"
)

logsubstancesmales <- glm(
  mdd ~ as.factor(g4swigs) + as.factor(g4cigs) + sedatives + tranquilizers +
    cannabis + opioids + amphetamines + cocaine + heroine + hallucinogens +
    inhalants,
  data = mddnewmales, family = "binomial"
)

logsubstancesfemales <- glm(
  mdd ~ as.factor(g4swigs) + as.factor(g4cigs) + sedatives + tranquilizers +
    cannabis + opioids + amphetamines + cocaine + heroine + hallucinogens +
    inhalants,
  data = mddnewfemales, family = "binomial"
)

# Diagnostics, all respondents: summary, odds ratios with confidence
# intervals, observed vs. fitted, sequential analysis of deviance.
summary(logsubstancesall)
exp(coef(logsubstancesall))
exp(confint(logsubstancesall))
# glm() drops rows with missing predictors, so the fitted values are shorter
# than mddnew$mdd. Plot against the response the model actually used (`$y`)
# to avoid "'x' and 'y' lengths differ".
plot(logsubstancesall$y, fitted(logsubstancesall),
  xlab = "observed mdd", ylab = "fitted probability")
anova(logsubstancesall, test = "Chisq")


# Diagnostics, males only.
summary(logsubstancesmales)
exp(coef(logsubstancesmales))
exp(confint(logsubstancesmales))
plot(logsubstancesmales$y, fitted(logsubstancesmales),
  xlab = "observed mdd", ylab = "fitted probability")
anova(logsubstancesmales, test = "Chisq")

# Call:
# glm(formula = mdd ~ as.factor(g4swigs) + as.factor(g4cigs) + 
#     sedatives + tranquilizers + cannabis + opioids + amphetamines + 
#     cocaine + heroine + hallucinogens + inhalants, family = "binomial", 
#     data = mddnewmales)
# 
# Deviance Residuals: 
#     Min       1Q   Median       3Q      Max  
# -1.3316  -0.5962  -0.4860  -0.4472   2.1951  
# 
# Coefficients:
#                                     Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                       -2.1097030  0.1136347 -18.566  < 2e-16 ***
# as.factor(g4swigs)[ 10.5,   63.0)  0.1165468  0.1174018   0.993 0.320848    
# as.factor(g4swigs)[ 63.0,  360.0) -0.1015568  0.1145740  -0.886 0.375409    
# as.factor(g4swigs)[360.0,35672.0] -0.1425212  0.1073813  -1.327 0.184428    
# as.factor(g4cigs)[2002, 4550)     -0.0627617  0.1074292  -0.584 0.559076    
# as.factor(g4cigs)[4550, 7644)      0.0330148  0.0902443   0.366 0.714486    
# as.factor(g4cigs)[7644,35672]      0.3729660  0.1058383   3.524 0.000425 ***	<-- significant (p < 0.001)
# sedatives                          0.5647761  0.1393265   4.054 5.04e-05 ***	<-- significant (p < 0.001)
# tranquilizers                      0.0351090  0.1557427   0.225 0.821645    
# cannabis                           0.5819275  0.0845202   6.885 5.78e-12 ***	<-- significant (p < 0.001)
# opioids                            0.3639522  0.1264450   2.878 0.003998 ** 	<-- significant (p < 0.01)
# amphetamines                       0.1981210  0.1285637   1.541 0.123308    
# cocaine                            0.0643056  0.1195646   0.538 0.590693    
# heroine                           -0.0002183  0.2815689  -0.001 0.999382    
# hallucinogens                      0.1674107  0.1238734   1.351 0.176546    
# inhalants                          0.1386452  0.1516635   0.914 0.360631    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


# (Dispersion parameter for binomial family taken to be 1)
# 
#     Null deviance: 5767.9  on 6544  degrees of freedom
# Residual deviance: 5442.7  on 6529  degrees of freedom
#   (11973 observations deleted due to missingness)
# AIC: 5474.7
# Number of Fisher Scoring iterations: 4
# > exp(logsubstancesmales$coeff)
#                       (Intercept) as.factor(g4swigs)[ 10.5,   63.0) 
#                         0.1212740                         1.1236101 
# as.factor(g4swigs)[ 63.0,  360.0) as.factor(g4swigs)[360.0,35672.0] 
#                         0.9034299                         0.8671692 
#     as.factor(g4cigs)[2002, 4550)     as.factor(g4cigs)[4550, 7644) 
#                         0.9391673                         1.0335659 
#     as.factor(g4cigs)[7644,35672]                         sedatives 
#                         1.4520350                         1.7590538 
#                     tranquilizers                          cannabis 
#                         1.0357326                         1.7894844 
#                           opioids                      amphetamines 
#                         1.4390054                         1.2191099 
#                           cocaine                           heroine 
#                         1.0664182                         0.9997818 
#                     hallucinogens                         inhalants 
#                         1.1822398                         1.1487165 
# > exp(confint(logsubstancesmales))
# Waiting for profiling to be done...
#                                        2.5 %    97.5 %
# (Intercept)                       0.09672887 0.1510365
# as.factor(g4swigs)[ 10.5,   63.0) 0.89350182 1.4160715
# as.factor(g4swigs)[ 63.0,  360.0) 0.72254676 1.1325033
# as.factor(g4swigs)[360.0,35672.0] 0.70381227 1.0724474
# as.factor(g4cigs)[2002, 4550)     0.76016557 1.1585051
# as.factor(g4cigs)[4550, 7644)     0.86652156 1.2344479
# as.factor(g4cigs)[7644,35672]     1.17957468 1.7864351	<-- significant: 95% CI excludes 1
# sedatives                         1.33680824 2.3088374	<-- significant: 95% CI excludes 1
# tranquilizers                     0.76170329 1.4030169
# cannabis                          1.51531707 2.1107442	<-- significant: 95% CI excludes 1
# opioids                           1.12075291 1.8402787	<-- significant: 95% CI excludes 1
# amphetamines                      0.94609989 1.5664145
# cocaine                           0.84226946 1.3461298
# heroine                           0.57008024 1.7265223
# hallucinogens                     0.92610582 1.5053609
# inhalants                         0.85095214 1.5426781

# > plot (mddnewmales$mdd, logsubstancesmales$fitted)
# Error in xy.coords(x, y, xlabel, ylabel, log) : 
#   'x' and 'y' lengths differ
# > anova(logsubstancesmales,test="Chisq")
# Analysis of Deviance Table
# Model: binomial, link: logit
# Response: mdd
# Terms added sequentially (first to last)
#                    Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                                6544     5767.9              
# as.factor(g4swigs)  3    4.398      6541     5763.5 0.2215738    
# as.factor(g4cigs)   3   19.022      6538     5744.4 0.0002705 ***	<-- this is significant
# sedatives           1  175.136      6537     5569.3 < 2.2e-16 ***		<-- this is significant
# tranquilizers       1   14.750      6536     5554.6 0.0001227 ***		<-- this is significant
# cannabis            1   89.467      6535     5465.1 < 2.2e-16 ***		<-- this is significant
# opioids             1   12.316      6534     5452.8 0.0004492 ***		<-- this is significant
# amphetamines        1    5.754      6533     5447.0 0.0164489 *  	<-- this is significant
# cocaine             1    1.310      6532     5445.7 0.2523128    
# heroine             1    0.025      6531     5445.7 0.8751546    
# hallucinogens       1    2.193      6530     5443.5 0.1386587    
# inhalants           1    0.829      6529     5442.7 0.3626344    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 
# Diagnostics, females only: summary, odds ratios with confidence intervals,
# observed vs. fitted, sequential analysis of deviance.
summary(logsubstancesfemales)
exp(coef(logsubstancesfemales))
exp(confint(logsubstancesfemales))
# The model was fitted on complete rows only, so plot against the response it
# actually used (`$y`); mddnewfemales$mdd is longer than the fitted values and
# made plot() fail with "'x' and 'y' lengths differ".
plot(logsubstancesfemales$y, fitted(logsubstancesfemales),
  xlab = "observed mdd", ylab = "fitted probability")
anova(logsubstancesfemales, test = "Chisq")

# > summary(logsubstancesfemales)
# 
# Call:
# glm(formula = mdd ~ as.factor(g4swigs) + as.factor(g4cigs) + 
#     sedatives + tranquilizers + cannabis + opioids + amphetamines + 
#     cocaine + heroine + hallucinogens + inhalants, family = "binomial", 
#     data = mddnewfemales)
# 
# Deviance Residuals: 
#     Min       1Q   Median       3Q      Max  
# -1.7306  -0.8047  -0.7157   1.2613   1.7912  
# 
# Coefficients:
#                                   Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                       -1.23135    0.07360 -16.731  < 2e-16 ***
# as.factor(g4swigs)[ 10.5,   63.0) -0.10289    0.07709  -1.335 0.181991    
# as.factor(g4swigs)[ 63.0,  360.0) -0.07070    0.07920  -0.893 0.372032    
# as.factor(g4swigs)[360.0,35672.0] -0.11143    0.08503  -1.310 0.190061    
# as.factor(g4cigs)[2002, 4550)      0.08761    0.07727   1.134 0.256842    
# as.factor(g4cigs)[4550, 7644)      0.26987    0.07261   3.716 0.000202 ***
# as.factor(g4cigs)[7644,35672]      0.51903    0.10521   4.934 8.08e-07 ***
# sedatives                          0.23058    0.13434   1.716 0.086080 .  
# tranquilizers                      0.25377    0.14481   1.752 0.079695 .  
# cannabis                           0.60725    0.06880   8.827  < 2e-16 ***
# opioids                            0.44915    0.12660   3.548 0.000388 ***
# amphetamines                       0.34957    0.12553   2.785 0.005357 ** 
# cocaine                            0.10326    0.11098   0.931 0.352105    
# heroine                            0.30044    0.46536   0.646 0.518537    
# hallucinogens                     -0.03703    0.11994  -0.309 0.757524    
# inhalants                         -0.22342    0.19757  -1.131 0.258134    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for binomial family taken to be 1)
# 
#     Null deviance: 7461.1  on 6040  degrees of freedom
# Residual deviance: 7153.5  on 6025  degrees of freedom
#   (18534 observations deleted due to missingness)
# AIC: 7185.5
# 
# Number of Fisher Scoring iterations: 4

# > exp(logsubstancesfemales$coeff)
#                       (Intercept) as.factor(g4swigs)[ 10.5,   63.0) as.factor(g4swigs)[ 63.0,  360.0) 
#                         0.2918978                         0.9022268                         0.9317417 
# as.factor(g4swigs)[360.0,35672.0]     as.factor(g4cigs)[2002, 4550)     as.factor(g4cigs)[4550, 7644) 
#                         0.8945575                         1.0915670                         1.3097916 
#     as.factor(g4cigs)[7644,35672]                         sedatives                     tranquilizers 
#                         1.6803973                         1.2593351                         1.2888798 
#                          cannabis                           opioids                      amphetamines 
#                         1.8353745                         1.5669720                         1.4184636 
#                           cocaine                           heroine                     hallucinogens 
#                         1.1087842                         1.3504529                         0.9636485 
#                         inhalants 
#                         0.7997824 
# > exp(confint(logsubstancesfemales))
# Waiting for profiling to be done...
#                                       2.5 %    97.5 %
# (Intercept)                       0.2524219 0.3368565
# as.factor(g4swigs)[ 10.5,   63.0) 0.7755918 1.0492891
# as.factor(g4swigs)[ 63.0,  360.0) 0.7976125 1.0880415
# as.factor(g4swigs)[360.0,35672.0] 0.7568787 1.0563681
# as.factor(g4cigs)[2002, 4550)     0.9381185 1.2700609
# as.factor(g4cigs)[4550, 7644)     1.1362570 1.5104773
# as.factor(g4cigs)[7644,35672]     1.3663559 2.0641066
# sedatives                         0.9667310 1.6374606
# tranquilizers                     0.9694174 1.7109332
# cannabis                          1.6035308 2.0999779
# opioids                           1.2219634 2.0078102
# amphetamines                      1.1087079 1.8140419
# cocaine                           0.8913747 1.3774276
# heroine                           0.5462273 3.4608405
# hallucinogens                     0.7609479 1.2179831
# inhalants                         0.5414240 1.1761289

# > anova(logsubstancesfemales,test="Chisq")
# Analysis of Deviance Table
# Model: binomial, link: logit
# Response: mdd
# Terms added sequentially (first to last)
#                    Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                                6040     7461.1              
# as.factor(g4swigs)  3    1.950      6037     7459.2  0.582951    
# as.factor(g4cigs)   3   34.849      6034     7424.3 1.311e-07 ***
# sedatives           1   86.558      6033     7337.8 < 2.2e-16 ***
# tranquilizers       1   30.171      6032     7307.6 3.955e-08 ***
# cannabis            1  127.106      6031     7180.5 < 2.2e-16 ***
# opioids             1   15.441      6030     7165.1 8.511e-05 ***
# amphetamines        1    8.984      6029     7156.1  0.002723 ** 
# cocaine             1    0.711      6028     7155.4  0.399268    
# heroine             1    0.288      6027     7155.1  0.591425    
# hallucinogens       1    0.252      6026     7154.8  0.615423    
# inhalants           1    1.286      6025     7153.5  0.256804    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Navigation
#
# R code
#
# No comments:
#
# Post a Comment