Vincent Schubert Malbas ®: R: graphing plots

I have to post this script on this Google blog because Tumblr does not support displaying color-coded (syntax-highlighted) code. By the way, this is for my SAS / R course for Wesleyan, and I am cross-referencing this post from my course blog, substancestats.tumblr.com.
# Show the starting working directory, switch into the course dataset folder,
# then show the directory again to confirm the change.
getwd()

data_dir <- "C:/Users/aseandi/My Library/COURSERA/Passion-Driven Statistics/datasets"
setwd(data_dir)

getwd()

# One-time download of the NESARC extract, kept commented out so the script
# reads the local copy instead:
# fileurlcsv <- "http://spark-public.s3.amazonaws.com/pdstatistics/data_sets/nesarc_pds.csv"
# download.file(fileurlcsv, destfile="./nesarc_pds.csv")
# list.files(".")
# dateDownloaded <-date()
# dateDownloaded

# Read the CSV from the (new) working directory and print its structure.
nesarc_csv <- file.path(".", "nesarc_pds.csv")
nesarc <- read.csv(nesarc_csv)
str(nesarc)
# summary(nesarc)
# sapply(nesarc[1, ], class)
# sum(is.na(nesarc))
# table(is.na(nesarc))

library(Hmisc)
library(RColorBrewer)

library(reshape2)
library(stringr)
library(plyr)

# # --------------------------------------------------------------------------------------------------------
# # # DEFINING THE VARIABLES UNDER STUDY # # #

# study variables: sex; average daily quantity of alcohol consumed and of cigarettes smoked in the past 12 months; and use of sedatives, tranquilizers, cannabis, opioids, amphetamines, cocaine, heroin, hallucinogens, and inhalants
# codes: Sex, s2aq8b, s3aq3c1, S3bq1a1, S3bq1a2, S3bq1a3, S3bq1a4, S3bq1a5, S3bq1a6, S3bq1a7, S3bq1a8, S3bq1a9a

# Quantity of alcohol consumed in the past 12 months (column S2AQ8B):
#   1. count missing vs. non-missing entries,
#   2. tabulate the distinct recorded values,
#   3. add up the tabulated counts (table() leaves NA out by default, so this
#      is the number of non-missing responses).
table(is.na(nesarc$S2AQ8B))
table(nesarc$S2AQ8B)
sum(table(nesarc$S2AQ8B))

# Usual quantity of cigarettes smoked (column S3AQ3C1): the same three checks.
table(is.na(nesarc$S3AQ3C1))
table(nesarc$S3AQ3C1)
sum(table(nesarc$S3AQ3C1))

# # --------------------------------------------------------------------------------------------------------
# # # CREATE NEW DATAFRAME WITH FEWER VARIABLES # # #
# Analysis column name -> NESARC source column it is copied from.
nesarc_cols <- c(
  mdd           = "MAJORDEPLIFE",
  sex           = "SEX",
  smokefreq     = "S3AQ3B1",
  smoke         = "S3AQ3C1",
  drinkfreq     = "S2AQ8A",
  alcohol       = "S2AQ8B",
  sedatives     = "S3BQ1A1",
  tranquilizers = "S3BQ1A2",
  cannabis      = "S3BQ1A3",
  opioids       = "S3BQ1A4",
  amphetamines  = "S3BQ1A5",
  cocaine       = "S3BQ1A6",
  heroine       = "S3BQ1A7",
  hallucinogens = "S3BQ1A8",
  inhalants     = "S3BQ1A9A"
)

# Pull the 15 study columns into their own data frame. The first str() shows
# them under their "nesarc.<SOURCE>" names, the second under the short names.
mddold <- nesarc[unname(nesarc_cols)]
names(mddold) <- paste0("nesarc.", nesarc_cols)
str(mddold)
names(mddold) <- names(nesarc_cols)
str(mddold)

# # --------------------------------------------------------------------------------------------------------
# # # DATA MUNGING # # #
# for the  following variables: "sex", "sedatives", "tranquilizers", "cannabis", "opioids", "amphetamines", "cocaine", "heroine", "hallucinogens", "inhalants"
	# change the binomial setting [1,2] --> [1,0]
	# change [9] to [NA]

# # # DATA MUNGING # # #
# for the  following variables: "alcohol", "smoke", "smokefreq", "drinkfreq"
	# change [99] for smoke quantity, drinks consumed to [NA]
	# change [99] for drinkfreq to [NA]
	# change [9] for smokefreq to [NA]

# sex: recode 2 -> 0, so that sex=0 female; sex=1 male
mddold$sex <- replace(mddold$sex, mddold$sex == 2, 0)

# Substance-use indicators: recode 2 -> 0 and turn the code 9 into NA.
use_cols <- c(
  "sedatives", "tranquilizers", "cannabis", "opioids", "amphetamines",
  "cocaine", "heroine", "hallucinogens", "inhalants"
)
mddold[use_cols] <- lapply(mddold[use_cols], function(codes) {
  codes <- replace(codes, codes == 2, 0)
  replace(codes, codes == 9, NA)
})

# Quantity / frequency columns: the listed code is turned into NA
# (99 for alcohol, smoke and drinkfreq; 9 for smokefreq).
na_codes <- c(alcohol = 99, smoke = 99, smokefreq = 9, drinkfreq = 99)
mddold[names(na_codes)] <- Map(
  function(values, code) replace(values, values == code, NA),
  mddold[names(na_codes)],
  na_codes
)

# # # ADD NEW VARIABLES: PACKYEARS and DRINKYEARS # # #

# Translate the frequency codes into a per-year multiplier (presumably the
# approximate number of smoking / drinking days per year -- confirm against the
# NESARC codebook). Position i of each vector is the value assigned to code i.
#
# The original script ended each mapping with `x[code == NA] = NA`. Comparing
# with `== NA` never yields TRUE, so those lines did nothing. The lookup below
# goes through match(), which returns NA for missing or unlisted codes, making
# the intended "unknown code -> NA" rule explicit.
smoke_per_year <- c(364, 286, 182, 78, 30, 1)
drink_per_year <- c(364, 286, 182, 104, 52, 30, 12, 9, 4.5, 1.5)

mddold$smokefreqyr <-
  smoke_per_year[match(mddold$smokefreq, seq_along(smoke_per_year))]
mddold$alcoholfreqyr <-
  drink_per_year[match(mddold$drinkfreq, seq_along(drink_per_year))]

# Yearly totals: per-year multiplier times the usual quantity. NA in either
# factor propagates to the product.
mddold$cigsperyear <- mddold$smokefreqyr * mddold$smoke
mddold$swigsperyear <- mddold$alcoholfreqyr * mddold$alcohol

# Quick numeric summaries of the four derived columns.
summary(mddold$smokefreqyr)
summary(mddold$alcoholfreqyr)
summary(mddold$cigsperyear)
summary(mddold$swigsperyear)

# Frequency of each distinct value in the derived columns.
table(mddold$smokefreqyr)
table(mddold$alcoholfreqyr)
table(mddold$cigsperyear)
table(mddold$swigsperyear)

# Work on a copy from here on, leaving mddold as the recoded base table.
mddnew <- mddold
str(mddnew)

# # --------------------------------------------------------------------------------------------------------
# FREQUENCY TABLES
# "mdd" cross-tabulated against: "sex", "sedatives", "tranquilizers", "cannabis", "opioids", "amphetamines", "cocaine", "heroine", "hallucinogens", "inhalants", and the four-group versions of "cigsperyear" and "swigsperyear"

# Cross-tabulate the depression indicator (mdd) against sex and against each
# of the nine substance-use indicators. CrossTable() comes from gmodels.
library(gmodels)
CrossTable(mddnew$mdd, mddnew$sex)
CrossTable(mddnew$mdd, mddnew$sedatives)
CrossTable(mddnew$mdd, mddnew$tranquilizers)
CrossTable(mddnew$mdd, mddnew$cannabis)
CrossTable(mddnew$mdd, mddnew$opioids)
CrossTable(mddnew$mdd, mddnew$amphetamines)
CrossTable(mddnew$mdd, mddnew$cocaine)
CrossTable(mddnew$mdd, mddnew$heroine)
CrossTable(mddnew$mdd, mddnew$hallucinogens)
CrossTable(mddnew$mdd, mddnew$inhalants)

# cigsperyear has too many distinct values to cross-tabulate directly, so it is
# first binned with Hmisc::cut2() into g = 4 groups (stored as g4cigs) and the
# binned version is cross-tabulated against mdd.
library(Hmisc)
mddnew$g4cigs <- cut2(mddnew$cigsperyear, g = 4)
CrossTable(mddnew$mdd, mddnew$g4cigs)
# intervals
# [   1, 2002) | [2002, 4550) | [4550, 7644) | [7644,35672]
  
# Same treatment for swigsperyear: bin into g = 4 groups (g4swigs), then
# cross-tabulate against mdd.
mddnew$g4swigs <- cut2(mddnew$swigsperyear, g = 4)
CrossTable(mddnew$mdd, mddnew$g4swigs)
# intervals
# [  1.5,   10.5) | [ 10.5,   63.0) | [ 63.0,  360.0) | [360.0,35672.0] 

# # --------------------------------------------------------------------------------------------------------
# # # MULTIVARIATE GRAPHS FOR EXPLORATORY ANALYSIS # # #

library(RColorBrewer)

#' Set up the plotting device: margins, panel grid and colour palette.
#'
#' Changes global graphics state via par() and palette(); nothing is restored
#' afterwards.
#'
#' @param a,b Number of panel rows and columns, used as `mfrow = c(a, b)`.
#' @param brewer.n Number of colours requested from `brewer.pal()`.
#' @param brewer.name Name of the RColorBrewer palette to install.
#' @param ... Further settings forwarded to the same `par()` call that sets
#'   `mfrow`, placed after it in the argument list.
#' @return The value of the final `palette()` call.
mypar <- function(a = 1, b = 1, brewer.n = 4, brewer.name = "RdYlGn", ...) {
  plot_margins <- c(2.5, 2.5, 1.6, 1.1)
  axis_spacing <- c(1.5, 0.5, 0)
  par(mar = plot_margins, mgp = axis_spacing)

  panel_grid <- c(a, b)
  par(mfrow = panel_grid, ...)

  brewer_colours <- brewer.pal(brewer.n, brewer.name)
  palette(brewer_colours)
}

# Build the "share of mdd level within each category" matrix used by the bar
# plots. The original script repeated the same four statements for every
# explanatory variable; they now live in one helper.
#
# explanatory: vector of categories, one entry per row of mddnew.
# mdd:         the depression indicator; defaults to mddnew$mdd.
#
# Returns a 2-row numeric matrix with one column per category of `explanatory`:
#   row 1 = second row of table(mdd, explanatory) / table(explanatory)
#   row 2 = first  row of table(mdd, explanatory) / table(explanatory)
# The rows are exchanged relative to the raw table so that the second mdd
# level is drawn first (at the bottom) in a stacked barplot.
# NOTE(review): the denominator is table(explanatory), i.e. all rows with a
# non-missing category, whether or not mdd is missing for them.
mdd_share_by <- function(explanatory, mdd = mddnew$mdd) {
  counts <- table(mdd, explanatory)
  category_totals <- table(explanatory)

  shares <- rbind(counts[2, ], counts[1, ])
  shares[1, ] <- shares[1, ] / category_totals
  shares[2, ] <- shares[2, ] / category_totals
  shares
}

# mdd vs cigarettes-per-year group
mddvg4cigs <- mdd_share_by(mddnew$g4cigs)
str(mddvg4cigs)

# mdd vs alcohol-per-year group
mddvg4swigs <- mdd_share_by(mddnew$g4swigs)
str(mddvg4swigs)

# mdd vs sex
mddvsex <- mdd_share_by(mddnew$sex)
str(mddvsex)

# mdd vs sedatives
mddvsedatives <- mdd_share_by(mddnew$sedatives)
str(mddvsedatives)

# mdd vs cannabis
mddvcannabis <- mdd_share_by(mddnew$cannabis)
str(mddvcannabis)

# mdd vs opioids
mddvopioids <- mdd_share_by(mddnew$opioids)
str(mddvopioids)

# mdd vs tranquilizers
mddvtranquilizers <- mdd_share_by(mddnew$tranquilizers)
str(mddvtranquilizers)

# mdd vs amphetamines
mddvamphetamines <- mdd_share_by(mddnew$amphetamines)
str(mddvamphetamines)

# mdd vs cocaine
mddvcocaine <- mdd_share_by(mddnew$cocaine)
str(mddvcocaine)

# mdd vs heroine
mddvheroine <- mdd_share_by(mddnew$heroine)
str(mddvheroine)

# mdd vs hallucinogens
mddvhallucinogens <- mdd_share_by(mddnew$hallucinogens)
str(mddvhallucinogens)

# mdd vs inhalants
mddvinhalants <- mdd_share_by(mddnew$inhalants)
str(mddvinhalants)

# 2 x 2 panel grid. The grid size is passed through mypar()'s own row/column
# arguments; the original `mypar(mfrow = c(2,2))` only worked because par()
# received mfrow twice and the later value happened to win.
mypar(2, 2)

# Bar colours are palette indices 1..k, one per bar (or per stacked segment).
# The original used `col = unique(<data column>)`, which picked colours in the
# order values first appear in the raw data and could include NA.

# MDD diagnosis {RESPONSE} by Estimated Cigarette Use per Year {EXPLANATORY} among all Adults in the NESARC Study
# Row 1 of the share matrix only: one bar per cigarettes-per-year group.
bp_mddvg4cigs <- barplot(
  mddvg4cigs[1, ],
  col = seq_along(mddvg4cigs[1, ]),
  xlab = "cigarettes per year", ylab = "diagnosed depression",
  cex.axis = 0.8
)
# Same explanatory variable, both rows stacked within each group.
bp_mddvg4cigs <- barplot(
  mddvg4cigs,
  col = seq_len(nrow(mddvg4cigs)),
  xlab = "cigarettes per year", ylab = "diagnosed depression",
  cex.axis = 0.8
)

# MDD diagnosis {RESPONSE} by Estimated Alcohol Consumed per Year {EXPLANATORY} among all Adults in the NESARC Study
# Row 1 of the share matrix only: one bar per alcohol-per-year group.
bp_mddvg4swigs <- barplot(
  mddvg4swigs[1, ],
  col = seq_along(mddvg4swigs[1, ]),
  xlab = "alcohol per year", ylab = "diagnosed depression",
  cex.axis = 0.8
)
# Same explanatory variable, both rows stacked within each group.
bp_mddvg4swigs <- barplot(
  mddvg4swigs,
  col = seq_len(nrow(mddvg4swigs)),
  xlab = "alcohol per year", ylab = "diagnosed depression",
  cex.axis = 0.8
)
# dev.copy2pdf(file="mdd_cigarettes_alcohol.pdf", height =8, width = 11)

# 2 x 5 panel grid, passed through mypar()'s own row/column arguments; the
# original `mypar(mfrow = c(2, 5))` relied on par() receiving mfrow twice.
mypar(2, 5)

# Draw row 1 of a share matrix as one bar per category and return barplot()'s
# result (the bar positions). Colours are palette indices 1..k, one per bar;
# the original `col = unique(<0/1 column>)` depended on the row order of the
# data and could yield colour 0 (the background colour) or NA.
plot_mdd_share <- function(shares, xlab) {
  top_row <- shares[1, ]
  barplot(
    top_row,
    col = seq_along(top_row),
    xlab = xlab, ylab = "diagnosed depression"
  )
}

# MDD diagnosis {RESPONSE} by each {EXPLANATORY} variable among all Adults in
# the NESARC Study, one panel per variable, in the original plotting order.
bp_mddvsex <- plot_mdd_share(mddvsex, "biological sex, 0 - female, 1 - male")
bp_mddvsedatives <- plot_mdd_share(mddvsedatives, "sedative use")
bp_mddvcannabis <- plot_mdd_share(mddvcannabis, "cannabis use")
bp_mddvtranquilizers <- plot_mdd_share(mddvtranquilizers, "tranquilizers use")
bp_mddvopioids <- plot_mdd_share(mddvopioids, "opioids use")
bp_mddvcocaine <- plot_mdd_share(mddvcocaine, "cocaine use")
bp_mddvamphetamines <- plot_mdd_share(mddvamphetamines, "amphetamines use")
bp_mddvheroine <- plot_mdd_share(mddvheroine, "heroine use")
bp_mddvhallucinogens <- plot_mdd_share(mddvhallucinogens, "hallucinogens use")
bp_mddvinhalants <- plot_mdd_share(mddvinhalants, "inhalants use")
# dev.copy2pdf(file="mdd_sex_substances.pdf", height =8, width = 11)
In any case, I think Google will eventually crawl this R post, and hopefully other beginner programmers like me will find the code useful for their own basic graphing.
Navigation

R: graphing plots

No comments:

Post a Comment