diff --git a/.idea/dictionaries/Dusty.xml b/.idea/dictionaries/Dusty.xml index e56e71a..aef67a0 100644 --- a/.idea/dictionaries/Dusty.xml +++ b/.idea/dictionaries/Dusty.xml @@ -1,6 +1,7 @@ + . ...elt ...length @@ -1114,7 +1115,6 @@ `.__C__C++Function` `.__C__C++Object` `.__C__C++OverloadedMethods` - `.__C__{` `.__T__!:base` `.__T__$:base` @@ -1463,7 +1463,6 @@ `mday<-` `minute<-` `month<-` - `on_failure<-` `packageSlot<-` `polygons<-` diff --git a/lesson5/lesson5_student.rmd b/lesson5/lesson5_student.rmd index cbe26cb..949a98a 100644 --- a/lesson5/lesson5_student.rmd +++ b/lesson5/lesson5_student.rmd @@ -15,8 +15,19 @@ Notes: Notes: ```{r Third Qualitative Variable} -ggplot(aes(x = gender, y = age), - data = subset(pf, !is.na(gender))) + geom_histogram() +library(ggplot2) +library(dplyr) + +pf.fc_by_age_gender <- pf %>% + filter(!is.na(gender)) %>% + group_by(age, gender) %>% + summarize(mean_friend_count = mean(friend_count), + median_friend_count = median(friend_count), + n = n()) %>% + ungroup() %>% + arrange(age) + +head(pf.fc_by_age_gender) ``` *** @@ -25,7 +36,8 @@ ggplot(aes(x = gender, y = age), Notes: ```{r Plotting Conditional Summaries} - +ggplot(aes(x = age, y = median_friend_count), data = pf.fc_by_age_gender) + + geom_line(aes(color = gender)) ``` *** @@ -33,7 +45,7 @@ Notes: ### Thinking in Ratios Notes: -*** +What is the ratio of friends for males vs females ### Wide and Long Format Notes: @@ -44,8 +56,14 @@ Notes: Notes: ```{r} -install.packages('reshape2') +#install.packages('reshape2') library(reshape2) + +pf.fc_by_age_gender.wide <- dcast(pf.fc_by_age_gender, + age ~ gender, + value.var = 'median_friend_count') + +head(pf.fc_by_age_gender.wide) ``` @@ -55,7 +73,9 @@ library(reshape2) Notes: ```{r Ratio Plot} - +ggplot(aes(x = age, y = female/male), data = pf.fc_by_age_gender.wide) + + geom_line() + + geom_hline(aes(yintercept = 1), alpha=0.3, linetype = 2) ``` *** @@ -64,7 +84,8 @@ Notes: Notes: ```{r Third Quantitative Variable} - +pf$year_joined <- floor(2014 - pf$tenure/365) +head(pf) ``` *** @@ -73,7 +94,8 @@ Notes: Notes: ```{r Cut a Variable} - +pf$year_joined.bucket = cut(pf$year_joined, c(2004, 2009, 2011, 2012, 2014)) +table(pf$year_joined.bucket) ``` *** @@ -82,7 +104,8 @@ Notes: Notes: ```{r Plotting it All Together} - +ggplot(aes(x = age, y = friend_count), data = subset(pf, !is.na(year_joined.bucket))) + + geom_line(aes(color = year_joined.bucket), stat='summary', fun.y = median) ``` *** @@ -91,6 +114,9 @@ Notes: Notes: ```{r Plot the Grand Mean} +ggplot(aes(x = age, y = friend_count), data = subset(pf, !is.na(year_joined.bucket))) + + geom_line(aes(color = year_joined.bucket), stat='summary', fun.y = mean) + + geom_line(stat = 'summary', fun.y = mean, linetype = 2) ```