Lesson 5 part

This commit is contained in:
Dusty.P 2018-05-22 23:18:00 -08:00
parent 33c7e625e3
commit e1f08e77f1
2 changed files with 36 additions and 11 deletions

View File

@ -1,6 +1,7 @@
<component name="ProjectDictionaryState"> <component name="ProjectDictionaryState">
<dictionary name="Dusty"> <dictionary name="Dusty">
<words> <words>
<w />
<w>.</w> <w>.</w>
<w>...elt</w> <w>...elt</w>
<w>...length</w> <w>...length</w>
@ -1114,7 +1115,6 @@
<w>`.__C__C++Function`</w> <w>`.__C__C++Function`</w>
<w>`.__C__C++Object`</w> <w>`.__C__C++Object`</w>
<w>`.__C__C++OverloadedMethods`</w> <w>`.__C__C++OverloadedMethods`</w>
<w />
<w>`.__C__{`</w> <w>`.__C__{`</w>
<w>`.__T__!:base`</w> <w>`.__T__!:base`</w>
<w>`.__T__$:base`</w> <w>`.__T__$:base`</w>
@ -1463,7 +1463,6 @@
<w>`mday&lt;-`</w> <w>`mday&lt;-`</w>
<w>`minute&lt;-`</w> <w>`minute&lt;-`</w>
<w>`month&lt;-`</w> <w>`month&lt;-`</w>
<w />
<w>`on_failure&lt;-`</w> <w>`on_failure&lt;-`</w>
<w>`packageSlot&lt;-`</w> <w>`packageSlot&lt;-`</w>
<w>`polygons&lt;-`</w> <w>`polygons&lt;-`</w>

View File

@ -15,8 +15,19 @@ Notes:
Notes: Notes:
```{r Third Qualitative Variable} ```{r Third Qualitative Variable}
ggplot(aes(x = gender, y = age), library(ggplot2)
data = subset(pf, !is.na(gender))) + geom_histogram() library(dplyr)
# Build one summary row per (age, gender) pair: mean and median friend_count
# plus the number of users in the group. Rows with a missing gender are
# removed first, and the result is ungrouped and sorted by age.
pf.fc_by_age_gender <- pf %>%
  filter(!is.na(gender)) %>%
  group_by(age, gender) %>%
  summarise(
    mean_friend_count = mean(friend_count),
    median_friend_count = median(friend_count),
    n = n()
  ) %>%
  ungroup() %>%
  arrange(age)

# Preview the first rows of the summary table.
head(pf.fc_by_age_gender)
``` ```
*** ***
@ -25,7 +36,8 @@ ggplot(aes(x = gender, y = age),
Notes: Notes:
```{r Plotting Conditional Summaries} ```{r Plotting Conditional Summaries}
# Median friend count against age, drawn as one coloured line per gender.
ggplot(
  data = pf.fc_by_age_gender,
  mapping = aes(x = age, y = median_friend_count)
) +
  geom_line(aes(color = gender))
``` ```
*** ***
@ -33,7 +45,7 @@ Notes:
### Thinking in Ratios ### Thinking in Ratios
Notes: Notes:
*** What is the ratio of friends for males vs. females?
### Wide and Long Format ### Wide and Long Format
Notes: Notes:
@ -44,8 +56,14 @@ Notes:
Notes: Notes:
```{r} ```{r}
install.packages('reshape2') #install.packages('reshape2')
library(reshape2) library(reshape2)
# Reshape the long age/gender summary to wide format: one row per age and
# one column per gender level, filled with median_friend_count.
pf.fc_by_age_gender.wide <- dcast(
  data = pf.fc_by_age_gender,
  formula = age ~ gender,
  value.var = "median_friend_count"
)

# Preview the first rows of the wide table.
head(pf.fc_by_age_gender.wide)
``` ```
@ -55,7 +73,9 @@ library(reshape2)
Notes: Notes:
```{r Ratio Plot} ```{r Ratio Plot}
# Plot the female-to-male ratio of the per-age values held in the wide table.
# The dashed reference line at y = 1 marks where both genders are equal.
# yintercept is passed as a fixed parameter rather than inside aes(): mapping
# a constant through aes() draws one line per data row, and the overplotting
# cancels out the intended alpha = 0.3 transparency.
ggplot(aes(x = age, y = female / male), data = pf.fc_by_age_gender.wide) +
  geom_line() +
  geom_hline(yintercept = 1, alpha = 0.3, linetype = 2)
``` ```
*** ***
@ -64,7 +84,8 @@ Notes:
Notes: Notes:
```{r Third Quantitative Variable} ```{r Third Quantitative Variable}
# Derive the year each user joined by subtracting tenure / 365 from 2014 and
# rounding down.
# NOTE(review): presumably tenure is measured in days and 2014 is the
# reference year of the data set — confirm against the data source.
pf$year_joined <- with(pf, floor(2014 - tenure / 365))

# Preview the data frame with the new column.
head(pf)
``` ```
*** ***
@ -73,7 +94,8 @@ Notes:
Notes: Notes:
```{r Cut a Variable} ```{r Cut a Variable}
# Bin year_joined into four right-closed intervals (cut()'s default):
# (2004, 2009], (2009, 2011], (2011, 2012], (2012, 2014].
# Values outside these breaks become NA.
pf$year_joined.bucket <- cut(
  pf$year_joined,
  breaks = c(2004, 2009, 2011, 2012, 2014)
)

# Count the users falling into each bucket.
table(pf$year_joined.bucket)
``` ```
*** ***
@ -82,7 +104,8 @@ Notes:
Notes: Notes:
```{r Plotting it All Together} ```{r Plotting it All Together}
# Median friend_count by age, one coloured line per year_joined.bucket.
# Rows with no bucket (NA) are excluded before plotting.
# NOTE(review): `fun.y` is deprecated in favour of `fun` from ggplot2 3.3.0;
# kept as-is here — confirm the installed version before switching.
ggplot(
  data = subset(pf, !is.na(year_joined.bucket)),
  mapping = aes(x = age, y = friend_count)
) +
  geom_line(aes(color = year_joined.bucket), stat = "summary", fun.y = median)
``` ```
*** ***
@ -91,6 +114,9 @@ Notes:
Notes: Notes:
```{r Plot the Grand Mean} ```{r Plot the Grand Mean}
# Mean friend_count by age: one coloured line per year_joined.bucket, plus a
# dashed line for the mean across all buckets (the grand mean).
# Rows with no bucket (NA) are excluded before plotting.
# NOTE(review): `fun.y` is deprecated in favour of `fun` from ggplot2 3.3.0;
# kept as-is here — confirm the installed version before switching.
ggplot(
  data = subset(pf, !is.na(year_joined.bucket)),
  mapping = aes(x = age, y = friend_count)
) +
  geom_line(aes(color = year_joined.bucket), stat = "summary", fun.y = mean) +
  geom_line(stat = "summary", fun.y = mean, linetype = 2)
``` ```