diff --git a/lesson3/Problem Set.Rmd b/lesson3/Problem Set.Rmd index b610123..7582434 100644 --- a/lesson3/Problem Set.Rmd +++ b/lesson3/Problem Set.Rmd @@ -111,75 +111,147 @@ table(diamonds$carat) births <- read.csv('total_fertility.csv') library(tidyr) library(gridExtra) -b_2000 <- gather(births, -Total.fertility.rate, key = 'year', value = 'cases') -g1 <- ggplot(aes(x = year, y = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'United States')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('United States') + - coord_cartesian(ylim = c(1.5, 8)) + - scale_x_discrete(breaks = NULL) -g2 <- ggplot(aes(x = year, y = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'Germany')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('Germany') + - coord_cartesian(ylim = c(1.5, 8)) + - scale_x_discrete(breaks = NULL) -g3 <- ggplot(aes(x = year, y = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'United Kingdom')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('United Kingdom') + - coord_cartesian(ylim = c(1.5, 8)) + - scale_x_discrete(breaks = NULL) -g4 <- ggplot(aes(x = year, y = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'Spain')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('Spain') + - coord_cartesian(ylim = c(1.5, 8)) + - scale_x_discrete(breaks = NULL) -g5 <- ggplot(aes(x = year, y = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'France')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('France') + - coord_cartesian(ylim = c(1.5, 8)) + - scale_x_discrete(breaks = NULL) -g6 <- ggplot(aes(factor(year), cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'Canada')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('Canada') + - coord_cartesian(ylim = c(1.5, 8)) + - scale_x_discrete(breaks = NULL) 
-grid.arrange(g1, g2, g3, g4, g5, g6) +births <- t(births) + +ggplot(aes(x = 'United States'), data = births) ``` ```{r fertility} births <- read.csv('total_fertility.csv') library(tidyr) library(gridExtra) -b_2000 <- gather(births, 'X1920':'X2000', key = 'year', value = 'cases') +#b_2000 <- gather(births, 'X1920':'X2000', key = 'year', value = 'cases') +b_2000 <- gather(births, -Total.fertility.rate, key = 'year', value = 'cases') -p1 = ggplot(aes(x = year, y = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'United States')) + - geom_line() + - labs(x = "Year", - y = "Births per Woman") + +data <- subset(b_2000, Total.fertility.rate == 'United States') + +p1 <- ggplot(aes(x = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'United States')) + + geom_histogram(binwidth = 0.1) + + labs(y = "Years", + x = "Births per Woman") + ggtitle('United States') + theme(axis.text.x= element_text(size = 6, angle = 90)) -p2 = ggplot(aes(x = year, y = factor(cases), group = 1), data = subset(b_2000, Total.fertility.rate == 'United States')) + - geom_histogram(stat = 'identity') + - labs(x = "Year", - y = "Births per Woman") + - ggtitle('United States') + +p2 <- ggplot(aes(x = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'United Kingdom')) + + geom_histogram(binwidth = 0.1) + + labs(y = "Years", + x = "Births per Woman") + + ggtitle('United Kingdom') + # trailing '+' keeps the scale and theme lines below part of p2 scale_y_discrete(breaks = seq(1, 5, .1)) + theme(axis.text.x= element_text(size = 6, angle = 90)) -grid.arrange(p1, p2) + +p3 <- ggplot(aes(x = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'Brazil')) + + geom_histogram(binwidth = 0.1) + + labs(y = "Years", + x = "Births per Woman") + + ggtitle('Brazil') + + scale_y_discrete(breaks = seq(1, 5, .1)) + # NOTE(review): discrete y scale on a histogram count axis - confirm this is intended + theme(axis.text.x= element_text(size = 6, angle = 90)) + +p4 <- ggplot(aes(x = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'India')) + + geom_histogram(binwidth = 0.1) + + labs(y = "Years", + x = 
"Births per Woman") + + ggtitle('India') + + scale_y_discrete(breaks = seq(1, 5, .1)) + + theme(axis.text.x= element_text(size = 6, angle = 90)) + +p5 <- ggplot(aes(x = cases, group = 1), data = b_2000) + + geom_histogram(binwidth = 0.1) + + labs(y = "Years", + x = "Births per Woman") + + ggtitle('Global') + + scale_x_continuous(breaks = seq(0.0, 9.3, 1)) + +p1 <- ggplot(aes(x = cases, group = 1), data = subset(b_2000, Total.fertility.rate == 'United States')) + + geom_histogram(binwidth = 0.1) + + labs(y = "Years", + x = "Births per Woman") + + ggtitle('United States') + + theme(axis.text.x= element_text(size = 6, angle = 90)) + +grid.arrange(p1, p2, p3, p4, p5, ncol = 2) + +summary(data['cases']) +summary(b_2000['cases']) ``` +The data I chose was the number of births per woman by country and year. From the Graphs it looks like +the more third world or developing countries have a higher birth rate than first world countries such as +the US and UK. If you graph the data using the year as the x axis and the number of births as the y axis +it becomes apparent that in countries such as Brazil and India which are developing countries the birth +rate has dropped drastically in the last couple decades as they are becoming more advanced. +Here are some basic statistics for the global data as well as the US + US Global +Min. :1.740 Min. :0.840 +1st Qu.:2.308 1st Qu.:4.620 +Median :3.700 Median :5.900 +Mean :4.033 Mean :5.397 +3rd Qu.:5.562 3rd Qu.:6.580 +Max. 
:9.220 + NA's :12532 + +```{r birthdays} +library(lubridate) +library(gridExtra) + +# Import Sample Birthdays data +birthdays <- read.csv('birthdaysExample.csv') + +# Parse the date strings into Date objects, kept in the data frame's row order so derived columns line up with their rows +dates <- as.Date(birthdays$dates, format = '%m/%d/%y') + +# Create a histogram showing the amount of birthdays for every day in the dataset (inside aes() `dates` is the raw string column, so it is parsed to get a chronological axis) +p1 <- ggplot(birthdays, aes(x = as.Date(dates, format = '%m/%d/%y'))) + + geom_histogram(binwidth = 1) + xlab('dates') + +# Extract the Months and Days from the dates into new columns +birthdays$months <- month(dates) +birthdays$days <- day(dates) + +# Create a histogram showing the distribution of birthdays by month (binwidth = 1 gives every month exactly one bar) +p2 <- ggplot(birthdays, aes(months)) + + geom_histogram(binwidth = 1) + + scale_x_continuous(breaks = seq(1, 12, 1)) + +# Create a histogram showing the distribution of birthdays by day of the month; binwidth = 1 is needed because the default 30 bins cannot hold 31 days separately, so two adjacent days would share one bar and look like a spike. +p3 <- ggplot(birthdays, aes(days)) + + geom_histogram(binwidth = 1) + + scale_x_continuous(breaks = seq(1, 31, 1)) + +# Show all 3 histograms on the same image +grid.arrange(p1, p2, p3) + +# Show basic statistics of the data including the Quartiles, Min, Max, Mean, and Median +summary(birthdays) + +``` + +From this we can see that the days that have the most births +are Feb 6th, May 22nd, and July 16th with 8 people each. But +as we can tell from the other distributions the number of +birthdays per month is fairly even. The mean is 6.474 which +is very close to half way through the year. The Median is +slightly higher at 7 which indicates that there are slightly +more birthdays in the latter half of the year. The quartiles +also indicate an even distribution with the 25% quartile at +month 3 and the 75% quartile at month 9. + +Similarly the day of the month data shows a fairly even +distribution as well. But there is one notable anomaly. +The 15th day has a drastically higher number of birthdays +than any other day. Whether this is an error or not I can't +tell. 
The 31st has understandibly fewer birthdays than the +other days since there are only 30 days in many months. + + dates months days +2/6/14 : 8 Min. : 1.000 Min. : 1.0 +5/22/14: 8 1st Qu.: 3.000 1st Qu.: 8.0 +7/16/14: 8 Median : 7.000 Median :16.0 +1/14/14: 7 Mean : 6.474 Mean :15.7 +2/2/14 : 7 3rd Qu.: 9.000 3rd Qu.:23.0 +2/23/14: 7 Max. :12.000 Max. :31.0 +(Other):988 diff --git a/lesson3/birthdaysExample.csv b/lesson3/birthdaysExample.csv new file mode 100644 index 0000000..4f6f9aa --- /dev/null +++ b/lesson3/birthdaysExample.csv @@ -0,0 +1 @@ +dates 11/25/14 6/8/14 9/12/14 5/26/14 2/20/14 6/19/14 10/10/14 10/22/14 7/6/14 9/20/14 4/8/14 8/29/14 1/9/14 4/4/14 3/1/14 6/11/14 8/27/14 9/5/14 4/30/14 5/22/14 3/16/14 9/20/14 9/12/14 9/21/14 5/18/14 11/8/14 4/14/14 6/3/14 3/28/14 12/15/14 6/4/14 7/14/14 1/15/14 7/16/14 7/15/14 2/2/14 2/3/14 9/17/14 4/17/14 3/18/14 6/6/14 7/28/14 10/12/14 5/18/14 5/14/14 4/8/14 4/25/14 10/5/14 8/17/14 5/8/14 4/9/14 12/18/14 8/18/14 8/4/14 7/8/14 2/1/14 9/1/14 6/10/14 10/19/14 1/31/14 2/4/14 6/30/14 4/2/14 2/23/14 4/14/14 6/10/14 2/10/14 3/7/14 1/9/14 11/5/14 11/23/14 4/30/14 7/20/14 8/9/14 5/19/14 2/5/14 4/14/14 6/13/14 12/20/14 8/17/14 10/29/14 10/2/14 10/6/14 4/24/14 12/16/14 8/21/14 6/17/14 11/3/14 9/17/14 3/13/14 6/30/14 10/21/14 1/22/14 8/30/14 11/23/14 5/28/14 1/26/14 3/3/14 11/30/14 9/15/14 8/15/14 1/24/14 4/29/14 8/4/14 3/13/14 6/16/14 1/29/14 5/17/14 11/8/14 12/28/14 4/27/14 10/4/14 11/5/14 11/26/14 12/3/14 12/9/14 8/18/14 12/9/14 3/21/14 4/26/14 2/10/14 1/28/14 12/17/14 9/6/14 1/27/14 1/31/14 9/14/14 10/14/14 1/2/14 11/21/14 10/14/14 12/28/14 3/28/14 5/29/14 1/7/14 7/26/14 9/9/14 10/1/14 10/2/14 10/3/14 5/11/14 10/23/14 10/28/14 4/14/14 6/29/14 11/15/14 9/16/14 7/17/14 12/21/14 8/27/14 3/22/14 11/6/14 11/3/14 3/21/14 9/17/14 3/19/14 12/8/14 2/12/14 2/6/14 9/13/14 1/27/14 7/15/14 2/2/14 10/15/14 4/22/14 6/25/14 11/24/14 3/19/14 10/13/14 9/21/14 4/10/14 2/17/14 3/23/14 8/10/14 4/1/14 10/16/14 2/16/14 10/22/14 8/10/14 5/15/14 
3/28/14 9/9/14 9/12/14 7/2/14 2/23/14 12/18/14 7/8/14 9/4/14 3/19/14 1/13/14 8/7/14 11/10/14 5/28/14 7/6/14 7/22/14 10/29/14 1/17/14 3/1/14 3/2/14 2/27/14 9/10/14 4/9/14 1/8/14 8/7/14 4/5/14 6/19/14 4/18/14 8/12/14 8/8/14 3/10/14 1/21/14 4/11/14 6/25/14 10/14/14 8/18/14 5/10/14 8/26/14 11/23/14 7/31/14 10/28/14 7/31/14 2/2/14 8/16/14 3/27/14 9/24/14 6/10/14 9/8/14 4/6/14 8/4/14 5/7/14 4/26/14 8/5/14 11/9/14 4/14/14 6/25/14 8/28/14 2/23/14 2/13/14 2/14/14 7/21/14 6/9/14 12/30/14 6/30/14 1/14/14 5/18/14 1/17/14 11/16/14 11/16/14 11/17/14 9/29/14 10/28/14 3/21/14 11/10/14 5/19/14 10/28/14 1/22/14 2/2/14 6/6/14 3/17/14 1/13/14 5/7/14 8/5/14 5/23/14 9/8/14 8/26/14 6/18/14 11/22/14 5/7/14 8/15/14 8/20/14 1/25/14 8/21/14 10/14/14 10/17/14 2/1/14 7/18/14 2/6/14 10/5/14 3/18/14 8/7/14 10/18/14 5/22/14 3/29/14 4/23/14 7/19/14 10/9/14 2/24/14 2/25/14 11/4/14 6/10/14 1/14/14 9/24/14 3/12/14 12/17/14 10/30/14 2/10/14 8/18/14 11/2/14 11/5/14 11/6/14 8/22/14 4/29/14 7/20/14 3/20/14 9/29/14 12/18/14 5/25/14 9/25/14 8/13/14 5/4/14 2/6/14 12/22/14 11/14/14 9/1/14 2/9/14 7/29/14 3/9/14 4/24/14 4/10/14 7/1/14 2/17/14 8/26/14 7/19/14 1/3/14 8/22/14 1/4/14 6/25/14 9/4/14 5/2/14 3/19/14 3/20/14 12/14/14 6/1/14 3/30/14 8/17/14 9/15/14 10/9/14 8/31/14 9/29/14 1/23/14 1/26/14 6/27/14 8/30/14 11/18/14 7/23/14 1/12/14 6/6/14 9/28/14 9/29/14 7/19/14 1/3/14 8/28/14 1/11/14 3/28/14 10/19/14 2/16/14 1/13/14 4/20/14 12/22/14 7/21/14 7/21/14 3/26/14 5/8/14 7/29/14 5/22/14 2/25/14 2/6/14 8/25/14 9/16/14 10/20/14 6/13/14 12/24/14 9/1/14 11/5/14 2/2/14 6/17/14 10/13/14 7/30/14 7/28/14 7/2/14 7/18/14 1/11/14 4/17/14 6/22/14 6/12/14 1/9/14 1/26/14 7/7/14 12/21/14 11/3/14 12/2/14 6/25/14 12/31/14 6/1/14 8/10/14 6/6/14 7/8/14 11/6/14 8/29/14 9/26/14 3/21/14 4/14/14 3/21/14 3/15/14 12/30/14 5/7/14 4/28/14 2/6/14 9/10/14 2/1/14 12/29/14 5/9/14 3/24/14 10/14/14 8/31/14 5/5/14 3/16/14 4/7/14 1/7/14 3/19/14 7/1/14 7/16/14 1/29/14 6/5/14 5/20/14 8/5/14 8/19/14 7/8/14 9/14/14 4/18/14 11/18/14 6/13/14 1/20/14 
9/19/14 5/31/14 8/27/14 1/5/14 2/10/14 3/11/14 4/6/14 12/7/14 3/18/14 3/12/14 6/7/14 2/17/14 6/24/14 9/14/14 3/10/14 2/24/14 1/9/14 8/15/14 12/3/14 8/8/14 3/23/14 10/6/14 6/14/14 4/6/14 4/22/14 11/12/14 1/26/14 2/17/14 8/27/14 4/12/14 7/16/14 7/6/14 1/16/14 3/20/14 12/9/14 3/22/14 5/27/14 6/22/14 9/3/14 9/13/14 6/2/14 7/31/14 10/21/14 5/1/14 11/17/14 7/4/14 1/27/14 6/12/14 12/24/14 4/26/14 6/23/14 10/17/14 10/4/14 4/13/14 11/8/14 12/15/14 8/18/14 3/13/14 7/28/14 11/2/14 4/26/14 11/27/14 5/22/14 6/5/14 1/22/14 6/8/14 9/19/14 9/24/14 11/21/14 10/4/14 6/21/14 4/27/14 4/23/14 11/29/14 10/3/14 11/29/14 11/28/14 2/14/14 12/4/14 4/19/14 11/28/14 9/2/14 1/14/14 8/27/14 2/26/14 9/4/14 12/26/14 12/18/14 8/21/14 12/7/14 4/4/14 11/20/14 10/11/14 9/1/14 8/27/14 7/7/14 1/1/14 6/9/14 12/2/14 3/16/14 3/3/14 11/16/14 2/27/14 2/10/14 7/3/14 1/19/14 11/18/14 12/14/14 3/3/14 7/29/14 3/24/14 2/14/14 11/22/14 6/10/14 9/14/14 5/8/14 11/4/14 7/16/14 6/23/14 2/19/14 4/27/14 5/22/14 3/31/14 5/2/14 6/8/14 2/27/14 1/11/14 1/27/14 12/31/14 8/14/14 9/10/14 8/2/14 4/23/14 2/11/14 6/2/14 11/18/14 9/9/14 6/29/14 4/8/14 6/10/14 5/6/14 12/4/14 9/23/14 12/17/14 10/23/14 9/22/14 3/9/14 6/23/14 10/11/14 8/22/14 7/10/14 8/19/14 10/18/14 4/23/14 8/8/14 9/24/14 6/11/14 12/4/14 6/1/14 9/23/14 1/12/14 1/4/14 6/7/14 3/4/14 11/2/14 7/22/14 10/2/14 8/7/14 3/25/14 3/2/14 5/14/14 3/30/14 12/25/14 9/30/14 2/13/14 10/6/14 4/5/14 5/22/14 7/5/14 8/1/14 3/19/14 6/19/14 4/10/14 10/21/14 9/18/14 10/23/14 12/4/14 8/31/14 10/19/14 5/28/14 6/9/14 12/27/14 6/20/14 9/20/14 1/7/14 12/28/14 11/4/14 7/9/14 1/15/14 6/17/14 5/20/14 1/1/14 4/26/14 2/18/14 4/12/14 1/14/14 9/16/14 10/13/14 3/14/14 4/1/14 10/16/14 3/1/14 12/14/14 4/8/14 1/1/14 6/7/14 11/30/14 4/12/14 11/17/14 9/26/14 1/15/14 11/1/14 6/30/14 3/10/14 7/24/14 3/2/14 10/25/14 8/24/14 4/28/14 7/16/14 10/12/14 9/1/14 9/28/14 3/27/14 9/27/14 4/3/14 8/1/14 5/19/14 6/2/14 5/9/14 2/3/14 6/15/14 4/25/14 5/22/14 11/17/14 5/28/14 9/14/14 4/4/14 5/21/14 9/14/14 2/23/14 1/19/14 
6/17/14 3/12/14 8/26/14 5/5/14 1/3/14 3/5/14 11/23/14 12/24/14 8/21/14 7/27/14 5/30/14 4/25/14 11/30/14 11/6/14 7/12/14 10/7/14 1/10/14 5/13/14 11/12/14 11/19/14 6/5/14 10/24/14 12/10/14 9/21/14 4/28/14 3/21/14 12/2/14 11/21/14 4/12/14 11/1/14 5/11/14 8/5/14 12/12/14 7/23/14 1/29/14 3/28/14 10/7/14 10/11/14 8/9/14 9/6/14 2/16/14 9/16/14 6/28/14 1/19/14 3/25/14 7/8/14 10/20/14 2/27/14 7/18/14 9/7/14 4/14/14 9/3/14 7/2/14 11/9/14 8/2/14 3/5/14 6/20/14 12/9/14 6/12/14 1/21/14 12/9/14 2/24/14 7/7/14 7/13/14 10/26/14 8/11/14 3/9/14 2/24/14 2/9/14 11/5/14 3/2/14 2/26/14 6/15/14 6/3/14 11/8/14 5/26/14 10/21/14 3/16/14 8/25/14 5/9/14 11/3/14 7/7/14 12/11/14 11/17/14 5/25/14 1/28/14 6/14/14 11/3/14 3/9/14 5/29/14 1/16/14 6/4/14 9/20/14 12/5/14 8/16/14 10/1/14 11/10/14 7/12/14 4/8/14 4/6/14 3/24/14 6/30/14 7/5/14 10/26/14 3/29/14 6/23/14 5/18/14 9/19/14 12/11/14 11/10/14 3/20/14 3/8/14 7/8/14 12/14/14 10/27/14 4/19/14 1/9/14 11/1/14 1/14/14 2/23/14 9/29/14 8/21/14 7/18/14 7/19/14 9/24/14 9/13/14 5/19/14 4/23/14 7/30/14 2/2/14 12/5/14 9/29/14 10/7/14 10/29/14 12/19/14 2/12/14 10/7/14 11/7/14 8/12/14 4/19/14 2/7/14 12/31/14 7/11/14 3/17/14 6/30/14 7/8/14 1/3/14 3/4/14 2/13/14 12/8/14 4/17/14 1/19/14 9/8/14 10/27/14 6/24/14 9/19/14 1/1/14 2/6/14 3/7/14 11/20/14 5/16/14 6/18/14 10/17/14 11/12/14 1/20/14 5/16/14 9/6/14 8/14/14 3/12/14 3/26/14 7/23/14 4/22/14 8/14/14 2/27/14 6/25/14 7/13/14 10/31/14 5/5/14 5/23/14 8/14/14 6/18/14 7/25/14 9/11/14 7/16/14 9/9/14 4/1/14 2/20/14 10/25/14 12/18/14 6/9/14 1/14/14 10/28/14 2/28/14 6/2/14 8/16/14 3/2/14 2/15/14 3/23/14 3/2/14 5/8/14 10/29/14 7/20/14 9/7/14 11/3/14 8/17/14 7/6/14 3/13/14 9/5/14 10/4/14 11/23/14 4/29/14 5/4/14 10/11/14 11/14/14 12/30/14 7/5/14 1/22/14 2/13/14 3/13/14 12/1/14 3/30/14 1/4/14 5/23/14 7/20/14 1/22/14 7/25/14 1/24/14 8/9/14 12/7/14 3/25/14 1/19/14 9/14/14 6/17/14 10/8/14 3/16/14 11/14/14 11/14/14 11/17/14 7/12/14 6/6/14 8/12/14 6/24/14 5/30/14 11/27/14 1/13/14 8/18/14 10/23/14 6/1/14 5/26/14 5/22/14 12/18/14 
4/5/14 10/15/14 9/1/14 11/20/14 10/15/14 2/16/14 1/18/14 1/12/14 10/24/14 1/11/14 8/5/14 5/17/14 3/5/14 2/11/14 10/8/14 10/24/14 2/23/14 6/20/14 3/17/14 7/16/14 9/23/14 1/3/14 9/29/14 12/28/14 6/28/14 7/1/14 12/14/14 9/28/14 2/20/14 9/21/14 5/12/14 2/18/14 8/18/14 4/13/14 2/24/14 12/15/14 8/27/14 2/6/14 9/10/14 9/27/14 1/10/14 6/19/14 9/16/14 11/5/14 3/27/14 10/25/14 10/16/14 2/2/14 4/15/14 1/9/14 7/7/14 8/7/14 3/22/14 12/26/14 7/9/14 11/27/14 11/25/14 2/23/14 5/4/14 1/14/14 8/25/14 3/24/14 12/19/14 1/16/14 3/16/14 5/12/14 1/30/14 1/6/14 11/30/14 8/31/14 2/13/14 9/20/14 2/6/14 12/10/14 1/27/14 4/30/14 11/24/14 9/23/14 8/1/14 9/24/14 10/27/14 7/19/14 10/31/14 6/22/14 4/8/14 5/4/14 7/16/14 10/13/14 7/15/14 6/9/14 12/17/14 3/22/14 3/29/14 8/26/14 12/28/14 9/27/14 8/26/14 \ No newline at end of file