library(ggplot2)
library(ggthemes)
library(lubridate)

Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse) # metapackage of all tidyverse packages
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ----------------------------- tidyverse 1.3.1 --
√ tibble  3.1.6     √ stringr 1.4.0
√ readr   2.1.2     √ forcats 0.5.1
√ purrr   0.3.4     
-- Conflicts -------------------------------- tidyverse_conflicts() --
x lubridate::as.difftime() masks base::as.difftime()
x lubridate::date()        masks base::date()
x dplyr::filter()          masks stats::filter()
x lubridate::intersect()   masks base::intersect()
x dplyr::lag()             masks stats::lag()
x lubridate::setdiff()     masks base::setdiff()
x lubridate::union()       masks base::union()
library(DT)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
library(scales)

Attaching package: ‘scales’

The following object is masked from ‘package:purrr’:

    discard

The following object is masked from ‘package:readr’:

    col_factor
colors = c("#CC1011", "#665555", "#05a399", "#cfcaca", "#f5e840", "#0683c9", "#e075b0")


# Read the data for each month separately 
apr <- read.csv("./uber-raw-data-apr14.csv")
may <- read.csv("./uber-raw-data-may14.csv")
june <- read.csv("./uber-raw-data-jun14.csv")
july <- read.csv("./uber-raw-data-jul14.csv")
aug <- read.csv("./uber-raw-data-aug14.csv")
sept <- read.csv("./uber-raw-data-sep14.csv")

# Combine the data together 
data <- rbind(apr, may, june, july, aug, sept)
cat("The dimensions of the data are:", dim(data))
The dimensions of the data are: 4534327 4
# Look at the data
head(data)
data$Date.Time <- as.POSIXct(data$Date.Time, format="%m/%d/%Y %H:%M:%S")
data$Time <- format(as.POSIXct(data$Date.Time, format = "%m/%d/%Y %H:%M:%S"), format="%H:%M:%S")
data$Date.Time <- ymd_hms(data$Date.Time)


# Create individual columns for month day and year
data$day <- factor(day(data$Date.Time))
data$month <- factor(month(data$Date.Time, label=TRUE))
data$year <- factor(year(data$Date.Time))
data$dayofweek <- factor(wday(data$Date.Time, label=TRUE))

# Add Time variables as well 
data$second = factor(second(hms(data$Time)))
data$minute = factor(minute(hms(data$Time)))
data$hour = factor(hour(hms(data$Time)))
head(data)
# Plotting the trips by hours in a day
hourly_data <- data %>% 
                    group_by(hour) %>% 
                            dplyr::summarize(Total = n())

# Shows data in a searchable js table
datatable(hourly_data)

# Plot the data by hour
ggplot(hourly_data, aes(hour, Total)) + 
geom_bar(stat="identity", 
         fill="steelblue", 
         color="red") + 
ggtitle("Trips Every Hour", subtitle = "aggregated today") + 
theme(legend.position = "none", 
      plot.title = element_text(hjust = 0.5), 
      plot.subtitle = element_text(hjust = 0.5)) + 
scale_y_continuous(labels=comma)

# Aggregate the data by month and hour
month_hour_data <- data %>% group_by(month, hour) %>%  dplyr::summarize(Total = n())
`summarise()` has grouped output by 'month'. You can override using
the `.groups` argument.
ggplot(month_hour_data, aes(hour, Total, fill=month)) + 
geom_bar(stat = "identity") + 
ggtitle("Trips by Hour and Month") + 
scale_y_continuous(labels = comma)

# Aggregate data by day of the month 
day_data <- data %>% group_by(day) %>% dplyr::summarize(Trips = n())
day_data

# Plot the data for the day
ggplot(day_data, aes(day, Trips)) + 
geom_bar(stat = "identity", fill = "steelblue") +
ggtitle("Trips by day of the month") + 
theme(legend.position = "none") + 
scale_y_continuous(labels = comma)

day_month_data <- data %>% group_by(dayofweek, month) %>% dplyr::summarize(Trips = n())
`summarise()` has grouped output by 'dayofweek'. You can override
using the `.groups` argument.
day_month_data
# Plot the above data
ggplot(day_month_data, aes(dayofweek, Trips, fill = month)) + 
geom_bar(stat = "identity", aes(fill = month), position = "dodge") + 
ggtitle("Trips by Day and Month") + 
scale_y_continuous(labels = comma) + 
scale_fill_manual(values = colors)

month_data <- data %>% group_by(month) %>% dplyr::summarize(Total = n())

month_data
ggplot(month_data, aes(month, Total, fill = month)) + 
geom_bar(stat = "Identity") + 
ggtitle("Trips in a month") + 
theme(legend.position = "none") + 
scale_y_continuous(labels = comma) + 
scale_fill_manual(values = colors)

day_hour_data <- data %>% group_by(day, hour) %>% dplyr::summarize(Total = n())
`summarise()` has grouped output by 'day'. You can override using the
`.groups` argument.
datatable(day_hour_data)
ggplot(day_hour_data, aes(day, hour, fill = Total)) + 
geom_tile(color = "white") + 
ggtitle("Heat Map by Hour and Day")

# Collect data by month and day

month_day_data <- data %>% group_by(month, day) %>% dplyr::summarize(Trips = n())
`summarise()` has grouped output by 'month'. You can override using
the `.groups` argument.
month_day_data
# Plot a heatmap 

ggplot(month_day_data, aes(day, month, fill = Trips)) + 
geom_tile(color = "white") + 
ggtitle("Heat Map by Month and Day")

# Plot a heatmap by day of the week and month

ggplot(day_month_data, aes(dayofweek, month, fill = Trips)) + 
geom_tile(color = "white") + 
ggtitle("Heat Map by Month and Day of weak")

###### CREATING A MAP VISUALIZATION #####

# Set Map Constants
min_lat <- 40 
max_lat <- 40.91
min_long <- -74.15
max_long <- -73.7004

ggplot(data, aes(x=Lon, y=Lat)) +
  geom_point(size=1, color = "blue") +
     scale_x_continuous(limits=c(min_long, max_long)) +
      scale_y_continuous(limits=c(min_lat, max_lat)) +
        theme_map() +
           ggtitle("NYC MAP BASED ON UBER RIDES DURING 2014 (APR-SEP)")
Warning: Removed 70180 rows containing missing values (geom_point).

ggplot(data, aes(x=Lon, y=Lat, color = Base)) +
  geom_point(size=1) +
     scale_x_continuous(limits=c(min_long, max_long)) +
      scale_y_continuous(limits=c(min_lat, max_lat)) +
       theme_map() +
          ggtitle("NYC MAP BASED ON UBER RIDES DURING 2014 (APR-SEP) by BASE")
Warning: Removed 70180 rows containing missing values (geom_point).

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZ2d0aGVtZXMpDQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KHRpZHlyKQ0KbGlicmFyeSh0aWR5dmVyc2UpICMgbWV0YXBhY2thZ2Ugb2YgYWxsIHRpZHl2ZXJzZSBwYWNrYWdlcw0KbGlicmFyeShEVCkNCmxpYnJhcnkoc2NhbGVzKQ0KYGBgDQoNCmBgYHtyfQ0KY29sb3JzID0gYygiI0NDMTAxMSIsICIjNjY1NTU1IiwgIiMwNWEzOTkiLCAiI2NmY2FjYSIsICIjZjVlODQwIiwgIiMwNjgzYzkiLCAiI2UwNzViMCIpDQoNCg0KIyBSZWFkIHRoZSBkYXRhIGZvciBlYWNoIG1vbnRoIHNlcGFyYXRlbHkgDQphcHIgPC0gcmVhZC5jc3YoIi4vdWJlci1yYXctZGF0YS1hcHIxNC5jc3YiKQ0KbWF5IDwtIHJlYWQuY3N2KCIuL3ViZXItcmF3LWRhdGEtbWF5MTQuY3N2IikNCmp1bmUgPC0gcmVhZC5jc3YoIi4vdWJlci1yYXctZGF0YS1qdW4xNC5jc3YiKQ0KanVseSA8LSByZWFkLmNzdigiLi91YmVyLXJhdy1kYXRhLWp1bDE0LmNzdiIpDQphdWcgPC0gcmVhZC5jc3YoIi4vdWJlci1yYXctZGF0YS1hdWcxNC5jc3YiKQ0Kc2VwdCA8LSByZWFkLmNzdigiLi91YmVyLXJhdy1kYXRhLXNlcDE0LmNzdiIpDQoNCiMgQ29tYmluZSB0aGUgZGF0YSB0b2dldGhlciANCmRhdGEgPC0gcmJpbmQoYXByLCBtYXksIGp1bmUsIGp1bHksIGF1Zywgc2VwdCkNCmNhdCgiVGhlIGRpbWVuc2lvbnMgb2YgdGhlIGRhdGEgYXJlOiIsIGRpbShkYXRhKSkNCmBgYA0KDQpgYGB7cn0NCiMgTG9vayBhdCB0aGUgZGF0YQ0KaGVhZChkYXRhKQ0KYGBgDQoNCmBgYHtyfQ0KZGF0YSREYXRlLlRpbWUgPC0gYXMuUE9TSVhjdChkYXRhJERhdGUuVGltZSwgZm9ybWF0PSIlbS8lZC8lWSAlSDolTTolUyIpDQpkYXRhJFRpbWUgPC0gZm9ybWF0KGFzLlBPU0lYY3QoZGF0YSREYXRlLlRpbWUsIGZvcm1hdCA9ICIlbS8lZC8lWSAlSDolTTolUyIpLCBmb3JtYXQ9IiVIOiVNOiVTIikNCmRhdGEkRGF0ZS5UaW1lIDwtIHltZF9obXMoZGF0YSREYXRlLlRpbWUpDQoNCg0KIyBDcmVhdGUgaW5kaXZpZHVhbCBjb2x1bW5zIGZvciBtb250aCBkYXkgYW5kIHllYXINCmRhdGEkZGF5IDwtIGZhY3RvcihkYXkoZGF0YSREYXRlLlRpbWUpKQ0KZGF0YSRtb250aCA8LSBmYWN0b3IobW9udGgoZGF0YSREYXRlLlRpbWUsIGxhYmVsPVRSVUUpKQ0KZGF0YSR5ZWFyIDwtIGZhY3Rvcih5ZWFyKGRhdGEkRGF0ZS5UaW1lKSkNCmRhdGEkZGF5b2Z3ZWVrIDwtIGZhY3Rvcih3ZGF5KGRhdGEkRGF0ZS5UaW1lLCBsYWJlbD1UUlVFKSkNCg0KIyBBZGQgVGltZSB2YXJpYWJsZXMgYXMgd2VsbCANCmRhdGEkc2Vjb25kID0gZmFjdG9yKHNlY29uZChobXMoZGF0YSRUaW1lKSkpDQpkYXRhJG1pbnV0ZSA9IGZhY3RvcihtaW51dGUoaG1zKGRhdGEkVGltZSkpKQ0KZGF0YSRob3VyID0gZmFjdG9yKGhvdXIoaG1zKGRhdGEkVGltZSkpKQ0KYGBgDQoNCmBgYHtyfQ0KaGVhZChkYXRhKQ0KYGBgDQoNCmBgYHtyfQ0KIyBQbG90dGluZyB0aGUgdHJpcHMgYnkgaG91cnMgaW4gYSBkYXkNCmhvdXJseV9kYXRhIDwtIGRhdGEgJT4lIA0KICAgICAgICAgICAgICAgICAgICBncm91cF9ieShob3VyKSAlPiUgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgZHBseXI6OnN1bW1hcml6ZShUb3RhbCA9IG4oKSkNCg0KIyBTaG93cyBkYXRhIGluIGEgc2VhcmNoYWJsZSBqcyB0YWJsZQ0KZGF0YXRhYmxlKGhvdXJseV9kYXRhKQ0KDQojIFBsb3QgdGhlIGRhdGEgYnkgaG91cg0KZ2dwbG90KGhvdXJseV9kYXRhLCBhZXMoaG91ciwgVG90YWwpKSArIA0KZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiLCANCiAgICAgICAgIGZpbGw9InN0ZWVsYmx1ZSIsIA0KICAgICAgICAgY29sb3I9InJlZCIpICsgDQpnZ3RpdGxlKCJUcmlwcyBFdmVyeSBIb3VyIiwgc3VidGl0bGUgPSAiYWdncmVnYXRlZCB0b2RheSIpICsgDQp0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIsIA0KICAgICAgcGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDAuNSksIA0KICAgICAgcGxvdC5zdWJ0aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDAuNSkpICsgDQpzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzPWNvbW1hKQ0KYGBgDQoNCmBgYHtyfQ0KIyBBZ2dyZWdhdGUgdGhlIGRhdGEgYnkgbW9udGggYW5kIGhvdXINCm1vbnRoX2hvdXJfZGF0YSA8LSBkYXRhICU+JSBncm91cF9ieShtb250aCwgaG91cikgJT4lICBkcGx5cjo6c3VtbWFyaXplKFRvdGFsID0gbigpKQ0KDQpnZ3Bsb3QobW9udGhfaG91cl9kYXRhLCBhZXMoaG91ciwgVG90YWwsIGZpbGw9bW9udGgpKSArIA0KZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsgDQpnZ3RpdGxlKCJUcmlwcyBieSBIb3VyIGFuZCBNb250aCIpICsgDQpzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gY29tbWEpDQpgYGANCg0KDQpgYGB7cn0NCiMgQWdncmVnYXRlIGRhdGEgYnkgZGF5IG9mIHRoZSBtb250aCANCmRheV9kYXRhIDwtIGRhdGEgJT4lIGdyb3VwX2J5KGRheSkgJT4lIGRwbHlyOjpzdW1tYXJpemUoVHJpcHMgPSBuKCkpDQpkYXlfZGF0YQ0KDQojIFBsb3QgdGhlIGRhdGEgZm9yIHRoZSBkYXkNCmdncGxvdChkYXlfZGF0YSwgYWVzKGRheSwgVHJpcHMpKSArIA0KZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSAic3RlZWxibHVlIikgKw0KZ2d0aXRsZSgiVHJpcHMgYnkgZGF5IG9mIHRoZSBtb250aCIpICsgDQp0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpICsgDQpzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gY29tbWEpDQpgYGANCg0KYGBge3J9DQpkYXlfbW9udGhfZGF0YSA8LSBkYXRhICU+JSBncm91cF9ieShkYXlvZndlZWssIG1vbnRoKSAlPiUgZHBseXI6OnN1bW1hcml6ZShUcmlwcyA9IG4oKSkNCg0KZGF5X21vbnRoX2RhdGENCmBgYA0KDQpgYGB7cn0NCiMgUGxvdCB0aGUgYWJvdmUgZGF0YQ0KZ2dwbG90KGRheV9tb250aF9kYXRhLCBhZXMoZGF5b2Z3ZWVrLCBUcmlwcywgZmlsbCA9IG1vbnRoKSkgKyANCmdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBhZXMoZmlsbCA9IG1vbnRoKSwgcG9zaXRpb24gPSAiZG9kZ2UiKSArIA0KZ2d0aXRsZSgiVHJpcHMgYnkgRGF5IGFuZCBNb250aCIpICsgDQpzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gY29tbWEpICsgDQpzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjb2xvcnMpDQpgYGANCg0KYGBge3J9DQptb250aF9kYXRhIDwtIGRhdGEgJT4lIGdyb3VwX2J5KG1vbnRoKSAlPiUgZHBseXI6OnN1bW1hcml6ZShUb3RhbCA9IG4oKSkNCg0KbW9udGhfZGF0YQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KG1vbnRoX2RhdGEsIGFlcyhtb250aCwgVG90YWwsIGZpbGwgPSBtb250aCkpICsgDQpnZW9tX2JhcihzdGF0ID0gIklkZW50aXR5IikgKyANCmdndGl0bGUoIlRyaXBzIGluIGEgbW9udGgiKSArIA0KdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSArIA0Kc2NhbGVfeV9jb250aW51b3VzKGxhYmVscyA9IGNvbW1hKSArIA0Kc2NhbGVfZmlsbF9tYW51YWwodmFsdWVzID0gY29sb3JzKQ0KYGBgDQoNCmBgYHtyfQ0KZGF5X2hvdXJfZGF0YSA8LSBkYXRhICU+JSBncm91cF9ieShkYXksIGhvdXIpICU+JSBkcGx5cjo6c3VtbWFyaXplKFRvdGFsID0gbigpKQ0KZGF0YXRhYmxlKGRheV9ob3VyX2RhdGEpDQpnZ3Bsb3QoZGF5X2hvdXJfZGF0YSwgYWVzKGRheSwgaG91ciwgZmlsbCA9IFRvdGFsKSkgKyANCmdlb21fdGlsZShjb2xvciA9ICJ3aGl0ZSIpICsgDQpnZ3RpdGxlKCJIZWF0IE1hcCBieSBIb3VyIGFuZCBEYXkiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDb2xsZWN0IGRhdGEgYnkgbW9udGggYW5kIGRheQ0KDQptb250aF9kYXlfZGF0YSA8LSBkYXRhICU+JSBncm91cF9ieShtb250aCwgZGF5KSAlPiUgZHBseXI6OnN1bW1hcml6ZShUcmlwcyA9IG4oKSkNCm1vbnRoX2RheV9kYXRhDQpgYGANCg0KYGBge3J9DQojIFBsb3QgYSBoZWF0bWFwIA0KDQpnZ3Bsb3QobW9udGhfZGF5X2RhdGEsIGFlcyhkYXksIG1vbnRoLCBmaWxsID0gVHJpcHMpKSArIA0KZ2VvbV90aWxlKGNvbG9yID0gIndoaXRlIikgKyANCmdndGl0bGUoIkhlYXQgTWFwIGJ5IE1vbnRoIGFuZCBEYXkiKQ0KYGBgDQoNCmBgYHtyfQ0KIyBQbG90IGEgaGVhdG1hcCBieSBkYXkgb2YgdGhlIHdlZWsgYW5kIG1vbnRoDQoNCmdncGxvdChkYXlfbW9udGhfZGF0YSwgYWVzKGRheW9md2VlaywgbW9udGgsIGZpbGwgPSBUcmlwcykpICsgDQpnZW9tX3RpbGUoY29sb3IgPSAid2hpdGUiKSArIA0KZ2d0aXRsZSgiSGVhdCBNYXAgYnkgTW9udGggYW5kIERheSBvZiB3ZWFrIikNCmBgYA0KDQpgYGB7cn0NCiMjIyMjIyBDUkVBVElORyBBIE1BUCBWSVNVQUxJWkFUSU9OICMjIyMjDQoNCiMgU2V0IE1hcCBDb25zdGFudHMNCm1pbl9sYXQgPC0gNDAgDQptYXhfbGF0IDwtIDQwLjkxDQptaW5fbG9uZyA8LSAtNzQuMTUNCm1heF9sb25nIDwtIC03My43MDA0DQoNCmdncGxvdChkYXRhLCBhZXMoeD1Mb24sIHk9TGF0KSkgKw0KICBnZW9tX3BvaW50KHNpemU9MSwgY29sb3IgPSAiYmx1ZSIpICsNCiAgICAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cz1jKG1pbl9sb25nLCBtYXhfbG9uZykpICsNCiAgICAgIHNjYWxlX3lfY29udGludW91cyhsaW1pdHM9YyhtaW5fbGF0LCBtYXhfbGF0KSkgKw0KICAgICAgICB0aGVtZV9tYXAoKSArDQogICAgICAgICAgIGdndGl0bGUoIk5ZQyBNQVAgQkFTRUQgT04gVUJFUiBSSURFUyBEVVJJTkcgMjAxNCAoQVBSLVNFUCkiKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwbG90KGRhdGEsIGFlcyh4PUxvbiwgeT1MYXQsIGNvbG9yID0gQmFzZSkpICsNCiAgZ2VvbV9wb2ludChzaXplPTEpICsNCiAgICAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cz1jKG1pbl9sb25nLCBtYXhfbG9uZykpICsNCiAgICAgIHNjYWxlX3lfY29udGludW91cyhsaW1pdHM9YyhtaW5fbGF0LCBtYXhfbGF0KSkgKw0KICAgICAgIHRoZW1lX21hcCgpICsNCiAgICAgICAgICBnZ3RpdGxlKCJOWUMgTUFQIEJBU0VEIE9OIFVCRVIgUklERVMgRFVSSU5HIDIwMTQgKEFQUi1TRVApIGJ5IEJBU0UiKQ0KYGBgDQo=