Load relevant libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(p8105.datasets)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Load the NYC restaurant inspection data shipped with p8105.datasets.
rest <- p8105.datasets::rest_inspec

# View() opens a spreadsheet-style viewer, which is only meaningful in an
# interactive session; skip it when the document is knitted or run as a script.
if (interactive()) {
  View(rest)
}

Which borough has the highest average score?

# Mean inspection score for each value of `boro`, drawn as one bar per
# borough with the bars ordered by that mean.
rest %>%
  group_by(boro) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  # Reorder the factor levels by the average so the x-axis follows the ranking.
  mutate(boro = fct_reorder(boro, avg_score)) %>%
  plot_ly(
    x = ~boro,
    y = ~avg_score,
    color = ~boro,
    type = "bar",
    colors = "viridis"
  )

What types of restaurants are most common in Manhattan?

# Count the Manhattan rows of `rest` by cuisine and plot the 15 most frequent
# cuisines as bars, most common first.
rest %>%
  filter(boro == "MANHATTAN") %>%
  count(cuisine_description) %>%
  # Rank explicitly on the count column. slice_max() replaces the superseded
  # top_n(), which had to guess the ranking column ("Selecting by n").
  slice_max(order_by = n, n = 15) %>%
  mutate(
    # Shorten the one very long cuisine label so the axis stays readable.
    cuisine_description = replace(
      cuisine_description,
      cuisine_description == "Latin (Cuban, Dominican, Puerto Rican, South & Central American)",
      "Latin"
    ),
    # Negating the count orders the factor levels from most to least common.
    cuisine_description = fct_reorder(cuisine_description, -n)
  ) %>%
  plot_ly(
    x = ~cuisine_description,
    y = ~n,
    color = ~cuisine_description,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    xaxis = list(title = "Restaurant Type"),
    yaxis = list(title = "Count"),
    showlegend = FALSE
  )

Is there any relationship between the time of year an inspection was performed and the inspection score?

# Average inspection score per calendar month, drawn as a single
# line-and-marker trace in January-to-December order.
rest %>%
  mutate(
    # month() yields 1-12; indexing the built-in month.name constant turns
    # that into English month names without twelve separate replace() calls.
    month = month.name[month(as.Date(inspection_date))],
    # Fix the level order so the x-axis is chronological, not alphabetical.
    month = factor(month, levels = month.name)
  ) %>%
  group_by(month) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  plot_ly(
    x = ~month,
    y = ~avg_score,
    type = "scatter",
    # Setting the mode explicitly avoids plotly's "No scatter mode specified"
    # message and the need for a second, overlapping trace.
    mode = "lines+markers"
  ) %>%
  layout(
    xaxis = list(title = "Month"),
    yaxis = list(title = "Average Inspection Score"),
    showlegend = FALSE
  )

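Summer months tend to have higher average inspection scores. One possible explanation is that people are happier or more positive in the summer and therefore score restaurants more favorably. Note, however, that NYC inspection scores are violation points, so a higher score indicates a worse inspection result; the pattern may instead mean that more violations are recorded in the summer.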

What is the grade distribution for each Borough?

# Stacked bar chart of how many rows of `rest` fall into each inspection
# grade, with one bar per borough.
rest %>%
  # Drop the placeholder borough before any relabelling.
  filter(boro != "Missing") %>%
  mutate(
    # Fix the grade order so the stacking and legend follow A, B, C, ...
    grade = factor(grade, levels = c("A", "B", "C", "Z", "P", "Not Yet Graded")),
    # Title-case the upper-case borough names (e.g. "STATEN ISLAND" becomes
    # "Staten Island") instead of replacing each one by hand.
    boro = str_to_title(boro)
  ) %>%
  # count() on both columns gives one ungrouped row per borough/grade pair.
  count(boro, grade) %>%
  plot_ly(
    x = ~boro,
    y = ~n,
    name = ~grade,
    color = ~grade,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Count"),
    # The legend encodes grade, not borough; the bold tag is also closed now.
    legend = list(title = list(text = "<b>Grade</b>")),
    barmode = "stack"
  )