Load relevant libraries

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(p8105.datasets)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

# Load the restaurant inspection data set shipped with p8105.datasets.
rest <- p8105.datasets::rest_inspec

# View() is an interactive data viewer, so only call it in an interactive
# session; skip it when the file is knitted or run in batch mode.
if (interactive()) {
  View(rest)
}

Which borough has the highest average score?

# Bar chart of the mean inspection score per borough. Missing scores are
# ignored when averaging.
rest %>%
  select(boro, score) %>%
  group_by(boro) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  # Reorder the borough factor by its average so the bars appear sorted.
  mutate(boro = fct_reorder(boro, avg_score)) %>%
  plot_ly(
    x = ~boro, y = ~avg_score, color = ~boro,
    type = "bar", colors = "viridis"
  ) %>%
  # Label the axes like the other plots in this file instead of showing the
  # raw column names.
  layout(
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Average Inspection Score")
  )

What types of restaurants are most common in Manhattan?

# Bar chart of the 15 most frequent cuisine descriptions among the Manhattan
# rows of the inspection data.
rest %>%
  filter(boro == "MANHATTAN") %>%
  count(cuisine_description) %>%
  # Pick the top 15 by count explicitly (ties are kept, as with top_n) and do
  # it before building the factor so the factor carries no unused levels.
  slice_max(n, n = 15) %>%
  mutate(
    # Shorten the one very long label so it fits on the axis.
    cuisine_description = replace(
      cuisine_description,
      cuisine_description == "Latin (Cuban, Dominican, Puerto Rican, South & Central American)",
      "Latin"
    ),
    # Order the bars from most to least common.
    cuisine_description = fct_reorder(cuisine_description, -n)
  ) %>%
  plot_ly(
    x = ~cuisine_description, y = ~n, color = ~cuisine_description,
    type = "bar", colors = "viridis"
  ) %>%
  layout(
    xaxis = list(title = "Restaurant Type"),
    yaxis = list(title = "Count"),
    showlegend = FALSE
  )

## Selecting by n

Is there a relationship between the time of year an inspection was performed and the inspection score?

# Line chart of the mean inspection score for each calendar month of the
# inspection date. Missing scores are ignored when averaging.
rest %>%
  mutate(
    inspection_date = as.Date(inspection_date),
    # month() yields the month number (1-12); indexing the built-in
    # month.name vector turns it into the English month name, and using
    # month.name as the levels keeps the months in calendar order.
    month = factor(month.name[month(inspection_date)], levels = month.name)
  ) %>%
  group_by(month) %>%
  summarise(avg_score = mean(score, na.rm = TRUE)) %>%
  # Draw the points and the connecting line as a single trace. Stating the
  # mode explicitly avoids plotly's "No scatter mode specified" message.
  plot_ly(
    x = ~month, y = ~avg_score,
    type = "scatter", mode = "lines+markers"
  ) %>%
  layout(
    xaxis = list(title = "Month"),
    yaxis = list(title = "Average Inspection Score"),
    showlegend = FALSE
  )

## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

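Summer months tend to have higher average inspection scores. One possible explanation is that inspectors are in a more positive mood in the summer and score restaurants differently. Note, however, that NYC inspection scores count violation points, so a higher score indicates a worse inspection result rather than a better one; the summer increase may therefore reflect more violations being recorded in warmer months.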

What is the grade distribution for each Borough?

# Stacked bar chart of the number of inspection records per grade within each
# borough. Rows whose borough is recorded as "Missing" are excluded.
rest %>%
  filter(boro != "Missing") %>%
  mutate(
    # Fix the grade order used for stacking and for the legend.
    grade = factor(
      grade,
      levels = c("A", "B", "C", "Z", "P", "Not Yet Graded")
    ),
    # Convert the upper-case borough names (e.g. "STATEN ISLAND") to title
    # case for display.
    boro = str_to_title(boro)
  ) %>%
  count(boro, grade) %>%
  plot_ly(
    x = ~boro, y = ~n, name = ~grade, color = ~grade,
    type = "bar", colors = "viridis"
  ) %>%
  layout(
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Count"),
    # The legend distinguishes grades (the colour variable), not boroughs.
    legend = list(title = list(text = "<b>Grade</b>")),
    barmode = "stack"
  )

Plotly

Load relevant libraries

Which borough has the highest average score?

What types of restaurants are most common in Manhattan?

Is there any trend between time of year inspection was performed and inspection score?

What is the grade distribution for each Borough?