## Loading Libraries and Data ####################

# install tidyverse if needed (delete the # to run the line):
# install.packages("tidyverse")

# load library
library(tidyverse)

# read the treats spreadsheet into an object called treats
# (edit the path so it points to wherever you saved Charlie_treats.csv)
treats <- read_csv(file = "~/your/path/to/file/Charlie_treats.csv")


## Viewing Data ####################
# sneak peek at the data: one line per column
glimpse(treats)

# add your code to view column names


# square brackets pull out pieces of the data; the format is data[row, column]
# value in the second row, third column
treats[2, 3]

# the whole second row (leaving column blank means "all columns")
treats[2, ]

# the whole third column (leaving row blank means "all rows")
treats[, 3]

# the $ operator grabs a single column by name
# everything in the brand column
treats$brand

# everything in the long_lasting column
treats$long_lasting

# save the values of the price column as a new object called cost
cost <- treats$price

## Column Math ####################
# basic summaries of a single column
sum(treats$price)     # total
mean(treats$price)    # average
sd(treats$price)      # standard deviation
length(treats$price)  # number of values

# put ? in front of a function name to open its help page
?length

# pipe the full dataset treats to distinct() to list each flavor once
# (base unique() does not understand bare column names: piping the whole
# data frame into unique(flavor) fails with "object 'flavor' not found";
# unique() works on a single column instead, e.g. unique(treats$flavor))
treats %>%
  distinct(flavor)

# put the column you want to sort by in the parentheses
treats %>%
  arrange(desire)

# sort in descending order
treats %>%
  arrange(desc(desire))

## Renaming, Deleting, Adding ####################
# select() keeps only the columns you list, in the order you list them
# note that this code also now puts the price column before flavor
treats %>%
  select(price, flavor)

# delete columns by putting a minus sign in front of EACH column name
# (select(-flavor, brand) would only remove flavor and keep brand)
treats %>%
  select(-flavor, -brand)

# create a new dataset called "preferences" that is just flavor, desire, long_lasting and thrown_out
preferences <- treats %>%
  select(flavor, desire, long_lasting, thrown_out)
preferences

# rename the desire column to yummy and save the result back into treats
# syntax: rename(new_name = old_name)
treats <- treats %>%
  rename(yummy = desire)
treats

# mutate() adds a column; syntax: mutate(new_column = formula)
# here the new column value is price divided by quantity
# (the result is printed but not saved, so treats itself is unchanged)
treats %>%
  mutate(value = price / quantity)

# using an existing column name in mutate() overwrites that column
# shift the yumminess scale down by 3 (again printed only, not saved)
treats %>%
  mutate(yummy = yummy - 3)

## Conditional Filtering ####################
# filter() keeps only the rows where the condition is true
# Charlie's favorites: treats ranked exactly 5 on yumminess (== means "equals")
treats %>%
  filter(yummy == 5)

# save the treats Charlie really likes (yumminess above 3, i.e. levels 4 and 5)
# as a new data object called faves
faves <- treats %>%
  filter(yummy > 3)
faves

# treats where thrown_out is "no"
treats %>%
  filter(thrown_out == "no")

# treats where thrown_out is anything other than "yes" (!= means "does not equal")
treats %>%
  filter(thrown_out != "yes")

# treats where thrown_out is "no" or "sometimes" (| means "or")
treats %>%
  filter(thrown_out == "no" | thrown_out == "sometimes")

## Summarize by Group ####################
# summarize() collapses the data into a one-row table of statistics;
# here: the mean price, its standard deviation, and the number of rows (n())
treats %>%
  summarize(mean(price), sd(price), n())

# without names, the columns are labelled with the function calls themselves;
# give each statistic an explicit name with name = function(column)
treats %>%
  summarize(mean = mean(price), st_dev = sd(price), total_n = n())

# group_by() makes summarize() work once per group:
# average yumminess for each flavor of treat
treats %>%
  group_by(flavor) %>%
  summarize(yum_factor = mean(yummy))
# Exercise: how would you pipe this to the function arrange() to put scores in order?
# Replace the blank in arrange() with the column to sort by. The blank is only
# a placeholder and is not valid R, so this code errors until you fill it in.
# Hint: the summary column created by summarize() below is named mean.
treats %>%
  group_by(flavor) %>%
  summarize(mean = mean(yummy)) %>%
  arrange(_____)
# Do you remember how to put the results in descending order?
# Hint: wrap the column name in desc() inside arrange().

# grouping by two columns gives one row per flavor / thrown_out combination;
# n() counts the rows in each combination
treats %>%
  group_by(flavor, thrown_out) %>%
  summarize(count = n())

# how would you add a filter so that we can count up only the ones that are long lasting?
# (edit the pipeline below to try it)
treats %>%
  group_by(flavor, thrown_out) %>%
  summarize(count = n())

# now how would you find the average price for those treats?

## Exercises ####################
# The blanks (________) below are placeholders, not valid R:
# replace every blank before running a pipeline.

# print the data to remind yourself of the column names
treats

# fill in a grouping column, the column to average, and the column to sort by
treats %>%
  group_by(_______) %>%
  summarize(cheapest = mean(________)) %>%
  arrange(________)

# list the flavor values that appear in the data
unique(treats$flavor)

# fill in a filter condition, the missing function names and their columns
treats %>%
  filter(________) %>%
  ________(________) %>%
  summarize(________ = mean(________)) %>%
  ________(desc(________))

# Build your own pipeline here. The starter line is commented out because a
# pipe with nothing after it takes the next statement in the script as its
# right-hand side and fails. Remove the # once you add a step after the pipe.
# treats %>%

## Graphs ####################
# build a tiny example dataset in which y is always five times x
x_axis <- c(1, 2, 3, 4, 5, 6)
y_axis <- 5 * x_axis

# combine the two vectors into a data frame, one column per vector
my_data <- data.frame(x_axis = x_axis, y_axis = y_axis)

# ggplot() sets the data and which columns go on each axis;
# geom_line() is added with + and draws the data as a line
ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_line()
## Scatterplots ####################
# same example dataset as for the line graph: y is five times x
x_axis <- c(1, 2, 3, 4, 5, 6)
y_axis <- 5 * x_axis

my_data <- data.frame(x_axis, y_axis)

# swap geom_line() for geom_point() to draw points instead of a line
ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_point() # this is the only line that changes

# xlab() and ylab() replace the default axis labels
ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_point() +
  xlab("Time in Workshop") +
  ylab("Love of R")

# to colour every point red, set color inside geom_point(), outside aes();
# aes(color = "red") would treat "red" as a data value, so the points get the
# default palette colour and a legend instead of actually being red
# theme_classic() switches to a plain white background
ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_point(color = "red") +
  xlab("Time in Workshop") +
  ylab("Love of R") +
  theme_classic()

# Exercise: scatterplot of price (x) against yummy (y) from the treats data.
# Replace each blank with an axis label written in quotes, as in xlab("...").
# The blanks are placeholders and not valid R, so this errors until filled in.
ggplot(data = treats, aes(x = price, y = yummy)) +
  geom_point() +
  xlab(______) +
  ylab(______) +
  theme_classic()

# price per item against yumminess
# price_per_item is not created anywhere earlier in this script, so it is
# calculated here (price divided by quantity) and piped straight into ggplot()
# position = "jitter" nudges the points slightly so overlapping ones are visible
# geom_smooth(method = "lm") adds a fitted straight line
treats %>%
  mutate(price_per_item = price / quantity) %>%
  ggplot(aes(x = price_per_item, y = yummy)) +
  geom_point(position = "jitter") +
  xlab("$$$") +
  ylab("Yummm") +
  geom_smooth(method = "lm") +
  theme_classic()

## Boxplots ####################
# Exercise: the blanks (________) are placeholders, not valid R.
# Replace them with the dataset name and the columns for the x and y axes.

# basic boxplot
ggplot(data = ________, aes(x = ________, y = ________)) +
  geom_boxplot() 

# color = flavor maps the flavor column to the color aesthetic
ggplot(data = ________, aes(x = ________, y = ________, color = flavor)) +
  geom_boxplot() 

# fill = flavor maps the flavor column to the fill aesthetic;
# scale_fill_manual() supplies the fill colours to use, here seven of them
# NOTE(review): presumably flavor has at most seven distinct values - confirm against the data
ggplot(data = ________, aes(x = ________, y = ________, fill = flavor)) +
  geom_boxplot() +
  scale_fill_manual(values = c("red", "orange", "yellow", "green", "blue", "purple", "magenta"))

## Exercises ####################
# Starting point for your own plot: ggplot() is called here with no data,
# aesthetics or geoms. Add data = and aes() inside the parentheses, then add
# geoms with +.
ggplot()