## Loading Libraries and Data ####################

# install tidyverse if needed (delete the # to run the line):
# install.packages("tidyverse")

# load library
library(tidyverse)

# load data (replace the placeholder path with the location of your copy)
treats <- read_csv("~/your/path/to/file/Charlie_treats.csv")

## Viewing Data ####################

# sneak peek at data
glimpse(treats)

# add your code to view column names


# format is data[row, column]
# this will show you the value in the second row, third column
treats[2, 3]

# this will show you everything in the second row, all columns
# (we leave column blank)
treats[2, ]

# this will show you all rows in the third column (we leave row blank)
treats[, 3]

# this displays all data in the brand column
treats$brand

# this displays all the data in the long_lasting column
treats$long_lasting

# this will store all the values in the price column as a new object
# called cost
cost <- treats$price

## Column Math ####################

sum(treats$price)
mean(treats$price)
sd(treats$price)
length(treats$price)
?length

# pipe the full dataset treats to distinct() to list the unique values in
# the flavor column
# (base unique() does not accept a column name after a pipe; the base R
# equivalent is unique(treats$flavor))
treats %>%
  distinct(flavor)

# put the column you want to sort by in the parentheses
treats %>%
  arrange(desire)

# sort in descending order
treats %>%
  arrange(desc(desire))

## Renaming, Deleting, Adding ####################

# note that this code also now puts the price column before flavor
treats %>%
  select(price, flavor)

# a minus sign drops a column; here only flavor is dropped (brand has no
# minus sign, so it is kept) -- use select(-flavor, -brand) to drop both
treats %>%
  select(-flavor, brand)

# create a new dataset called "preferences" that is just flavor, desire,
# long_lasting and thrown_out
preferences <- treats %>%
  select(flavor, desire, long_lasting, thrown_out)
preferences

# change desire to yummy
# syntax rename(new = old)
treats <- treats %>%
  rename(yummy = desire)
treats

# divide the price column by the quantity column
# the syntax for mutate is mutate(new = formula)
# (the result is only printed; assign it with <- to keep the new column)
treats %>%
  mutate(value = price / quantity)

# change yumminess scale by overwriting yummy column
# (again only printed, so treats itself is left unchanged)
treats %>%
  mutate(yummy = yummy - 3)

## Conditional Filtering
####################

# rows where Charlie rated the treat a 5 for yumminess
treats %>%
  filter(yummy == 5)

# keep the treats Charlie really likes (yumminess 4 and 5) as a new object
faves <- treats %>%
  filter(yummy > 3)
faves

# rows where thrown_out is "no"
treats %>%
  filter(thrown_out == "no")

# rows where thrown_out is anything other than "yes"
treats %>%
  filter(thrown_out != "yes")

# rows where thrown_out is either "no" or "sometimes"
treats %>%
  filter(thrown_out == "no" | thrown_out == "sometimes")

## Summarize by Group ####################

# summarize() gives a one-row table: here the mean, the standard deviation
# and the number of samples
treats %>%
  summarize(mean(price), sd(price), n())

# without names the columns are labelled with the function calls;
# give explicit names by writing name = function(column)
treats %>%
  summarize(
    mean = mean(price),
    st_dev = sd(price),
    total_n = n()
  )

# average yumminess for each flavor of treat
treats %>%
  group_by(flavor) %>%
  summarize(yum_factor = mean(yummy))

# How would you pipe this to the function arrange() to put scores in order?
treats %>%
  group_by(flavor) %>%
  summarize(mean = mean(yummy)) %>%
  arrange(_____)

# Do you remember how to put the results in descending order?


# count number of thrown outs per flavor
treats %>%
  group_by(flavor, thrown_out) %>%
  summarize(count = n())

# how would you add a filter so that we can count up only the ones that
# are long lasting?
treats %>%
  group_by(flavor, thrown_out) %>%
  summarize(count = n())

# now how would you find the average price for those treats?

## Exercises ####################

treats

# fill in the blanks before running
treats %>%
  group_by(_______) %>%
  summarize(cheapest = mean(________)) %>%
  arrange(________)

unique(treats$flavor)

# fill in the blanks before running
treats %>%
  filter(________) %>%
  ________(________) %>%
  summarize(________ = mean(________)) %>%
  ________(desc(________))

# exercise stub: finish this pipeline before running it (a pipe left open
# at the end of a line continues into the next statement)
treats %>%

## Graphs ####################

x_axis <- c(1, 2, 3, 4, 5, 6)
y_axis <- 5 * x_axis
my_data <- data.frame(x_axis, y_axis)

ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_line()

## Scatterplots ####################

# same toy data as above, repeated so this section can be run on its own
x_axis <- c(1, 2, 3, 4, 5, 6)
y_axis <- 5 * x_axis
my_data <- data.frame(x_axis, y_axis)

ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_point() # this is the only line that changes

# add axis labels
ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_point() +
  xlab("Time in Workshop") +
  ylab("Love of R")

# a fixed color is set outside aes(); inside aes() the string "red" would be
# treated as data, giving a default-palette color and an unwanted legend
ggplot(data = my_data, aes(x = x_axis, y = y_axis)) +
  geom_point(color = "red") +
  xlab("Time in Workshop") +
  ylab("Love of R") +
  theme_classic()

# fill in the axis labels before running
ggplot(data = treats, aes(x = price, y = yummy)) +
  geom_point() +
  xlab(______) +
  ylab(______) +
  theme_classic()

# NOTE(review): price_per_item is not created anywhere in this script --
# confirm it is a column in the CSV, or create it first, e.g.
# treats <- treats %>% mutate(price_per_item = price / quantity)
ggplot(data = treats, aes(x = price_per_item, y = yummy)) +
  geom_point(position = "jitter") +
  xlab("$$$") +
  ylab("Yummm") +
  geom_smooth(method = "lm") +
  theme_classic()

## Boxplots ####################

# fill in the blanks before running
ggplot(data = ________, aes(x = ________, y = ________)) +
  geom_boxplot()

# color = flavor outlines each box in a flavor-specific color
ggplot(data = ________, aes(x = ________, y = ________, color = flavor)) +
  geom_boxplot()

# fill = flavor fills each box; scale_fill_manual() supplies the colors
ggplot(data = ________, aes(x = ________, y = ________, fill = flavor)) +
  geom_boxplot() +
  scale_fill_manual(
    values = c("red", "orange", "yellow", "green", "blue", "purple", "magenta")
  )

## Exercises ####################

ggplot()