colMeans(swiss) first_and_last <- function(x) { first <- x[1] last <- x[length(x)] return(c("first" = first, "last" = last)) } first_and_last(c(4, 3, 1, 8)) first_and_last(7) first_and_last(numeric(0)) smarter_first_and_last <- function(x) { if(length(x) == 0L) { # specify integers with L stop("The input has no length!") #<< } else { first <- x[1] last <- x[length(x)] return(c("first" = first, "last" = last)) } } smarter_first_and_last(numeric(0)) smarter_first_and_last(c(4, 3, 1, 8)) smarter_first_and_last ## NAME <- function(ARGUMENT1, ARGUMENT2=DEFAULT){ ## BODY ## return(OUTPUT) ## } quantile_report <- function(x, na.rm = FALSE) { quants <- quantile(x, na.rm = na.rm, probs = c(0.01, 0.05, 0.10, 0.25, 0.5, 0.75, 0.90, 0.95, 0.99)) names(quants) <- c("Bottom 1%", "Bottom 5%", "Bottom 10%", "Bottom 25%", "Median", "Top 25%", "Top 10%", "Top 5%", "Top 1%") return(quants) } quantile_report(rnorm(10000)) ## lapply(swiss, FUN = quantile_report) lapply(swiss, FUN = quantile_report)[1:3] sapply(swiss, FUN = quantile_report) apply(swiss, MARGIN = 2, FUN = quantile_report) library(dplyr); library(readr) file_list <- list.files("./example_data/") file_paths <- paste0("./example_data/", file_list) data_names <- stringr::str_remove(file_list, ".csv") data_list <- vector("list", length(file_list)) names(data_list) <- data_names for (i in seq_along(file_list)){ data_list[[ data_names[i] ]] <- read_csv(file_paths[i]) } complete_data <- bind_rows(data_list) head(complete_data, 3) complete_data <- lapply(file_paths, read_csv) %>% bind_rows() head(complete_data, 3) library(vroom) complete_data <- vroom(file_paths) head(complete_data, 3) loop_vec <- numeric(5) # Preallocation! for(x in seq_along(loop_vec)){ # Change x to 1,2,3,4,5 loop_vec[x] <- x^2 # Write x squared to loop_vec } loop_vec # No preallocation, just iterate over 1:5 and assign output! apply_vec <- sapply(1:5, function(x){x^2}) apply_vec bucket <- function(x, quants = c(0.333, 0.667)) { #<< # set low extreme, quantile points, high extreme new_breaks <- c(min(x)-1, quantile(x, probs = quants), max(x)+1) #<< # labels = FALSE will return integer codes instead of ranges return(cut(x, breaks = new_breaks, labels = FALSE)) } dat <- rnorm(100) dat_quants <- c(0.05, 0.25, 0.5, 0.75, 0.95) bucketed_dat <- bucket(dat, quants = dat_quants) plot(x = bucketed_dat, y = dat, main = "Buckets and values", pch = 16) abline(h = quantile(dat, dat_quants), lty = "dashed", col = "red") (school_data <- data.frame(school = letters[1:10], pr_passing_exam=c(0.78, 0.55, 0.91, -1, 0.88, 0.81, 0.90, 0.76, 99, 99), pr_free_lunch = c(0.33, 99, 0.25, 0.05, 0.12, 0.09, 0.22, -13, 0.21, 99))) remove_extremes <- function(x, low, high) { x_no_low <- ifelse(x < low, NA, x) x_no_low_no_high <- ifelse(x_no_low > high, NA, x) return(x_no_low_no_high) } remove_extremes(school_data$pr_passing_exam, low = 0, high = 1) library(dplyr) school_data %>% mutate(across(-school, ~ remove_extremes(x = ., low = 0, high = 1))) swiss %>% select("Fertility", "Catholic") %>% head(2) swiss %>% summarize(across(everything(), ~ mean(., na.rm=TRUE) / sd(., na.rm=TRUE))) ## lapply(swiss, function(x) mean(x, na.rm = TRUE) / sd(x, na.rm = TRUE)) # This is me sneakily restricting output to 5 values lapply(swiss, function(x) mean(x, na.rm = TRUE) / sd(x, na.rm = TRUE))[1:5] ## library(gapminder); library(ggplot2) ## ggplot(gapminder %>% filter(country == "Afghanistan"), ## aes(x = year, y = pop / 1000000)) + ## geom_line(color = "firebrick") + ## xlab(NULL) + ylab("Population (millions)") + ## ggtitle("Population of Afghanistan since 1952") + ## theme_minimal() + ## theme(plot.title = element_text(hjust = 0, size = 20)) library(gapminder); library(ggplot2) ggplot(gapminder %>% filter(country == "Afghanistan"), aes(x = year, y = pop / 1000000)) + geom_line(color = "firebrick") + xlab(NULL) + ylab("Population (millions)") + ggtitle("Population of Afghanistan since 1952") + theme_minimal() + theme(plot.title = element_text(hjust = 0, size = 20)) ggplot(gapminder %>% filter(country == "Peru"), aes(x = year, y = lifeExp)) + geom_line(color = "firebrick") + xlab(NULL) + ylab("Life expectancy") + ggtitle("Life expectancy in Peru since 1952") + theme_minimal() + theme(plot.title = element_text(hjust = 0, size = 20)) gapminder_lifeplot <- function(cntry) { ggplot(gapminder %>% filter(country == cntry), #<< aes(x = year, y = lifeExp)) + geom_line(color = "firebrick") + xlab(NULL) + ylab("Life expectancy") + theme_minimal() + ggtitle(paste0("Life expectancy in ", cntry, " since 1952")) + #<< theme(plot.title = element_text(hjust = 0, size = 20)) } gapminder_lifeplot(cntry = "Turkey") gapminder_lifeplot(cntry = "Rwanda") y_axis_label <- c("lifeExp" = "Life expectancy", "pop" = "Population (millions)", "gdpPercap" = "GDP per capita, USD") title_text <- c("lifeExp" = "Life expectancy in ", "pop" = "Population of ", "gdpPercap" = "GDP per capita in ") # example use: y_axis_label["pop"] title_text["pop"] gapminder_plot <- function(cntry, yvar) { y_axis_label <- c("lifeExp" = "Life expectancy", "pop" = "Population (millions)", "gdpPercap" = "GDP per capita, USD")[yvar] #<< title_text <- c("lifeExp" = "Life expectancy in ", "pop" = "Population of ", "gdpPercap" = "GDP per capita in ")[yvar] #<< ggplot(gapminder %>% filter(country == cntry) %>% #<< mutate(pop = pop / 1000000), aes_string(x = "year", y = yvar)) + #<< geom_line(color = "firebrick") + ggtitle(paste0(title_text, cntry, " since 1952")) + #<< xlab(NULL) + ylab(y_axis_label) + theme_minimal() + theme(plot.title = element_text(hjust = 0, size = 20)) } gapminder_plot(cntry = "Turkey", yvar = "pop") gapminder_plot(cntry = "Rwanda", yvar = "gdpPercap") us_ca <- c("Canada", "United States") gapminder %>% filter(country %in% us_ca) %>% distinct(country) %>% head(2) gapminder %>% filter(!country %in% us_ca) %>% distinct(country) %>% head(2) `%!in%` <- Negate(`%in%`) gapminder %>% filter(country %!in% us_ca) %>% # Our new operator! #<< distinct(country) %>% head(2) ## debug(gapminder_plot) ## undebug(gapminder_plot) ## render_and_print_slides <- function(week){ ## week_dir <- paste0(getwd(), "/Lectures/", "Week", week, "/") ## current_rmd <- paste0(week_dir, stringr::str_subset(list.files(week_dir), ## "^CSSS508_Week.*\\.Rmd$")) ## rmarkdown::render(current_rmd, encoding = "UTF-8") ## current_html <- stringr::str_replace(current_rmd, "\\.Rmd", "\\.html") ## new_pdf_file <- stringr::str_replace(current_html, "\\.html", "\\.pdf") ## new_r_script <- stringr::str_replace(current_html, "\\.html", "\\.R") ## message("Slides rendered, waiting 5 seconds.") ## Sys.sleep(5) ## message("Purling slides.") ## knitr::purl(input = current_rmd, output = new_r_script, documentation = 0) ## message("Printing from Chrome.") ## pagedown::chrome_print(current_html, format="pdf") ## message(paste0("Printing complete at ", week_dir)) ## }