# Template for a function built only from vectorized steps.
# vectorized_function_1() and vectorized_function_2() are placeholders for
# whatever vectorized functions you need; `input` is passed through both.
vectorized_function <- function(input) {
  # First vectorized step
  first_step <- vectorized_function_1(input)
  # Second vectorized step; its result is the value of the function
  vectorized_function_2(first_step)
}
vectorized_function(c(1, 2, 3))
Repeating Things 1 (reference card)
Approaches to repeating things in R
- Vectorize - functions that take a vector of values, do elementwise calculations, and return a vector of results
- Using Apply/Map - takes a function and applies it to each item in a list of items
- dplyr - using mutate() for vectorized functions and rowwise() |> mutate() for non-vectorized functions
- Loops - perform any kind of repetition we want
Vectorized functions
- Many functions in R will take a vector of values and return a vector of results
- If we write functions that only include vectorized functions or calculations they will automatically work on vectors
- To run these functions with a column from a data frame extract the column first
data <- data.frame(column_name = c(1, 2, 3))
vectorized_function(data$column_name)
- Multiple vector inputs can be used in the same way
# Template for a vectorized function that takes two vector inputs.
#
# input_1, input_2: vectors that are combined element by element; a single
#   value for either one is reused for every element of the other.
# Value: whatever the final vectorized step returns.
#
# vectorized_function_1() and vectorized_function_3() are placeholders for
# real vectorized functions. The final step must be a different function from
# this one: calling vectorized_function_2() here with a single argument would
# recurse and fail because input_2 is missing.
vectorized_function_2 <- function(input_1, input_2) {
  intermediate_vector_1 <- vectorized_function_1(input_1)
  intermediate_vector_2 <- intermediate_vector_1 * input_2 # math is vectorized
  vectorized_function_3(intermediate_vector_2)
}
vectorized_function_2(c(1, 2, 3), c(4, 5, 6))
- If you pass a single value for either input it will be treated as a vector of only that value
function_name(c(1, 2, 3), 4)
Apply
- The apply() family of functions works even if the function you are applying is not vectorized
- First argument is a vector, second argument is a function; it “applies” the function to each item in the vector
- sapply() returns a simplified version of the result, often a vector
# Example of a function that is NOT vectorized.
#
# input: a single numeric value.
# Value: input * 2 when input is greater than 2, otherwise input * 4.
non_vectorized_function <- function(input) {
  # `if` decides on one TRUE/FALSE value, so this handles one item at a time;
  # use sapply() to run it over every item of a vector.
  if (input > 2) {
    input * 2
  } else {
    input * 4
  }
}
sapply(c(1, 2, 3), non_vectorized_function)
Apply with multiple arguments
- If you have multiple arguments you can use mapply()
- The first argument is the function, the rest are vectors of arguments
# Example of a non-vectorized function with two arguments.
#
# input_1: a single numeric value.
# input_2: a single label; "A" doubles input_1, anything else quadruples it.
# Value: input_1 multiplied by the chosen factor.
non_vectorized_function_2 <- function(input_1, input_2) {
  # The label is tested with `if`, so only one item can be handled per call
  multiplier <- if (input_2 == "A") 2 else 4
  input_1 * multiplier
}
mapply(non_vectorized_function_2, c(1, 2, 3), c("A", "B", "A"))
Integrating with dplyr
- Use mutate() to apply vectorized functions to one or more columns in a data frame and add the value as a new column
data <- data.frame(column_1 = c(1, 2, 3), column_2 = c(4, 5, 6))
data |>
mutate(new_column = vectorized_function_2(column_1, column_2))
- To work with non-vectorized functions we need to add rowwise() before mutate()
data <- data.frame(column_1 = c(1, 2, 3), column_2 = c("A", "B", "A"))
data |>
rowwise() |>
mutate(new_column = non_vectorized_function_2(column_1, column_2))
One result per group from dplyr
- We can use group_by() and summarize() to get one result per group
- In this case the function should take one or more vectors as input and return a single value
# Reduce a vector of values to a single value: their sum.
#
# input: a vector of values to add up.
# Value: one value, the total of `input`.
summarizing_function <- function(input) {
  sum(input)
}
# Example data: two groups ("A" and "B") with two values each
data <- data.frame(
  group_column = c("A", "A", "B", "B"),
  value_column = c(1, 2, 3, 4)
)

# Group the rows by group_column, then reduce each group's values to one value
data |>
  group_by(group_column) |>
  summarize(new_column = summarizing_function(value_column))