# Template for a function built only from vectorized steps.
# `vectorized_function_1()` and `vectorized_function_2()` are placeholders for
# any vectorized functions; because every step is vectorized, the whole
# function works elementwise on a vector.
#
# input: a vector of values.
# Returns: the vector produced by the final vectorized step.
vectorized_function <- function(input) {
  intermediate_vector <- vectorized_function_1(input)
  output_vector <- vectorized_function_2(intermediate_vector)
  return(output_vector)
}
vectorized_function(c(1, 2, 3))
Repeating Things 1 (reference card)
Approaches to repeating things in R
- Vectorize - functions that take a vector of values, do elementwise calculations, and return a vector of results
- Using Apply/Map - takes a function and applies it to each item in a list of items
- dplyr - using `mutate()` for vectorized functions and `rowwise() |> mutate()` for non-vectorized functions
- Loops - perform any kind of repetition we want
Vectorized functions
- Many functions in R will take a vector of values and return a vector of results
- If we write functions that only include vectorized functions or calculations they will automatically work on vectors
- To run these functions with a column from a data frame extract the column first
# Build a one-column data frame, then extract the column with `$` so the
# function receives a plain vector rather than the data frame.
data <- data.frame(column_name = c(1, 2, 3))
vectorized_function(data$column_name)
- Multiple vector inputs can be used in the same way
# Template for a vectorized function that takes two vector inputs.
#
# input_1: a vector passed through the first vectorized step.
# input_2: a vector (or single value) multiplied elementwise with the
#   intermediate result.
# Returns: the vector produced by the final vectorized step.
vectorized_function_2 <- function(input_1, input_2) {
  intermediate_vector_1 <- vectorized_function_1(input_1)
  intermediate_vector_2 <- intermediate_vector_1 * input_2 # math is vectorized
  # NOTE(review): this placeholder has the same name as the enclosing
  # function; in real code call a different one-argument vectorized function
  # here - TODO confirm intended name.
  output_vector <- vectorized_function_2(intermediate_vector_2)
  return(output_vector)
}
vectorized_function_2(c(1, 2, 3), c(4, 5, 6))
- If you pass a single value for either input it will be treated as a vector of only that value
function_name(c(1, 2, 3), 4)
Apply
- The `apply()` family of functions works even if the function you are applying is not vectorized
- First argument is a vector, second argument is a function; it “applies” the function to each item in the vector
- `sapply()` returns a simplified version of the result, often a vector
# Example of a function that is NOT vectorized: `if` only accepts a single
# TRUE/FALSE condition, so the function must be called on one value at a time.
#
# input: a single number.
# Returns: input * 2 when input is greater than 2, otherwise input * 4.
non_vectorized_function <- function(input) {
  if (input > 2) {
    output_vector <- input * 2
  } else {
    output_vector <- input * 4
  }
  return(output_vector)
}

# sapply() calls the function once per element and simplifies the results
# to a vector.
sapply(c(1, 2, 3), non_vectorized_function)
Apply with multiple arguments
- If you have multiple arguments you can use `mapply()`
- The first argument is the function, the rest are vectors of arguments
# Example of a non-vectorized function with two arguments.
#
# input_1: a single number.
# input_2: a single string; "A" selects doubling, anything else quadrupling.
# Returns: input_1 * 2 when input_2 is "A", otherwise input_1 * 4.
non_vectorized_function_2 <- function(input_1, input_2) {
  if (input_2 == "A") {
    output_vector <- input_1 * 2
  } else {
    output_vector <- input_1 * 4
  }
  return(output_vector)
}

# mapply() takes the function first, then one vector per argument, and calls
# the function on the matching elements of each vector.
mapply(non_vectorized_function_2, c(1, 2, 3), c("A", "B", "A"))
Integrating with dplyr
- Use `mutate()` to apply vectorized functions to one or more columns in a data frame and add the value as a new column
# mutate() (from dplyr) passes whole columns to the function, so the function
# must be vectorized; the result is stored in `new_column`.
data <- data.frame(column_1 = c(1, 2, 3), column_2 = c(4, 5, 6))
data |>
  mutate(new_column = vectorized_function_2(column_1, column_2))
- To work with non-vectorized functions we need to add `rowwise()` before `mutate()`
# rowwise() (from dplyr) makes mutate() evaluate the function one row at a
# time, so a non-vectorized function receives single values.
data <- data.frame(column_1 = c(1, 2, 3), column_2 = c("A", "B", "A"))
data |>
  rowwise() |>
  mutate(new_column = non_vectorized_function_2(column_1, column_2))
One result per group from dplyr
- We can use `group_by()` and `summarize()` to get one result per group
- In this case the function should take one or more vectors as input and return a single value
# Collapse a vector to a single value so it can be used with summarize().
#
# input: a numeric vector.
# Returns: a single number, the sum of `input`.
summarizing_function <- function(input) {
  output_value <- sum(input)
  return(output_value)
}
# group_by() + summarize() (from dplyr) call the function once per group,
# giving one row per value of `group_column`.
data <- data.frame(
  group_column = c("A", "A", "B", "B"),
  value_column = c(1, 2, 3, 4)
)
data |>
  group_by(group_column) |>
  summarize(new_column = summarizing_function(value_column))