7 Modules and Functions
7.1 Table of Contents
- Writing Robust Functions
- Function Design Principles
- Error Handling
- Functional Programming
- S3 and S4 Object Systems
- Package Structure
- Testing with testthat
- Debugging Strategies
- Code Organization
- Best Practices
7.2 Writing Robust Functions
7.2.1 Basic Function Structure
# Simple function
greet <- function(name) {
paste("Hello,", name)
}
# Function with default arguments
power <- function(x, exponent = 2) {
x^exponent
}
# Function with multiple returns
calculate_stats <- function(x) {
list(
mean = mean(x),
median = median(x),
sd = sd(x),
range = range(x)
)
}
# Function with validation
safe_divide <- function(x, y) {
if (!is.numeric(x) || !is.numeric(y)) {
stop("Both arguments must be numeric")
}
if (y == 0) {
warning("Division by zero, returning Inf")
return(Inf)
}
x / y
}7.2.2 Documentation with roxygen2
#' Calculate Circle Area
#'
#' This function calculates the area of a circle given its radius.
#'
#' @param radius Numeric value representing the circle's radius (must be positive)
#' @return Numeric value representing the circle's area
#' @export
#' @examples
#' circle_area(5)
#' circle_area(10)
#' @seealso \code{\link{circle_circumference}}
circle_area <- function(radius) {
if (radius < 0) {
stop("Radius must be non-negative")
}
pi * radius^2
}
#' @describeIn circle_area Calculate circle circumference
#' @export
circle_circumference <- function(radius) {
if (radius < 0) {
stop("Radius must be non-negative")
}
2 * pi * radius
}7.2.3 Input Validation
library(assertthat)
# Using assertthat
analyze_data <- function(data, column) {
assert_that(is.data.frame(data))
assert_that(is.string(column))
assert_that(column %in% names(data))
summary(data[[column]])
}
# Using stopifnot
analyze_data2 <- function(data, column) {
stopifnot(
is.data.frame(data),
is.character(column),
length(column) == 1,
column %in% names(data)
)
summary(data[[column]])
}
# Custom validation
validate_age <- function(age) {
if (!is.numeric(age)) {
stop("Age must be numeric", call. = FALSE)
}
if (age < 0 || age > 150) {
stop("Age must be between 0 and 150", call. = FALSE)
}
TRUE
}7.3 Function Design Principles
7.3.1 Single Responsibility Principle
# Bad: Function does too many things
process_data <- function(file) {
data <- read.csv(file)
data <- data[complete.cases(data), ]
data$new_col <- data$col1 * data$col2
model <- lm(y ~ x, data = data)
plot(data$x, data$y)
return(model)
}
# Good: Separate functions for each task
load_data <- function(file) {
read.csv(file)
}
clean_data <- function(data) {
data[complete.cases(data), ]
}
transform_data <- function(data) {
data$new_col <- data$col1 * data$col2
data
}
fit_model <- function(data) {
lm(y ~ x, data = data)
}
plot_data <- function(data) {
plot(data$x, data$y)
}
# Use pipeline
process_data <- function(file) {
load_data(file) %>%
clean_data() %>%
transform_data() %>%
fit_model()
}7.3.2 Pure Functions
# Pure function (no side effects, same input = same output)
calculate_total <- function(prices, tax_rate = 0.1) {
prices * (1 + tax_rate)
}
# Impure function (has side effects)
counter <- 0
increment_counter <- function() {
counter <<- counter + 1 # Modifies global state
counter
}
# Better: Return value instead of modifying global state
increment <- function(x) {
x + 1
}7.3.3 Function Composition
# Compose functions
add_one <- function(x) x + 1
multiply_by_two <- function(x) x * 2
square <- function(x) x^2
# Manual composition
result <- square(multiply_by_two(add_one(5))) # ((5+1)*2)^2 = 144
# Using pipe
library(magrittr)
result <- 5 %>% add_one() %>% multiply_by_two() %>% square()
# Function factory
make_multiplier <- function(n) {
function(x) x * n
}
multiply_by_three <- make_multiplier(3)
multiply_by_five <- make_multiplier(5)
multiply_by_three(10) # 30
multiply_by_five(10) # 507.4 Error Handling
7.4.1 tryCatch
# Basic tryCatch
safe_log <- function(x) {
tryCatch(
{
log(x)
},
error = function(e) {
message("Error: ", e$message)
return(NA)
},
warning = function(w) {
message("Warning: ", w$message)
return(log(x))
}
)
}
safe_log(-1) # Warning, returns NaN
safe_log("a") # Error, returns NA
# Advanced error handling
read_data_safely <- function(file) {
tryCatch(
{
data <- read.csv(file)
message("Successfully loaded ", nrow(data), " rows")
return(data)
},
error = function(e) {
stop("Failed to read file: ", e$message, call. = FALSE)
},
warning = function(w) {
message("Warning occurred: ", w$message)
return(NULL)
},
finally = {
message("Read operation completed")
}
)
}7.4.2 Custom Error Classes
# Define custom error
validation_error <- function(message, field = NULL) {
err <- list(
message = message,
field = field,
call = sys.call(-1)
)
class(err) <- c("validation_error", "error", "condition")
err
}
# Use custom error
validate_user <- function(user) {
if (!"name" %in% names(user)) {
stop(validation_error("Name is required", field = "name"))
}
if (!"age" %in% names(user)) {
stop(validation_error("Age is required", field = "age"))
}
if (user$age < 0) {
stop(validation_error("Age must be positive", field = "age"))
}
TRUE
}
# Handle custom error
tryCatch(
validate_user(list(name = "Alice", age = -5)),
validation_error = function(e) {
message("Validation failed in field '", e$field, "': ", e$message)
}
)7.4.3 purrr’s safely and possibly
library(purrr)
# safely() returns list(result, error)
safe_log <- safely(log)
safe_log(10) # $result = 2.302585, $error = NULL
safe_log("a") # $result = NULL, $error = <error>
# Use with map
results <- map(list(10, -1, "a"), safe_log)
results <- transpose(results)
results$result # List of results
results$error # List of errors
# possibly() returns default value on error
safe_log2 <- possibly(log, otherwise = NA)
safe_log2(10) # 2.302585
safe_log2("a") # NA
# quietly() captures messages, warnings, and output
quiet_log <- quietly(log)
quiet_log(-1)7.5 Functional Programming
7.5.1 Higher-Order Functions
# Functions that take functions as arguments
apply_twice <- function(f, x) {
f(f(x))
}
apply_twice(sqrt, 256) # sqrt(sqrt(256)) = 4
# Functions that return functions
make_power <- function(n) {
function(x) x^n
}
square <- make_power(2)
cube <- make_power(3)
square(5) # 25
cube(5) # 1257.5.2 Closures
# Closure: function + environment
make_counter <- function() {
count <- 0
list(
increment = function() {
count <<- count + 1
count
},
decrement = function() {
count <<- count - 1
count
},
get = function() {
count
},
reset = function() {
count <<- 0
}
)
}
counter <- make_counter()
counter$increment() # 1
counter$increment() # 2
counter$get() # 2
counter$decrement() # 1
counter$reset() # 07.5.3 Map, Reduce, Filter
library(purrr)
# Map: apply function to each element
numbers <- 1:5
map_dbl(numbers, ~ .x^2) # c(1, 4, 9, 16, 25)
# Reduce: combine elements
reduce(numbers, `+`) # 15 (sum)
reduce(numbers, `*`) # 120 (product)
# Accumulate: cumulative reduce
accumulate(numbers, `+`) # c(1, 3, 6, 10, 15)
# Filter (keep/discard)
keep(numbers, ~ .x %% 2 == 0) # c(2, 4)
discard(numbers, ~ .x %% 2 == 0) # c(1, 3, 5)
# Predicate functions
every(numbers, ~ .x > 0) # TRUE
some(numbers, ~ .x > 3) # TRUE
none(numbers, ~ .x > 10) # TRUE7.6 S3 and S4 Object Systems
7.6.1 S3 Classes (Simple)
# Create S3 object
person <- function(name, age) {
structure(
list(name = name, age = age),
class = "person"
)
}
# Create instance
alice <- person("Alice", 25)
class(alice) # "person"
# Generic function
print.person <- function(x, ...) {
cat("Person:", x$name, "(Age:", x$age, ")\n")
}
print(alice) # Uses print.person method
# Additional methods
summary.person <- function(object, ...) {
cat("Name:", object$name, "\n")
cat("Age:", object$age, "\n")
cat("Adult:", object$age >= 18, "\n")
}
# Validator
validate_person <- function(x) {
if (!is.character(x$name) || length(x$name) != 1) {
stop("name must be a single string")
}
if (!is.numeric(x$age) || length(x$age) != 1 || x$age < 0) {
stop("age must be a single positive number")
}
x
}
person <- function(name, age) {
x <- structure(
list(name = name, age = age),
class = "person"
)
validate_person(x)
}7.6.2 S4 Classes (Formal)
# Define S4 class
setClass("Employee",
slots = c(
name = "character",
age = "numeric",
salary = "numeric",
department = "character"
),
prototype = list(
name = NA_character_,
age = NA_real_,
salary = NA_real_,
department = NA_character_
)
)
# Create instance
emp <- new("Employee",
name = "Bob",
age = 30,
salary = 50000,
department = "Engineering"
)
# Accessor methods
setGeneric("getName", function(obj) standardGeneric("getName"))
setMethod("getName", "Employee", function(obj) obj@name)
setGeneric("getSalary", function(obj) standardGeneric("getSalary"))
setMethod("getSalary", "Employee", function(obj) obj@salary)
# Show method
setMethod("show", "Employee", function(object) {
cat("Employee:", object@name, "\n")
cat("Age:", object@age, "\n")
cat("Salary: $", object@salary, "\n")
cat("Department:", object@department, "\n")
})
# Validation
setValidity("Employee", function(object) {
if (object@age < 18) {
return("Age must be at least 18")
}
if (object@salary < 0) {
return("Salary must be positive")
}
TRUE
})7.7 Package Structure
7.7.1 Basic Package Layout
# Create package
library(usethis)
create_package("mypackage")
# Package structure:
# mypackage/
# ├── R/ # R code
# │ ├── functions.R
# │ └── utils.R
# ├── man/ # Documentation
# ├── tests/ # Tests
# │ └── testthat/
# ├── data/ # Data files
# ├── vignettes/ # Long-form documentation
# ├── DESCRIPTION # Package metadata
# ├── NAMESPACE # Exports
# └── README.md # README7.7.2 DESCRIPTION File
Package: mypackage
Title: My Awesome Package
Version: 0.1.0
Authors@R: person("Your", "Name", email = "you@example.com",
role = c("aut", "cre"))
Description: This package does amazing things with data.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.0
Imports:
dplyr (>= 1.0.0),
ggplot2
Suggests:
testthat (>= 3.0.0),
knitr,
rmarkdown
VignetteBuilder: knitr
7.7.3 Adding Functions
# Create new function file
use_r("my_function")
# In R/my_function.R
#' My Function
#'
#' @param x A numeric vector
#' @return The mean of x
#' @export
my_function <- function(x) {
mean(x, na.rm = TRUE)
}
# Generate documentation
devtools::document()
# Load package for testing
devtools::load_all()7.8 Testing with testthat
7.8.1 Setup Testing
# Setup testthat
usethis::use_testthat()
# Create test file
usethis::use_test("my_function")7.8.2 Writing Tests
# tests/testthat/test-my_function.R
library(testthat)
test_that("my_function calculates mean correctly", {
expect_equal(my_function(c(1, 2, 3)), 2)
expect_equal(my_function(c(10, 20, 30)), 20)
})
test_that("my_function handles NA values", {
expect_equal(my_function(c(1, NA, 3)), 2)
expect_false(is.na(my_function(c(NA, NA, NA))))
})
test_that("my_function validates input", {
expect_error(my_function("not numeric"))
expect_error(my_function(NULL))
})
test_that("my_function handles edge cases", {
expect_equal(my_function(c()), NaN)
expect_equal(my_function(5), 5)
})7.8.3 Common Expectations
# Equality
expect_equal(2 + 2, 4)
expect_identical(2L, 2L)
expect_equivalent(c(a = 1), 1) # Ignores attributes
# Comparison
expect_gt(5, 3) # Greater than
expect_lt(3, 5) # Less than
expect_gte(5, 5) # Greater than or equal
expect_lte(3, 5) # Less than or equal
# Types
expect_type("hello", "character")
expect_s3_class(lm(y ~ x, data = df), "lm")
expect_s4_class(obj, "Employee")
# Logical
expect_true(2 > 1)
expect_false(2 < 1)
# Errors and warnings
expect_error(log("a"))
expect_warning(log(-1))
expect_message(message("hello"))
expect_silent(2 + 2)
# Matching
expect_match("hello world", "world")
expect_match("test123", "\\d+") # Regex
# Length
expect_length(1:5, 5)
expect_named(c(a = 1, b = 2), c("a", "b"))7.8.4 Testing Best Practices
# Arrange-Act-Assert pattern
test_that("calculate_total adds tax correctly", {
# Arrange
price <- 100
tax_rate <- 0.1
# Act
result <- calculate_total(price, tax_rate)
# Assert
expect_equal(result, 110)
})
# Test fixtures
test_that("function works with sample data", {
# Load test data
test_data <- read.csv("tests/testthat/fixtures/sample_data.csv")
result <- my_analysis(test_data)
expect_equal(nrow(result), 100)
})
# Snapshot testing
test_that("output matches snapshot", {
expect_snapshot(my_complex_function(data))
})7.9 Debugging Strategies
7.9.1 Basic Debugging Tools
# Print debugging
my_function <- function(x) {
print(paste("x =", x)) # Debug print
result <- x * 2
print(paste("result =", result)) # Debug print
result
}
# browser() - interactive debugger
my_function <- function(x) {
y <- x * 2
browser() # Execution stops here
z <- y + 1
z
}
# Commands in browser:
# n - next line
# s - step into function
# c - continue execution
# Q - quit
# ls() - list variables7.9.2 Advanced Debugging
# debug() - debug entire function
debug(my_function)
my_function(5) # Enters debugger
undebug(my_function)
# debugonce() - debug once
debugonce(my_function)
# trace() - add code to function
trace(mean, quote(print(x)))
mean(1:5) # Prints x before calculating
untrace(mean)
# recover() - debug on error
options(error = recover)
my_buggy_function() # Opens debugger on error
options(error = NULL) # Reset7.9.3 Profiling
# Profile code
library(profvis)
profvis({
data <- data.frame(x = rnorm(10000), y = rnorm(10000))
model <- lm(y ~ x, data = data)
predictions <- predict(model, data)
})
# Benchmark functions
library(microbenchmark)
microbenchmark(
base = apply(matrix(1:1000000, ncol = 100), 1, sum),
optimized = rowSums(matrix(1:1000000, ncol = 100)),
times = 100
)7.10 Code Organization
7.10.1 Project Structure
# Recommended structure
# project/
# ├── R/ # R code
# │ ├── 01_load_data.R
# │ ├── 02_clean_data.R
# │ ├── 03_analyze.R
# │ └── utils.R
# ├── data/ # Data files
# │ ├── raw/
# │ └── processed/
# ├── output/ # Results
# │ ├── figures/
# │ └── tables/
# ├── tests/ # Tests
# ├── docs/ # Documentation
# ├── config.R # Configuration
# ├── main.R # Main script
# └── README.md7.10.2 Sourcing Files
# Source all R files in directory
source_files <- list.files("R", pattern = "\\.R$", full.names = TRUE)
invisible(lapply(source_files, source))
# Or use a package
library(here)
source(here("R", "utils.R"))7.11 Best Practices
7.11.1 Code Style
# Use styler for automatic formatting
library(styler)
style_file("R/my_file.R")
style_dir("R/")
# Check style with lintr
library(lintr)
lint("R/my_file.R")7.11.2 Performance Tips
# Pre-allocate vectors
# Slow
result <- c()
for (i in 1:10000) {
result <- c(result, i^2)
}
# Fast
result <- numeric(10000)
for (i in 1:10000) {
result[i] <- i^2
}
# Best: vectorize
result <- (1:10000)^2
# Use appropriate data structures
# Slow: repeated subsetting
for (i in 1:nrow(df)) {
df[i, "new_col"] <- df[i, "old_col"] * 2
}
# Fast: vectorized
df$new_col <- df$old_col * 27.12 Summary
7.12.1 Skills Learned
- ✅ Writing robust, documented functions
- ✅ Error handling and validation
- ✅ Functional programming patterns
- ✅ S3 and S4 object systems
- ✅ Package structure and development
- ✅ Comprehensive testing with testthat
- ✅ Debugging and profiling
- ✅ Code organization and style
7.12.2 Next Steps
- Practice: Write your own R package
- Test: Achieve 80%+ code coverage
- Share: Publish on GitHub or CRAN
- Learn: Continue with
6_R_Developer_Tools.md
Continue to Developer Tools! 🚀