library(tidyverse)
library(palmerpenguins)
# Prepare a clean table for the examples: keep the identifying columns and
# the body measurements, and drop only rows whose species is missing (rows
# with missing measurements are kept).
penguins_clean <- penguins %>%
  select(
    species, island, year,
    bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g
  ) %>%
  drop_na(species)
Combining Data
Joining examples with palmerpenguins
Example A — inner_join with a small metadata table
# Small species-level lookup table, written row by row. "Chinstrap" is left
# out on purpose so the join below has a species with no match to filter out.
species_meta <- tribble(
  ~species, ~conservation_note,
  "Adelie", "stable",
  "Gentoo", "monitor"
)
# An inner join returns only the rows of penguins_clean whose species also
# appears in species_meta, so only Adelie and Gentoo rows survive.
# species is a factor on the left and character on the right; the joined
# column comes back as character.
penguins_inner <- inner_join(penguins_clean, species_meta, by = "species")
# Inspect the row count per species in the joined table; Chinstrap should be
# absent because it has no match in species_meta.
penguins_inner %>% count(species)
# A tibble: 2 × 2
species n
<chr> <int>
1 Adelie 152
2 Gentoo 124
Example B — left_join to attach island-level summaries
# One row per island holding the average body mass of its penguins.
# Missing masses are ignored, and the result is returned ungrouped.
island_summary <- penguins_clean %>%
  group_by(island) %>%
  summarise(
    island_mean_mass = mean(body_mass_g, na.rm = TRUE),
    .groups = "drop"
  )
# A left join keeps every row of penguins_clean and attaches the matching
# island_mean_mass from island_summary as an extra column.
penguins_with_island <- left_join(penguins_clean, island_summary, by = "island")
# Preview the first six rows to check that island_mean_mass was added
# alongside the original columns.
penguins_with_island %>% slice_head(n = 6)
# A tibble: 6 × 8
species island year bill_length_mm bill_depth_mm flipper_length_mm
<fct> <fct> <int> <dbl> <dbl> <int>
1 Adelie Torgersen 2007 39.1 18.7 181
2 Adelie Torgersen 2007 39.5 17.4 186
3 Adelie Torgersen 2007 40.3 18 195
4 Adelie Torgersen 2007 NA NA NA
5 Adelie Torgersen 2007 36.7 19.3 193
6 Adelie Torgersen 2007 39.3 20.6 190
# ℹ 2 more variables: body_mass_g <int>, island_mean_mass <dbl>