Sampling recordings - Multiple Time Periods

This brief vignette shows an example of a basic workflow selecting recordings for different times of day by site and year.

First we’ll load the packages we want to work with

 library(ARUtools)
 library(dplyr)
 library(purrr)
 library(tidyr)
 library(glue)
 library(lubridate)

Next we’ll prepare our metadata on the recordings by cleaning it, adding site-level information, and calculating the time to sunrise/sunset for each file. We’ll also define recordings as either ‘early’ (starting before 6am) or ‘late’ (starting at or after 6am).

# Standardize the site index; the deployment start/end columns are the
# date range used when matching recordings to sites.
s <- clean_site_index(
  example_sites_clean,
  name_date = c("date_time_start", "date_time_end")
)

# Build the recording metadata: parse file names, attach site information,
# compute time to sunrise/sunset, then label each recording.
m <- clean_metadata(project_files = example_files) |>
  add_sites(s) |>
  calc_sun() |>
  # Recordings starting before 06:00 are "early"; everything else is "late".
  mutate(time_period = if_else(hour(date_time) < 6, "early", "late")) |>
  # Calendar year of the recording date, used later for grouping.
  mutate(year = year(date))
 #> Extracting ARU info...
 #> Extracting Dates and Times...
 #> Joining by columns `date_time_start` and `date_time_end`
m
 #> # A tibble: 42 × 18
 #> file_name type path aru_id manufacturer model aru_type site_id tz_offset
 #> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> 
 #> 1 P01_1_202005... wav a_BA... BARLT... Frontier La... BAR-... BARLT P01_1 -0400 
 #> 2 P01_1_202005... wav a_BA... BARLT... Frontier La... BAR-... BARLT P01_1 -0400 
 #> 3 P02_1_202005... wav a_S4... S4A01... Wildlife Ac... Song... SongMet... P02_1 <NA> 
 #> 4 P02_1_202005... wav a_S4... S4A01... Wildlife Ac... Song... SongMet... P02_1 <NA> 
 #> # i 38 more rows
 #> # i 9 more variables: date_time <dttm>, date <date>, longitude <dbl>,
 #> # latitude <dbl>, tz <chr>, t2sr <dbl>, t2ss <dbl>, time_period <chr>,
 #> # year <dbl>

Time to do some sampling!

First we define the selection parameters for each time frame we’re interested in sampling. This might be "dawn" and "dusk", or in this example, "early" and "late" morning.

The `sim_selection_weights()` function will also simulate the selection weights so we can see what we’ve defined.

# One parameter set per time period; the list names must match the values
# of `time_period` in the metadata.
p <- list(
  # Early recordings: -70 to 240 minutes, default mean.
  early = sim_selection_weights(min_range = c(-70, 240)),
  # Late recordings: 100 to 300 minutes, centred on 200.
  late = sim_selection_weights(min_mean = 200, min_range = c(100, 300))
)
p
 #> $early
 #> $early$min_range
 #> [1] -70 240
 #> 
 #> $early$min_mean
 #> [1] 30
 #> 
 #> $early$min_sd
 #> [1] 60
 #> 
 #> $early$day_range
 #> [1] 120 201
 #> 
 #> $early$day_mean
 #> [1] 161
 #> 
 #> $early$day_sd
 #> [1] 20
 #> 
 #> $early$offset
 #> [1] 0
 #> 
 #> $early$return_log
 #> [1] TRUE
 #> 
 #> $early$selection_fun
 #> [1] "norm"
 #> 
 #> 
 #> $late
 #> $late$min_range
 #> [1] 100 300
 #> 
 #> $late$min_mean
 #> [1] 200
 #> 
 #> $late$min_sd
 #> [1] 60
 #> 
 #> $late$day_range
 #> [1] 120 201
 #> 
 #> $late$day_mean
 #> [1] 161
 #> 
 #> $late$day_sd
 #> [1] 20
 #> 
 #> $late$offset
 #> [1] 0
 #> 
 #> $late$return_log
 #> [1] TRUE
 #> 
 #> $late$selection_fun
 #> [1] "norm"

Now we can calculate selection weights

Here we’ll calculate a separate set of selection weights for early and late recordings in each year. Then we’ll group recordings by site, year, and time period.

# Calculate selection weights separately for every time period x year
# combination, then label each recording with its sampling group.
w <- m |>
  # One row per (time_period, year); all other columns are nested in `data`.
  nest(data = c(-time_period, -year)) |>
  mutate(
    # Look the parameter set up by name so every nested row gets the
    # parameters for its own time period. Pairing `p` with the rows by
    # position fails when there is more than one year and silently
    # mismatches if the row order differs from the order of `p`.
    params = p[time_period],
    sel = map2(data, params, calc_selection_weights)
  ) |>
  unnest(sel) |>
  # The nested inputs are no longer needed once the weights are unnested.
  select(-"data", -"params") |>
  # Sampling stratum: one group per site, year and time period.
  mutate(selection_group = glue("{site_id}_{year}_{time_period}"))
w
 #> # A tibble: 21 × 27
 #> time_period year file_name type path aru_id manufacturer model aru_type
 #> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> 
 #> 1 early 2020 P01_1_202005... wav a_BA... BARLT... Frontier La... BAR-... BARLT 
 #> 2 early 2020 P02_1_202005... wav a_S4... S4A01... Wildlife Ac... Song... SongMet...
 #> 3 early 2020 P06_1_202005... wav a_BA... BARLT... Frontier La... BAR-... BARLT 
 #> 4 early 2020 P07_1_202005... wav a_S4... S4A01... Wildlife Ac... Song... SongMet...
 #> # i 17 more rows
 #> # i 18 more variables: site_id <chr>, tz_offset <chr>, date_time <dttm>,
 #> # date <date>, longitude <dbl>, latitude <dbl>, tz <chr>, t2sr <dbl>,
 #> # t2ss <dbl>, doy <dbl>, psel_by <chr>, psel_min <dbl>, psel_doy <dbl>,
 #> # psel <dbl>, psel_scaled <dbl>, psel_std <dbl>, psel_normalized <dbl>,
 #> # selection_group <glue>

The `w` data set contains the original recordings, but now also has new columns containing various measures of the probability of selection.

We’ll define the number of samples we’d like to have.

# Define how many recordings to sample (`n`) and how many oversamples
# (`n_os`) to draw for each selection group.
n <- w |>
  # Number of recordings available in each group.
  summarize(n_recordings = n(), .by = c("selection_group", "time_period")) |>
  mutate(
    # Target sample size: 5 for early groups, 2 for late groups.
    n = if_else(time_period == "early", 5, 2),
    # Oversamples: a third of the target (rounded), but never more than the
    # recordings left over after the base sample, and never negative.
    # This must be computed before `n` is capped below. An earlier `n_os`
    # assignment that this line overwrote has been removed as dead code.
    n_os = pmax(0, pmin(n_recordings - n, round(n / 3))),
    # Cannot sample more recordings than the group contains.
    n = pmin(n, n_recordings)
  )
n
 #> # A tibble: 7 × 5
 #> selection_group time_period n_recordings n n_os
 #> <glue> <chr> <int> <dbl> <dbl>
 #> 1 P01_1_2020_early early 3 3 0
 #> 2 P02_1_2020_early early 3 3 0
 #> 3 P06_1_2020_early early 3 3 0
 #> 4 P07_1_2020_early early 3 3 0
 #> # i 3 more rows

And finally sample the recordings!

# Draw the sample: strata are the site/year/time-period groups, and the
# normalized selection probabilities are used as sampling weights.
g <- w |>
  sample_recordings(
    n,
    col_site_id = selection_group,
    col_sel_weights = psel_normalized
  )
g
 #> Summary of Site Counts: 
 #> 
 #> siteuse by total: 
 #> Base Over
 #> total 19 2
 #> 
 #> siteuse by stratum: 
 #> Base Over
 #> P01_1_2020_early 3 0
 #> P02_1_2020_early 3 0
 #> P03_1_2020_late 2 1
 #> P06_1_2020_early 3 0
 #> P07_1_2020_early 3 0
 #> P08_1_2020_late 2 1
 #> P09_1_2020_early 3 0

The recordings selected for sampling...

# Print the `sites_base` element of the sampling result `g`; every row shown
# in the output below has `siteuse` equal to "Base".
g$sites_base
 #> Simple feature collection with 19 features and 35 fields
 #> Geometry type: POINT
 #> Dimension: XY
 #> Bounding box: xmin: 124 ymin: -53.21667 xmax: 132 ymax: 238.3167
 #> Projected CRS: WGS 84 / World Mercator
 #> First 10 features:
 #> siteID siteuse replsite lon_WGS84 lat_WGS84 stratum wgt ip
 #> 1 sample-01 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
 #> 2 sample-02 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
 #> 3 sample-03 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
 #> 4 sample-04 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
 #> 5 sample-05 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
 #> 6 sample-06 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
 #> 7 sample-07 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
 #> 8 sample-08 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
 #> 9 sample-09 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
 #> 10 sample-10 Base None 0.001167810 -3.701886e-04 P07_1_2020_early 1 1
 #> caty aux time_period year file_name type
 #> 1 None 0.001 early 2020 P01_1_20200503T052000-0400_ARU.wav wav
 #> 2 None 0.001 early 2020 P01_1_20200503T052000-0400_ARU.wav wav
 #> 3 None 0.001 early 2020 P01_1_20200503T052000-0400_ARU.wav wav
 #> 4 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
 #> 5 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
 #> 6 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
 #> 7 None 0.001 early 2020 P06_1_20200509T052000-0400_ARU.wav wav
 #> 8 None 0.001 early 2020 P06_1_20200509T052000-0400_ARU.wav wav
 #> 9 None 0.001 early 2020 P06_1_20200509T052000-0400_ARU.wav wav
 #> 10 None 0.001 early 2020 P07_1_20200509T052500_ARU.wav wav
 #> path aru_id
 #> 1 a_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
 #> 2 j_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
 #> 3 o_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
 #> 4 a_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav S4A01234
 #> 5 j_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav S4A01234
 #> 6 o_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav S4A01234
 #> 7 a_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
 #> 8 j_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
 #> 9 o_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
 #> 10 a_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav S4A01234
 #> manufacturer model aru_type site_id tz_offset
 #> 1 Frontier Labs BAR-LT BARLT P01_1 -0400
 #> 2 Frontier Labs BAR-LT BARLT P01_1 -0400
 #> 3 Frontier Labs BAR-LT BARLT P01_1 -0400
 #> 4 Wildlife Acoustics Song Meter 4 SongMeter P02_1 <NA>
 #> 5 Wildlife Acoustics Song Meter 4 SongMeter P02_1 <NA>
 #> 6 Wildlife Acoustics Song Meter 4 SongMeter P02_1 <NA>
 #> 7 Frontier Labs BAR-LT BARLT P06_1 -0400
 #> 8 Frontier Labs BAR-LT BARLT P06_1 -0400
 #> 9 Frontier Labs BAR-LT BARLT P06_1 -0400
 #> 10 Wildlife Acoustics Song Meter 4 SongMeter P07_1 <NA>
 #> date_time date longitude latitude tz t2ss
 #> 1 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto 498.4167
 #> 2 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto 498.4167
 #> 3 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto 498.4167
 #> 4 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto 483.4167
 #> 5 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto 483.4167
 #> 6 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto 483.4167
 #> 7 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg 521.9333
 #> 8 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg 521.9333
 #> 9 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg 521.9333
 #> 10 2020-05-09 05:25:00 2020-05-09 -86.03 50.45 America/Toronto 488.7500
 #> psel_by psel_min psel_doy psel psel_scaled psel_std psel_normalized
 #> 1 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1 0.001
 #> 2 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1 0.001
 #> 3 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1 0.001
 #> 4 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1 0.001
 #> 5 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1 0.001
 #> 6 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1 0.001
 #> 7 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1 0.001
 #> 8 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1 0.001
 #> 9 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1 0.001
 #> 10 t2sr -0.5129536 -0.8503970 0.2558023 0.9470254 1 0.001
 #> selection_group geometry
 #> 1 P01_1_2020_early POINT (124 -53.21667)
 #> 2 P01_1_2020_early POINT (124 -53.21667)
 #> 3 P01_1_2020_early POINT (124 -53.21667)
 #> 4 P02_1_2020_early POINT (125 -47.25)
 #> 5 P02_1_2020_early POINT (125 -47.25)
 #> 6 P02_1_2020_early POINT (125 -47.25)
 #> 7 P06_1_2020_early POINT (130 3.583333)
 #> 8 P06_1_2020_early POINT (130 3.583333)
 #> 9 P06_1_2020_early POINT (130 3.583333)
 #> 10 P07_1_2020_early POINT (130 -40.93333)

AltStyle によって変換されたページ (->オリジナル) /