Get started

 library(mintyr)

split_cv

 # Prepare example data: Convert first 3 columns of iris dataset to long format and split
dt_split <- w2l_split(data = iris, cols2l = 1:3)
 # dt_split is now a list containing 3 data tables for Sepal.Length, Sepal.Width, and Petal.Length
 
 # Example 1: Single cross-validation (no repeats)
 split_cv(
 split_dt = dt_split, # Input list of split data
 v = 3, # Set 3-fold cross-validation
 repeats = 1 # Perform cross-validation once (no repeats)
)
 #> $Sepal.Length
 #> splits id train validate
 #> <list> <char> <list> <list>
 #> 1: <vfold_split[100x50x150x3]> Fold1 <data.table[100x3]> <data.table[50x3]>
 #> 2: <vfold_split[100x50x150x3]> Fold2 <data.table[100x3]> <data.table[50x3]>
 #> 3: <vfold_split[100x50x150x3]> Fold3 <data.table[100x3]> <data.table[50x3]>
 #> 
 #> $Sepal.Width
 #> splits id train validate
 #> <list> <char> <list> <list>
 #> 1: <vfold_split[100x50x150x3]> Fold1 <data.table[100x3]> <data.table[50x3]>
 #> 2: <vfold_split[100x50x150x3]> Fold2 <data.table[100x3]> <data.table[50x3]>
 #> 3: <vfold_split[100x50x150x3]> Fold3 <data.table[100x3]> <data.table[50x3]>
 #> 
 #> $Petal.Length
 #> splits id train validate
 #> <list> <char> <list> <list>
 #> 1: <vfold_split[100x50x150x3]> Fold1 <data.table[100x3]> <data.table[50x3]>
 #> 2: <vfold_split[100x50x150x3]> Fold2 <data.table[100x3]> <data.table[50x3]>
 #> 3: <vfold_split[100x50x150x3]> Fold3 <data.table[100x3]> <data.table[50x3]>
 # Returns a list where each element contains:
 # - splits: rsample split objects
 # - id: fold numbers (Fold1, Fold2, Fold3)
 # - train: training set data
 # - validate: validation set data
 
 # Example 2: Repeated cross-validation
 split_cv(
 split_dt = dt_split, # Input list of split data
 v = 3, # Set 3-fold cross-validation
 repeats = 2 # Perform cross-validation twice
)
 #> $Sepal.Length
 #> splits id id2 train
 #> <list> <char> <char> <list>
 #> 1: <vfold_split[100x50x150x3]> Repeat1 Fold1 <data.table[100x3]>
 #> 2: <vfold_split[100x50x150x3]> Repeat1 Fold2 <data.table[100x3]>
 #> 3: <vfold_split[100x50x150x3]> Repeat1 Fold3 <data.table[100x3]>
 #> 4: <vfold_split[100x50x150x3]> Repeat2 Fold1 <data.table[100x3]>
 #> 5: <vfold_split[100x50x150x3]> Repeat2 Fold2 <data.table[100x3]>
 #> 6: <vfold_split[100x50x150x3]> Repeat2 Fold3 <data.table[100x3]>
 #> validate
 #> <list>
 #> 1: <data.table[50x3]>
 #> 2: <data.table[50x3]>
 #> 3: <data.table[50x3]>
 #> 4: <data.table[50x3]>
 #> 5: <data.table[50x3]>
 #> 6: <data.table[50x3]>
 #> 
 #> $Sepal.Width
 #> splits id id2 train
 #> <list> <char> <char> <list>
 #> 1: <vfold_split[100x50x150x3]> Repeat1 Fold1 <data.table[100x3]>
 #> 2: <vfold_split[100x50x150x3]> Repeat1 Fold2 <data.table[100x3]>
 #> 3: <vfold_split[100x50x150x3]> Repeat1 Fold3 <data.table[100x3]>
 #> 4: <vfold_split[100x50x150x3]> Repeat2 Fold1 <data.table[100x3]>
 #> 5: <vfold_split[100x50x150x3]> Repeat2 Fold2 <data.table[100x3]>
 #> 6: <vfold_split[100x50x150x3]> Repeat2 Fold3 <data.table[100x3]>
 #> validate
 #> <list>
 #> 1: <data.table[50x3]>
 #> 2: <data.table[50x3]>
 #> 3: <data.table[50x3]>
 #> 4: <data.table[50x3]>
 #> 5: <data.table[50x3]>
 #> 6: <data.table[50x3]>
 #> 
 #> $Petal.Length
 #> splits id id2 train
 #> <list> <char> <char> <list>
 #> 1: <vfold_split[100x50x150x3]> Repeat1 Fold1 <data.table[100x3]>
 #> 2: <vfold_split[100x50x150x3]> Repeat1 Fold2 <data.table[100x3]>
 #> 3: <vfold_split[100x50x150x3]> Repeat1 Fold3 <data.table[100x3]>
 #> 4: <vfold_split[100x50x150x3]> Repeat2 Fold1 <data.table[100x3]>
 #> 5: <vfold_split[100x50x150x3]> Repeat2 Fold2 <data.table[100x3]>
 #> 6: <vfold_split[100x50x150x3]> Repeat2 Fold3 <data.table[100x3]>
 #> validate
 #> <list>
 #> 1: <data.table[50x3]>
 #> 2: <data.table[50x3]>
 #> 3: <data.table[50x3]>
 #> 4: <data.table[50x3]>
 #> 5: <data.table[50x3]>
 #> 6: <data.table[50x3]>
 # Returns a list where each element contains:
 # - splits: rsample split objects
 # - id: repeat numbers (Repeat1, Repeat2)
 # - id2: fold numbers (Fold1, Fold2, Fold3)
 # - train: training set data
 # - validate: validation set data

c2p_nest

 # Example data preparation: Define column names for combination
col_names <- c("Sepal.Length", "Sepal.Width", "Petal.Length")
 
 # Example 1: Basic column-to-pairs nesting with custom separator
 c2p_nest(
 iris, # Input iris dataset
 cols2bind = col_names, # Columns to be combined as pairs
 pairs_n = 2, # Create pairs of 2 columns
 sep = "&" # Custom separator for pair names
)
 #> pairs data
 #> <char> <list>
 #> 1: Sepal.Length&Sepal.Width <data.table[150x4]>
 #> 2: Sepal.Length&Petal.Length <data.table[150x4]>
 #> 3: Sepal.Width&Petal.Length <data.table[150x4]>
 # Returns a nested data.table where:
 # - pairs: combined column names (e.g., "Sepal.Length&Sepal.Width")
 # - data: list column containing data.tables with value1, value2 columns
 
 # Example 2: Column-to-pairs nesting with numeric indices and grouping
 c2p_nest(
 iris, # Input iris dataset
 cols2bind = 1:3, # First 3 columns to be combined
 pairs_n = 2, # Create pairs of 2 columns
 by = 5 # Group by 5th column (Species)
)
 #> pairs Species data
 #> <char> <fctr> <list>
 #> 1: Sepal.Length-Sepal.Width setosa <data.table[50x3]>
 #> 2: Sepal.Length-Sepal.Width versicolor <data.table[50x3]>
 #> 3: Sepal.Length-Sepal.Width virginica <data.table[50x3]>
 #> 4: Sepal.Length-Petal.Length setosa <data.table[50x3]>
 #> 5: Sepal.Length-Petal.Length versicolor <data.table[50x3]>
 #> 6: Sepal.Length-Petal.Length virginica <data.table[50x3]>
 #> 7: Sepal.Width-Petal.Length setosa <data.table[50x3]>
 #> 8: Sepal.Width-Petal.Length versicolor <data.table[50x3]>
 #> 9: Sepal.Width-Petal.Length virginica <data.table[50x3]>
 # Returns a nested data.table where:
 # - pairs: combined column names
 # - Species: grouping variable
 # - data: list column containing data.tables grouped by Species

r2p_nest

 # Example 1: Row-to-pairs nesting with column names
 r2p_nest(
 mtcars, # Input mtcars dataset
 rows2bind = "cyl", # Column to be used as row values
 by = c("hp", "drat", "wt") # Columns to be transformed into pairs
)
 #> name data
 #> <fctr> <list>
 #> 1: hp <data.table[32x12]>
 #> 2: drat <data.table[32x12]>
 #> 3: wt <data.table[32x12]>
 # Returns a nested data.table where:
 # - name: variable names (hp, drat, wt)
 # - data: list column containing data.tables with rows grouped by cyl values
 
 # Example 2: Row-to-pairs nesting with numeric indices
 r2p_nest(
 mtcars, # Input mtcars dataset
 rows2bind = 2, # Use 2nd column (cyl) as row values
 by = 4:6 # Use columns 4-6 (hp, drat, wt) for pairs
)
 #> name data
 #> <fctr> <list>
 #> 1: hp <data.table[32x12]>
 #> 2: drat <data.table[32x12]>
 #> 3: wt <data.table[32x12]>
 # Returns a nested data.table where:
 # - name: variable names from columns 4-6
 # - data: list column containing data.tables with rows grouped by cyl values

export_nest

 # Example 1: Basic nested data export workflow
 # Step 1: Create nested data structure
dt_nest <- w2l_nest(
 data = iris, # Input iris dataset
 cols2l = 1:2, # Columns to be nested
 by = "Species" # Grouping variable
)
 
 # Step 2: Export nested data to files
 export_nest(
 nest_dt = dt_nest, # Input nested data.table
 nest_cols = "data", # Column containing nested data
 group_cols = c("name", "Species") # Columns to create directory structure
)
 #> Total files exported: 6
 #> [1] 6
 # Returns the number of files created
 # Creates directory structure: tempdir()/name/Species/data.txt
 
 # Check exported files
 list.files(
 path = tempdir(), # Default export directory
 pattern = "txt", # File type pattern to search
 recursive = TRUE # Search in subdirectories
)
 #> [1] "Sepal.Length/setosa/data.txt" "Sepal.Length/versicolor/data.txt"
 #> [3] "Sepal.Length/virginica/data.txt" "Sepal.Width/setosa/data.txt" 
 #> [5] "Sepal.Width/versicolor/data.txt" "Sepal.Width/virginica/data.txt"
 # Returns list of created files and their paths
 
 # Clean up exported files
files <- list.files(
 path = tempdir(), # Default export directory
 pattern = "txt", # File type pattern to search
 recursive = TRUE, # Search in subdirectories
 full.names = TRUE # Return full file paths
)
 file.remove(files) # Remove all exported files
 #> [1] TRUE TRUE TRUE TRUE TRUE TRUE

export_list

 # Example: Export split data to files
 
 # Step 1: Create split data structure
dt_split <- w2l_split(
 data = iris, # Input iris dataset
 cols2l = 1:2, # Columns to be split
 by = "Species" # Grouping variable
)
 
 # Step 2: Export split data to files
 export_list(
 split_dt = dt_split # Input list of data.tables
)
 #> [1] 6
 # Returns the number of files created
 # Files are saved in tempdir() with .txt extension
 
 # Check exported files
 list.files(
 path = tempdir(), # Default export directory
 pattern = "txt", # File type pattern to search
 recursive = TRUE # Search in subdirectories
)
 #> [1] "Sepal.Length_setosa.txt" "Sepal.Length_versicolor.txt"
 #> [3] "Sepal.Length_virginica.txt" "Sepal.Width_setosa.txt" 
 #> [5] "Sepal.Width_versicolor.txt" "Sepal.Width_virginica.txt"
 
 # Clean up exported files
files <- list.files(
 path = tempdir(), # Default export directory
 pattern = "txt", # File type pattern to search
 recursive = TRUE, # Search in subdirectories
 full.names = TRUE # Return full file paths
)
 file.remove(files) # Remove all exported files
 #> [1] TRUE TRUE TRUE TRUE TRUE TRUE

convert_nest

 # Example 1: Create nested data structures
 # Create single nested column
df_nest1 <- iris |> 
 dplyr::group_nest(Species) # Group and nest by Species
 
 # Create multiple nested columns
df_nest2 <- iris |>
 dplyr::group_nest(Species) |> # Group and nest by Species
 dplyr::mutate(
 data2 = purrr::map( # Create second nested column
 data,
 dplyr::mutate, 
 c = 2
 )
 )
 
 # Example 2: Convert nested structures
 # Convert data frame to data table
 convert_nest(
 df_nest1, # Input nested data frame
 to = "dt" # Convert to data.table
)
 #> Species data
 #> <fctr> <list>
 #> 1: setosa <data.table[50x4]>
 #> 2: versicolor <data.table[50x4]>
 #> 3: virginica <data.table[50x4]>
 
 # Example 3: Convert data table to data frame
dt_nest <- mintyr::w2l_nest(
 data = iris, # Input dataset
 cols2l = 1:2 # Columns to nest
)
 convert_nest(
 dt_nest, # Input nested data table
 to = "df" # Convert to data frame
)
 #> # A tibble: 2 × 2
 #> name data 
 #> <fct> <list> 
 #> 1 Sepal.Length <tibble [150 × 4]>
 #> 2 Sepal.Width <tibble [150 × 4]>

get_path_segment

 # Example: Path segment extraction demonstrations
 
 # Setup test paths
paths <- c(
 "C:/home/user/documents", # Windows style path
 "/var/log/system", # Unix system path
 "/usr/local/bin" # Unix binary path
)
 
 # Example 1: Extract first segment
 get_path_segment(
 paths, # Input paths
 1 # Get first segment
)
 #> [1] "home" "var" "usr"
 # Returns: c("home", "var", "usr")
 
 # Example 2: Extract second-to-last segment
 get_path_segment(
 paths, # Input paths
 -2 # Get second-to-last segment
)
 #> [1] "user" "log" "local"
 # Returns: c("user", "log", "local")
 
 # Example 3: Extract from first to last segment
 get_path_segment(
 paths, # Input paths
 c(1,-1) # Range from first to last
)
 #> [1] "home/user/documents" "var/log/system" "usr/local/bin"
 # Returns full paths without drive letters
 
 # Example 4: Extract first three segments
 get_path_segment(
 paths, # Input paths
 c(1,3) # Range from first to third
)
 #> [1] "home/user/documents" "var/log/system" "usr/local/bin"
 # Returns: c("home/user/documents", "var/log/system", "usr/local/bin")
 
 # Example 5: Extract last two segments (indices given in reverse order; output keeps path order)
 get_path_segment(
 paths, # Input paths
 c(-1,-2) # Range from last to second-to-last
)
 #> [1] "user/documents" "log/system" "local/bin"
 # Returns: c("user/documents", "log/system", "local/bin")
 
 # Example 6: Extract first two segments
 get_path_segment(
 paths, # Input paths
 c(1,2) # Range from first to second
)
 #> [1] "home/user" "var/log" "usr/local"
 # Returns: c("home/user", "var/log", "usr/local")

format_digits

 # Example: Number formatting demonstrations
 
 # Setup test data
dt <- data.table::data.table(
 a = c(0.1234, 0.5678), # Numeric column 1
 b = c(0.2345, 0.6789), # Numeric column 2
 c = c("text1", "text2") # Text column
)
 
 # Example 1: Format all numeric columns
 format_digits(
 dt, # Input data table
 digits = 2 # Round to 2 decimal places
)
 #> a b c
 #> <char> <char> <char>
 #> 1: 0.12 0.23 text1
 #> 2: 0.57 0.68 text2
 
 # Example 2: Format specific column as percentage
 format_digits(
 dt, # Input data table
 cols = c("a"), # Only format column 'a'
 digits = 2, # Round to 2 decimal places
 percentage = TRUE # Convert to percentage
)
 #> a b c
 #> <char> <num> <char>
 #> 1: 12.34% 0.2345 text1
 #> 2: 56.78% 0.6789 text2

mintyr_example

 # Get path to an example file
 mintyr_example("csv_test1.csv")
 #> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test1.csv"

mintyr_examples

 # List all example files
 mintyr_examples()
 #> [1] "csv_test1.csv" "csv_test2.csv" "xlsx_test1.xlsx" "xlsx_test2.xlsx"

import_xlsx

 # Example: Excel file import demonstrations
 
 # Setup test files
xlsx_files <- mintyr_example(
 mintyr_examples("xlsx_test") # Get example Excel files
)
 
 # Example 1: Import and combine all sheets from all files
 import_xlsx(
 xlsx_files, # Input Excel file paths
 rbind = TRUE # Combine all sheets into one data.table
)
 #> excel_name sheet_name col1 col2 col3
 #> <char> <char> <num> <char> <lgcl>
 #> 1: xlsx_test1 Sheet1 4 d FALSE
 #> 2: xlsx_test1 Sheet1 5 f TRUE
 #> 3: xlsx_test1 Sheet1 6 e TRUE
 #> 4: xlsx_test1 Sheet2 1 a TRUE
 #> 5: xlsx_test1 Sheet2 2 b FALSE
 #> 6: xlsx_test1 Sheet2 3 c TRUE
 #> 7: xlsx_test2 Sheet1 15 o FALSE
 #> 8: xlsx_test2 Sheet1 16 p TRUE
 #> 9: xlsx_test2 Sheet1 17 q FALSE
 #> 10: xlsx_test2 a 7 g FALSE
 #> 11: xlsx_test2 a 9 h TRUE
 #> 12: xlsx_test2 a 8 i FALSE
 #> 13: xlsx_test2 b 10 J FALSE
 #> 14: xlsx_test2 b 11 K TRUE
 #> 15: xlsx_test2 b 12 L FALSE
 
 # Example 2: Import specific sheets separately
 import_xlsx(
 xlsx_files, # Input Excel file paths
 rbind = FALSE, # Keep sheets as separate data.tables
 sheet = 2 # Only import the second sheet of each file
)
 #> $xlsx_test1_Sheet2
 #> col1 col2 col3
 #> <num> <char> <lgcl>
 #> 1: 1 a TRUE
 #> 2: 2 b FALSE
 #> 3: 3 c TRUE
 #> 
 #> $xlsx_test2_a
 #> col1 col2 col3
 #> <num> <char> <lgcl>
 #> 1: 7 g FALSE
 #> 2: 9 h TRUE
 #> 3: 8 i FALSE

import_csv

 # Example: CSV file import demonstrations
 
 # Setup test files
csv_files <- mintyr_example(
 mintyr_examples("csv_test") # Get example CSV files
)
 
 # Example 1: Import and combine CSV files using data.table
 import_csv(
 csv_files, # Input CSV file paths
 package = "data.table", # Use data.table for reading
 rbind = TRUE, # Combine all files into one data.table
 rbind_label = "_file", # Column name for file source
 keep_ext = TRUE, # Include .csv extension in _file column
 full_path = TRUE # Show complete file paths in _file column
)
 #> _file
 #> <char>
 #> 1: C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test1.csv
 #> 2: C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test1.csv
 #> 3: C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test1.csv
 #> 4: C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test2.csv
 #> 5: C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test2.csv
 #> 6: C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/csv_test2.csv
 #> col1 col2 col3
 #> <int> <char> <lgcl>
 #> 1: 4 d FALSE
 #> 2: 5 f TRUE
 #> 3: 6 e TRUE
 #> 4: 15 o FALSE
 #> 5: 16 p TRUE
 #> 6: 17 q FALSE
 
 # Example 2: Import files separately using arrow
 import_csv(
 csv_files, # Input CSV file paths
 package = "arrow", # Use arrow for reading
 rbind = FALSE # Keep files as separate list elements (one table per file)
)
 #> $csv_test1
 #> # A tibble: 3 × 3
 #> col1 col2 col3 
 #> <int> <chr> <lgl>
 #> 1 4 d FALSE
 #> 2 5 f TRUE 
 #> 3 6 e TRUE 
 #> 
 #> $csv_test2
 #> # A tibble: 3 × 3
 #> col1 col2 col3 
 #> <int> <chr> <lgl>
 #> 1 15 o FALSE
 #> 2 16 p TRUE 
 #> 3 17 q FALSE

get_filename

 # Example: File path processing demonstrations
 
 # Setup test files
xlsx_files <- mintyr_example(
 mintyr_examples("xlsx_test") # Get example Excel files
)
 
 # Example 1: Extract filenames without extensions
 get_filename(
 xlsx_files, # Input file paths
 rm_extension = TRUE, # Remove file extensions
 rm_path = TRUE # Remove directory paths
)
 #> [1] "xlsx_test1" "xlsx_test2"
 
 # Example 2: Keep file extensions
 get_filename(
 xlsx_files, # Input file paths
 rm_extension = FALSE, # Keep file extensions
 rm_path = TRUE # Remove directory paths
)
 #> [1] "xlsx_test1.xlsx" "xlsx_test2.xlsx"
 
 # Example 3: Keep full paths without extensions
 get_filename(
 xlsx_files, # Input file paths
 rm_extension = TRUE, # Remove file extensions
 rm_path = FALSE # Keep directory paths
)
 #> [1] "C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/xlsx_test1"
 #> [2] "C:/Users/Dell/AppData/Local/Temp/RtmpYB9Y1S/Rinst38b0670f21db/mintyr/extdata/xlsx_test2"

w2l_nest

 # Example: Wide to long format nesting demonstrations
 
 # Example 1: Basic nesting by group
 w2l_nest(
 data = iris, # Input dataset
 by = "Species" # Group by Species column
)
 #> Species data
 #> <fctr> <list>
 #> 1: setosa <data.table[50x4]>
 #> 2: versicolor <data.table[50x4]>
 #> 3: virginica <data.table[50x4]>
 
 # Example 2: Nest specific columns with numeric indices
 w2l_nest(
 data = iris, # Input dataset
 cols2l = 1:4, # Select first 4 columns to nest
 by = "Species" # Group by Species column
)
 #> name Species data
 #> <fctr> <fctr> <list>
 #> 1: Sepal.Length setosa <data.table[50x1]>
 #> 2: Sepal.Length versicolor <data.table[50x1]>
 #> 3: Sepal.Length virginica <data.table[50x1]>
 #> 4: Sepal.Width setosa <data.table[50x1]>
 #> 5: Sepal.Width versicolor <data.table[50x1]>
 #> 6: Sepal.Width virginica <data.table[50x1]>
 #> 7: Petal.Length setosa <data.table[50x1]>
 #> 8: Petal.Length versicolor <data.table[50x1]>
 #> 9: Petal.Length virginica <data.table[50x1]>
 #> 10: Petal.Width setosa <data.table[50x1]>
 #> 11: Petal.Width versicolor <data.table[50x1]>
 #> 12: Petal.Width virginica <data.table[50x1]>
 
 # Example 3: Nest specific columns with column names
 w2l_nest(
 data = iris, # Input dataset
 cols2l = c("Sepal.Length", # Select columns by name
 "Sepal.Width", 
 "Petal.Length"),
 by = 5 # Group by column index 5 (Species)
)
 #> name Species data
 #> <fctr> <fctr> <list>
 #> 1: Sepal.Length setosa <data.table[50x2]>
 #> 2: Sepal.Length versicolor <data.table[50x2]>
 #> 3: Sepal.Length virginica <data.table[50x2]>
 #> 4: Sepal.Width setosa <data.table[50x2]>
 #> 5: Sepal.Width versicolor <data.table[50x2]>
 #> 6: Sepal.Width virginica <data.table[50x2]>
 #> 7: Petal.Length setosa <data.table[50x2]>
 #> 8: Petal.Length versicolor <data.table[50x2]>
 #> 9: Petal.Length virginica <data.table[50x2]>
 # Returns similar structure to Example 2

w2l_split

 # Example: Wide to long format splitting demonstrations
 
 # Example 1: Basic splitting by Species
 w2l_split(
 data = iris, # Input dataset
 by = "Species" # Split by Species column
) |> 
 lapply(head) # Show first 6 rows of each split
 #> $setosa
 #> Sepal.Length Sepal.Width Petal.Length Petal.Width
 #> <num> <num> <num> <num>
 #> 1: 5.1 3.5 1.4 0.2
 #> 2: 4.9 3.0 1.4 0.2
 #> 3: 4.7 3.2 1.3 0.2
 #> 4: 4.6 3.1 1.5 0.2
 #> 5: 5.0 3.6 1.4 0.2
 #> 6: 5.4 3.9 1.7 0.4
 #> 
 #> $versicolor
 #> Sepal.Length Sepal.Width Petal.Length Petal.Width
 #> <num> <num> <num> <num>
 #> 1: 7.0 3.2 4.7 1.4
 #> 2: 6.4 3.2 4.5 1.5
 #> 3: 6.9 3.1 4.9 1.5
 #> 4: 5.5 2.3 4.0 1.3
 #> 5: 6.5 2.8 4.6 1.5
 #> 6: 5.7 2.8 4.5 1.3
 #> 
 #> $virginica
 #> Sepal.Length Sepal.Width Petal.Length Petal.Width
 #> <num> <num> <num> <num>
 #> 1: 6.3 3.3 6.0 2.5
 #> 2: 5.8 2.7 5.1 1.9
 #> 3: 7.1 3.0 5.9 2.1
 #> 4: 6.3 2.9 5.6 1.8
 #> 5: 6.5 3.0 5.8 2.2
 #> 6: 7.6 3.0 6.6 2.1
 
 # Example 2: Split specific columns using numeric indices
 w2l_split(
 data = iris, # Input dataset
 cols2l = 1:3, # Select first 3 columns to split
 by = 5 # Split by column index 5 (Species)
) |> 
 lapply(head) # Show first 6 rows of each split
 #> $Sepal.Length_setosa
 #> Petal.Width value
 #> <num> <num>
 #> 1: 0.2 5.1
 #> 2: 0.2 4.9
 #> 3: 0.2 4.7
 #> 4: 0.2 4.6
 #> 5: 0.2 5.0
 #> 6: 0.4 5.4
 #> 
 #> $Sepal.Length_versicolor
 #> Petal.Width value
 #> <num> <num>
 #> 1: 1.4 7.0
 #> 2: 1.5 6.4
 #> 3: 1.5 6.9
 #> 4: 1.3 5.5
 #> 5: 1.5 6.5
 #> 6: 1.3 5.7
 #> 
 #> $Sepal.Length_virginica
 #> Petal.Width value
 #> <num> <num>
 #> 1: 2.5 6.3
 #> 2: 1.9 5.8
 #> 3: 2.1 7.1
 #> 4: 1.8 6.3
 #> 5: 2.2 6.5
 #> 6: 2.1 7.6
 #> 
 #> $Sepal.Width_setosa
 #> Petal.Width value
 #> <num> <num>
 #> 1: 0.2 3.5
 #> 2: 0.2 3.0
 #> 3: 0.2 3.2
 #> 4: 0.2 3.1
 #> 5: 0.2 3.6
 #> 6: 0.4 3.9
 #> 
 #> $Sepal.Width_versicolor
 #> Petal.Width value
 #> <num> <num>
 #> 1: 1.4 3.2
 #> 2: 1.5 3.2
 #> 3: 1.5 3.1
 #> 4: 1.3 2.3
 #> 5: 1.5 2.8
 #> 6: 1.3 2.8
 #> 
 #> $Sepal.Width_virginica
 #> Petal.Width value
 #> <num> <num>
 #> 1: 2.5 3.3
 #> 2: 1.9 2.7
 #> 3: 2.1 3.0
 #> 4: 1.8 2.9
 #> 5: 2.2 3.0
 #> 6: 2.1 3.0
 #> 
 #> $Petal.Length_setosa
 #> Petal.Width value
 #> <num> <num>
 #> 1: 0.2 1.4
 #> 2: 0.2 1.4
 #> 3: 0.2 1.3
 #> 4: 0.2 1.5
 #> 5: 0.2 1.4
 #> 6: 0.4 1.7
 #> 
 #> $Petal.Length_versicolor
 #> Petal.Width value
 #> <num> <num>
 #> 1: 1.4 4.7
 #> 2: 1.5 4.5
 #> 3: 1.5 4.9
 #> 4: 1.3 4.0
 #> 5: 1.5 4.6
 #> 6: 1.3 4.5
 #> 
 #> $Petal.Length_virginica
 #> Petal.Width value
 #> <num> <num>
 #> 1: 2.5 6.0
 #> 2: 1.9 5.1
 #> 3: 2.1 5.9
 #> 4: 1.8 5.6
 #> 5: 2.2 5.8
 #> 6: 2.1 6.6
 
 # Example 3: Split specific columns using column names
list_res <- w2l_split(
 data = iris, # Input dataset
 cols2l = c("Sepal.Length", # Select columns by name
 "Sepal.Width"),
 by = "Species" # Split by Species column
)
 lapply(list_res, head) # Show first 6 rows of each split
 #> $Sepal.Length_setosa
 #> Petal.Length Petal.Width value
 #> <num> <num> <num>
 #> 1: 1.4 0.2 5.1
 #> 2: 1.4 0.2 4.9
 #> 3: 1.3 0.2 4.7
 #> 4: 1.5 0.2 4.6
 #> 5: 1.4 0.2 5.0
 #> 6: 1.7 0.4 5.4
 #> 
 #> $Sepal.Length_versicolor
 #> Petal.Length Petal.Width value
 #> <num> <num> <num>
 #> 1: 4.7 1.4 7.0
 #> 2: 4.5 1.5 6.4
 #> 3: 4.9 1.5 6.9
 #> 4: 4.0 1.3 5.5
 #> 5: 4.6 1.5 6.5
 #> 6: 4.5 1.3 5.7
 #> 
 #> $Sepal.Length_virginica
 #> Petal.Length Petal.Width value
 #> <num> <num> <num>
 #> 1: 6.0 2.5 6.3
 #> 2: 5.1 1.9 5.8
 #> 3: 5.9 2.1 7.1
 #> 4: 5.6 1.8 6.3
 #> 5: 5.8 2.2 6.5
 #> 6: 6.6 2.1 7.6
 #> 
 #> $Sepal.Width_setosa
 #> Petal.Length Petal.Width value
 #> <num> <num> <num>
 #> 1: 1.4 0.2 3.5
 #> 2: 1.4 0.2 3.0
 #> 3: 1.3 0.2 3.2
 #> 4: 1.5 0.2 3.1
 #> 5: 1.4 0.2 3.6
 #> 6: 1.7 0.4 3.9
 #> 
 #> $Sepal.Width_versicolor
 #> Petal.Length Petal.Width value
 #> <num> <num> <num>
 #> 1: 4.7 1.4 3.2
 #> 2: 4.5 1.5 3.2
 #> 3: 4.9 1.5 3.1
 #> 4: 4.0 1.3 2.3
 #> 5: 4.6 1.5 2.8
 #> 6: 4.5 1.3 2.8
 #> 
 #> $Sepal.Width_virginica
 #> Petal.Length Petal.Width value
 #> <num> <num> <num>
 #> 1: 6.0 2.5 3.3
 #> 2: 5.1 1.9 2.7
 #> 3: 5.9 2.1 3.0
 #> 4: 5.6 1.8 2.9
 #> 5: 5.8 2.2 3.0
 #> 6: 6.6 2.1 3.0
 # Returns similar structure to Example 2

nest_cv

 # Example: Cross-validation for nested data.table demonstrations
 
 # Setup test data
dt_nest <- w2l_nest(
 data = iris, # Input dataset
 cols2l = 1:2 # Nest first 2 columns
)
 
 # Example 1: Basic 2-fold cross-validation
 nest_cv(
 nest_dt = dt_nest, # Input nested data.table
 v = 2 # Number of folds (2-fold CV)
)
 #> name splits id train
 #> <fctr> <list> <char> <list>
 #> 1: Sepal.Length <vfold_split[75x75x150x4]> Fold1 <data.table[75x4]>
 #> 2: Sepal.Length <vfold_split[75x75x150x4]> Fold2 <data.table[75x4]>
 #> 3: Sepal.Width <vfold_split[75x75x150x4]> Fold1 <data.table[75x4]>
 #> 4: Sepal.Width <vfold_split[75x75x150x4]> Fold2 <data.table[75x4]>
 #> validate
 #> <list>
 #> 1: <data.table[75x4]>
 #> 2: <data.table[75x4]>
 #> 3: <data.table[75x4]>
 #> 4: <data.table[75x4]>
 
 # Example 2: Repeated 2-fold cross-validation
 nest_cv(
 nest_dt = dt_nest, # Input nested data.table
 v = 2, # Number of folds (2-fold CV)
 repeats = 2 # Number of repetitions
)
 #> name splits id id2 train
 #> <fctr> <list> <char> <char> <list>
 #> 1: Sepal.Length <vfold_split[75x75x150x4]> Repeat1 Fold1 <data.table[75x4]>
 #> 2: Sepal.Length <vfold_split[75x75x150x4]> Repeat1 Fold2 <data.table[75x4]>
 #> 3: Sepal.Length <vfold_split[75x75x150x4]> Repeat2 Fold1 <data.table[75x4]>
 #> 4: Sepal.Length <vfold_split[75x75x150x4]> Repeat2 Fold2 <data.table[75x4]>
 #> 5: Sepal.Width <vfold_split[75x75x150x4]> Repeat1 Fold1 <data.table[75x4]>
 #> 6: Sepal.Width <vfold_split[75x75x150x4]> Repeat1 Fold2 <data.table[75x4]>
 #> 7: Sepal.Width <vfold_split[75x75x150x4]> Repeat2 Fold1 <data.table[75x4]>
 #> 8: Sepal.Width <vfold_split[75x75x150x4]> Repeat2 Fold2 <data.table[75x4]>
 #> validate
 #> <list>
 #> 1: <data.table[75x4]>
 #> 2: <data.table[75x4]>
 #> 3: <data.table[75x4]>
 #> 4: <data.table[75x4]>
 #> 5: <data.table[75x4]>
 #> 6: <data.table[75x4]>
 #> 7: <data.table[75x4]>
 #> 8: <data.table[75x4]>

top_perc

 # Example 1: Basic usage with single trait
 # This example selects the top 10% of observations based on Petal.Width
 # keep_data=TRUE returns both summary statistics and the filtered data
 top_perc(iris, 
 perc = 0.1, # Select top 10%
 trait = c("Petal.Width"), # Column to analyze
 keep_data = TRUE) # Return both stats and filtered data
 #> $Petal.Width_0.1
 #> $Petal.Width_0.1$stat
 #> # A tibble: 1 × 5
 #> variable n mean sd top_perc
 #> <fct> <dbl> <dbl> <dbl> <chr> 
 #> 1 Petal.Width 17 2.34 0.1 10% 
 #> 
 #> $Petal.Width_0.1$data
 #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
 #> 1 6.3 3.3 6.0 2.5 virginica
 #> 2 6.5 3.0 5.8 2.2 virginica
 #> 3 7.2 3.6 6.1 2.5 virginica
 #> 4 5.8 2.8 5.1 2.4 virginica
 #> 5 6.4 3.2 5.3 2.3 virginica
 #> 6 7.7 3.8 6.7 2.2 virginica
 #> 7 7.7 2.6 6.9 2.3 virginica
 #> 8 6.9 3.2 5.7 2.3 virginica
 #> 9 6.4 2.8 5.6 2.2 virginica
 #> 10 7.7 3.0 6.1 2.3 virginica
 #> 11 6.3 3.4 5.6 2.4 virginica
 #> 12 6.7 3.1 5.6 2.4 virginica
 #> 13 6.9 3.1 5.1 2.3 virginica
 #> 14 6.8 3.2 5.9 2.3 virginica
 #> 15 6.7 3.3 5.7 2.5 virginica
 #> 16 6.7 3.0 5.2 2.3 virginica
 #> 17 6.2 3.4 5.4 2.3 virginica
 
 # Example 2: Using grouping with 'by' parameter
 # This example performs the same analysis but separately for each Species
 # Returns a tibble of summary statistics for each group (keep_data is not set, so no filtered data is returned)
 top_perc(iris, 
 perc = 0.1, # Select top 10%
 trait = c("Petal.Width"), # Column to analyze
 by = "Species") # Group by Species
 #> # A tibble: 3 × 6
 #> Species variable n mean sd top_perc
 #> <fct> <fct> <dbl> <dbl> <dbl> <chr> 
 #> 1 setosa Petal.Width 9 0.433 0.071 10% 
 #> 2 versicolor Petal.Width 5 1.66 0.089 10% 
 #> 3 virginica Petal.Width 6 2.45 0.055 10%
 
 # Example 3: Complex example with multiple percentages and grouping variables
 # Reshape data from wide to long format for Sepal.Length and Sepal.Width
iris |> 
 tidyr::pivot_longer(1:2,
 names_to = "names", 
 values_to = "values") |> 
 mintyr::top_perc(
 perc = c(0.1, -0.2),
 trait = "values",
 by = c("Species", "names"),
 type = "mean_sd")
 #> # A tibble: 12 × 7
 #> Species names variable n mean sd top_perc
 #> <fct> <chr> <fct> <dbl> <dbl> <dbl> <chr> 
 #> 1 setosa Sepal.Length values 5 5.64 0.134 10% 
 #> 2 setosa Sepal.Width values 6 4.08 0.194 10% 
 #> 3 versicolor Sepal.Length values 6 6.8 0.126 10% 
 #> 4 versicolor Sepal.Width values 5 3.26 0.089 10% 
 #> 5 virginica Sepal.Length values 5 7.74 0.089 10% 
 #> 6 virginica Sepal.Width values 5 3.6 0.2 10% 
 #> 7 setosa Sepal.Length values 11 4.53 0.135 -20% 
 #> 8 setosa Sepal.Width values 12 2.97 0.219 -20% 
 #> 9 versicolor Sepal.Length values 11 5.28 0.244 -20% 
 #> 10 versicolor Sepal.Width values 13 2.35 0.151 -20% 
 #> 11 virginica Sepal.Length values 11 5.79 0.336 -20% 
 #> 12 virginica Sepal.Width values 11 2.56 0.15 -20%