Helper functions for metan
Tiago Olivoto
2024-12-13
Source:vignettes/vignettes_helper.Rmd
vignettes_helper.Rmd
See the section Rendering engine to know how HTML tables were generated.
Select helpers
The package metan
reexports the tidy
select helpers and implements its own select helpers based on operations
with prefixes and suffixes (difference_var()
,
intersect_var()
, and union_var()
), length of
variable names (width_of()
,
width_greater_than()
, and width_less_than()
),
and on case type (lower_case_only()
,
upper_case_only()
, and title_case_only()
).
Variables that start with a prefix and end with a suffix.
Here, we will select the variables from data_ge2
that
start with “C” and end with “D”. Just to reduce the length of outputs,
only three rows are selected.
library(metan)
data_sel <- head(data_ge2, 3)
data_sel %>%
select_cols(intersect_var("C", "D")) %>%
print_table()
Variables that start with a prefix OR end with a suffix.
The following code selects variables that start with “C” or end with “D”.
data_sel %>%
select_cols(union_var("C", "D")) %>%
print_table()
Variables that start with a prefix AND do NOT end with a suffix.
The following code selects variables that start with “C” and do not end with “D”.
data_sel %>%
select_cols(difference_var("C", "D")) %>%
print_table()
Selection based on length of column names.
- Select variables with a specific name length (four letters)
data_sel %>%
select_cols(width_of(4)) %>%
print_table()
- Select variables with width less than n.
data_sel %>%
select_cols(width_less_than(3)) %>%
print_table()
- Select variables with width greater than n.
data_sel %>%
select_cols(width_greater_than(2)) %>%
print_table()
Select variables by case type
Let’s create a data frame with ‘messy’ column names.
df <- head(data_ge, 3)
colnames(df) <- c("Env", "gen", "Rep", "GY", "hm")
select_cols(df, lower_case_only()) %>% print_table()
select_cols(df, upper_case_only()) %>% print_table()
select_cols(df, title_case_only()) %>% print_table()
Remove rows or columns with NA values
The functions remove_rows_na()
and
remove_cols_na()
are used to remove rows and columns with
NA values, respectively.
remove_cols_na(data_with_na) %>% print_table()
# Warning: Column(s) PH, EH, EP, CW, KW, NR, NKR, CDED, PERK with NA values
# deleted.
remove_rows_na(data_with_na) %>% print_table()
# Warning: Row(s) 1, 5, 10 with NA values deleted.
Split a dataframe into subsets grouped by one or more factors
Group data and exclude all non-numeric variables
g1 <- split_factors(data_ge, ENV)
is.split_factors(g1)
# [1] TRUE
Group data and keep all original variables
g2 <- split_factors(data_ge, ENV, GEN, keep_factors = TRUE)
print_table(g2[[1]])
Group a data frame using all factor variables
g3 <- as.split_factors(CO2)
names(g3)
# [1] "Qn1 | Quebec | nonchilled" "Qn2 | Quebec | nonchilled"
# [3] "Qn3 | Quebec | nonchilled" "Qc1 | Quebec | chilled"
# [5] "Qc3 | Quebec | chilled" "Qc2 | Quebec | chilled"
# [7] "Mn3 | Mississippi | nonchilled" "Mn2 | Mississippi | nonchilled"
# [9] "Mn1 | Mississippi | nonchilled" "Mc2 | Mississippi | chilled"
# [11] "Mc3 | Mississippi | chilled" "Mc1 | Mississippi | chilled"
Make a two-way table based on categorical and numerical arguments
print_table(data_ge)
matrix <- make_mat(data_ge, row = GEN, col = ENV, val = GY)
print_table(matrix, rownames = TRUE)
Make upper and lower triangular matrices
# Correlation matrix of five traits, converted to a data frame for the
# helpers used below. Each trait is listed once.
cor_mat <- corr_coef(data_ge2, EP, EL, ED, CD, CL)$cor %>% as.data.frame()
# Upper triangular
upp_mat <- make_upper_tri(cor_mat)
print_table(upp_mat, rownames = TRUE)
# Lower triangular
low_mat <- make_lower_tri(cor_mat)
print_table(low_mat, rownames = TRUE)
Make a symmetric matrix
sym <- make_sym(low_mat)
print_table(sym, rownames = TRUE)
Reorder a correlation matrix
Reorder the correlation matrix according to the correlation coefficient by using hclust for hierarchical clustering order. This is useful to identify the hidden pattern in the matrix.
print_table(cor_mat)
reorder_cormat(as.matrix(cor_mat)) %>%
as.data.frame() %>%
print_table(rownames = TRUE)
Compute harmonic and geometric means
num <- c(1:20, 30, 50)
hmean(num)
# [1] 6.025626
gmean(num)
# [1] 9.552141
hmean(data_ge2) %>% round(2)
# # A tibble: 1 × 15
# PH EH EP EL ED CL CD CW KW NR NKR CDED PERK
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2.44 1.28 0.53 15.1 49.4 28.8 15.9 23.0 166. 16.0 31.9 0.58 87.4
# # ℹ 2 more variables: TKW <dbl>, NKE <dbl>
gmean(data_ge2, EP, EL, CL)
# # A tibble: 1 × 3
# EP EL CL
# <dbl> <dbl> <dbl>
# 1 0.534 15.1 28.9
Generate pairwise combinations of variables by applying one function to each pair
data <- data.frame(A = runif(n = 5, min = 3, max = 30),
B = runif(n = 5, min = 1, max = 10),
C = runif(n = 5, min = 9, max = 90),
D = runif(n = 5, min = 1, max = 90),
E = runif(n = 5, min = 5, max = 10))
c1 <- comb_vars(data)
print_table(c1)
c2 <- comb_vars(data, FUN = "*", order = "first")
print_table(c2)
Combining data.frames by row, filling missing values
df1 <- data.frame(v1 = c(1, 2), v2 = c(2, 3))
df2 <- data.frame(v3 = c(4, 5))
rbind_fill_id(df1, df2) %>% print_table()
rbind_fill_id(df1, df2, .fill = ".") %>% print_table()
Rescale a continuous vector to have specified minimum and maximum values
Rendering engine
This vignette was built with pkgdown. All tables were produced
with the package DT
using the
following function.
library(DT) # Used to make the tables
# Function to make HTML tables
# Render an object as an interactive HTML table using DT.
#
# Arguments:
#   table    Object to display; it is converted with data.frame() before
#            being handed to datatable().
#   rownames Forwarded to datatable() as its `rownames` argument.
#   digits   Number of significant digits used by formatSignif() for the
#            columns of class "numeric".
#   ...      Further arguments forwarded to datatable().
#
# Returns the datatable widget; formatSignif() is applied only when at least
# one column is of class "numeric".
print_table <- function(table, rownames = FALSE, digits = 3, ...) {
  # Convert once so that the displayed data and the column-class check below
  # look at the same columns (a matrix or vector input would otherwise be
  # scanned element by element, giving indices that do not match the table).
  tbl <- data.frame(table)
  df <- datatable(tbl, rownames = rownames, extensions = 'Buttons',
                  options = list(scrollX = TRUE,
                                 dom = '<<t>Bp>',
                                 buttons = c('copy', 'excel', 'pdf', 'print')),
                  ...)
  # vapply() always returns one logical per column, whereas sapply() returns
  # a list as soon as any column carries more than one class.
  is_num <- vapply(tbl, function(col) identical(class(col), "numeric"),
                   logical(1))
  num_cols <- as.numeric(which(is_num))
  if (length(num_cols) > 0) {
    formatSignif(df, columns = num_cols, digits = digits)
  } else {
    df
  }
}