Helper functions for metan

See the section Rendering engine to know how HTML tables were generated.

Select helpers

The package metan re-exports the tidy select helpers and implements its own select helpers based on operations with prefixes and suffixes (difference_var(), intersect_var(), and union_var()), on the length of variable names (width_of(), width_greater_than(), and width_less_than()), and on case type (lower_case_only(), upper_case_only(), and title_case_only()).

Variables that start with a prefix and end with a suffix.

Here, we will select the variables from data_ge2 that start with “C” and end with “D”. To reduce the length of the outputs, only the first three rows are used.

library(metan)
# Keep just the first three rows so the printed tables stay short.
data_sel <- head(data_ge2, n = 3)
# Columns picked by intersect_var("C", "D").
print_table(select_cols(data_sel, intersect_var("C", "D")))

Variables that start with a prefix OR end with a suffix.

The following code selects variables that start with “C” or end with “D”.

# Columns picked by union_var("C", "D").
print_table(select_cols(data_sel, union_var("C", "D")))

Variables that start with a prefix AND do NOT end with a suffix.

The following code selects variables that start with “C” and do not end with “D”.

# Columns picked by difference_var("C", "D").
print_table(select_cols(data_sel, difference_var("C", "D")))

Selection based on length of column names.

Select variables with a specific name length (four letters).

# Columns picked by width_of(4).
print_table(select_cols(data_sel, width_of(4)))

Select variables with width less than n.

# Columns picked by width_less_than(3).
print_table(select_cols(data_sel, width_less_than(3)))

Select variables with width greater than n.

# Columns picked by width_greater_than(2).
print_table(select_cols(data_sel, width_greater_than(2)))

Select variables by case type

Let’s create a data frame with ‘messy’ column names.

# Three rows of data_ge with deliberately mixed-case column names.
df <- head(data_ge, n = 3)
names(df) <- c("Env", "gen", "Rep", "GY", "hm")
df %>% select_cols(lower_case_only()) %>% print_table()

df %>% select_cols(upper_case_only()) %>% print_table()

df %>% select_cols(title_case_only()) %>% print_table()

Remove rows or columns with NA values

The functions remove_rows_na() and remove_cols_na() are used to remove rows and columns with NA values, respectively.

# Copy data_g and blank out rows 1, 5, 10 in columns 3-5 and 10-15.
data_with_na <- data_g
data_with_na[c(1, 5, 10), c(3:5, 10:15)] <- NA
data_with_na %>% print_table()

data_with_na %>% remove_cols_na() %>% print_table()
# Warning: Column(s) PH, EH, EP, CW, KW, NR, NKR, CDED, PERK with NA values
# deleted.

data_with_na %>% remove_rows_na() %>% print_table()
# Warning: Row(s) 1, 5, 10 with NA values deleted.

Split a dataframe into subsets grouped by one or more factors

Group data and exclude all non-numeric variables

# Split data_ge by ENV and check the class of the result.
g1 <- data_ge %>% split_factors(ENV)
is.split_factors(g1)
# [1] TRUE

Group data and keep all original variables

# Split by ENV and GEN, keeping the factor columns; show the first subset.
g2 <- data_ge %>% split_factors(ENV, GEN, keep_factors = TRUE)
g2[[1]] %>% print_table()

Group a data frame using all factor variables

# Split the CO2 dataset with as.split_factors() and list the subset names.
g3 <- CO2 %>% as.split_factors()
names(g3)
#  [1] "Qn1 | Quebec | nonchilled"      "Qn2 | Quebec | nonchilled"     
#  [3] "Qn3 | Quebec | nonchilled"      "Qc1 | Quebec | chilled"        
#  [5] "Qc3 | Quebec | chilled"         "Qc2 | Quebec | chilled"        
#  [7] "Mn3 | Mississippi | nonchilled" "Mn2 | Mississippi | nonchilled"
#  [9] "Mn1 | Mississippi | nonchilled" "Mc2 | Mississippi | chilled"   
# [11] "Mc3 | Mississippi | chilled"    "Mc1 | Mississippi | chilled"

Make a two-way table based on categorical and numerical arguments

# Original data, one row per observation.
print_table(data_ge)

# Two-way table with GEN in rows, ENV in columns and GY as the cell value.
# Stored as `ge_mat` rather than `matrix` so that base::matrix() is not masked.
ge_mat <- make_mat(data_ge, row = GEN, col = ENV, val = GY)
print_table(ge_mat, rownames = TRUE)

Make upper and lower triangular matrices

# Correlation matrix for five traits of data_ge2, as a data frame.
# Each trait is listed once (the original call passed EL twice).
cor_mat <- corr_coef(data_ge2, EP, EL, ED, CD, CL)$cor %>%
  as.data.frame()

# Upper triangular
upp_mat <- make_upper_tri(cor_mat)
print_table(upp_mat, rownames = TRUE)


# Lower triangular
low_mat <- make_lower_tri(cor_mat)
print_table(low_mat, rownames = TRUE)

Make a symmetric matrix

# Turn the lower-triangular matrix into a symmetric one with make_sym().
sym <- low_mat %>% make_sym()
sym %>% print_table(rownames = TRUE)

Reorder a correlation matrix

Reorder the correlation matrix according to the correlation coefficient by using hclust for hierarchical clustering order. This is useful to identify the hidden pattern in the matrix.

# Correlation matrix before reordering. Row names are shown so each row can be
# matched to its variable and compared with the reordered matrix below.
print_table(cor_mat, rownames = TRUE)

# Reordered matrix; reorder_cormat() is given a matrix, and the result is
# converted back to a data frame for printing.
cor_mat %>%
  as.matrix() %>%
  reorder_cormat() %>%
  as.data.frame() %>%
  print_table(rownames = TRUE)

Compute harmonic and geometric means

# Harmonic and geometric means of a plain numeric vector.
num <- c(seq_len(20), 30, 50)
hmean(num)
# [1] 6.025626
gmean(num)
# [1] 9.552141

# The same helpers applied to a data frame.
round(hmean(data_ge2), 2)
# # A tibble: 1 × 15
#      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR   NKR  CDED  PERK
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1  2.44  1.28  0.53  15.1  49.4  28.8  15.9  23.0  166.  16.0  31.9  0.58  87.4
# # ℹ 2 more variables: TKW <dbl>, NKE <dbl>
data_ge2 %>% gmean(EP, EL, CL)
# # A tibble: 1 × 3
#      EP    EL    CL
#   <dbl> <dbl> <dbl>
# 1 0.534  15.1  28.9

Generate pairwise combinations of variables by applying one function to each pair

# Five columns of five uniform random draws each, with a different range per
# column.
data <- data.frame(
  A = runif(5, min = 3, max = 30),
  B = runif(5, min = 1, max = 10),
  C = runif(5, min = 9, max = 90),
  D = runif(5, min = 1, max = 90),
  E = runif(5, min = 5, max = 10)
)
# Pairwise combinations with the default settings.
c1 <- data %>% comb_vars()
c1 %>% print_table()


# Pairwise combinations using "*" as the function and order = "first".
c2 <- data %>% comb_vars(FUN = "*", order = "first")
c2 %>% print_table()

Combining data.frames by row, filling missing values

# Two data frames that share no column names.
df1 <- data.frame(
  v1 = c(1, 2),
  v2 = c(2, 3)
)
df2 <- data.frame(v3 = c(4, 5))
# Row-bind with the default fill value.
print_table(rbind_fill_id(df1, df2))

# Row-bind using "." as the fill value.
print_table(rbind_fill_id(df1, df2, .fill = "."))

Rescale a continuous vector to have specified minimum and maximum values

Rescale a numeric vector

# Rescale the integers 1 to 5 with the default settings.
resca(values = 1:5)
# [1]   0  25  50  75 100

Rescale using a data frame and select rescaled variables only

# Rescale GY and HM to the 0-1 range with keep = FALSE, then show the first
# rows.
print_table(
  head(
    resca(data_ge, GY, HM, new_min = 0, new_max = 1, keep = FALSE)
  )
)

Rescale within factors

library(tidyverse)
  # Average the selected traits for each ENV x GEN combination, then rescale
  # the traits whose names end with "L" within each ENV.
  data_ge2 %>%
    select(ENV, GEN, starts_with("N"), ends_with("L")) %>%
    group_by(ENV, GEN) %>%
    # across() replaces the superseded summarise_all(); .groups = "drop_last"
    # spells out the default regrouping (by ENV) so no message is emitted.
    summarise(across(everything(), mean), .groups = "drop_last") %>%
    group_by(ENV) %>%
    resca(ends_with("L")) %>%
    head(n = 13) %>%
    print_table()

Rendering engine

This vignette was built with pkgdown. All tables were produced with the package DT using the following function.

library(DT) # Used to make the tables
# Render a table as an interactive HTML widget with the DT package.
#
# Arguments:
#   table     Object that data.frame() can coerce (data frame, tibble, matrix).
#   rownames  Passed to datatable(); show row names as a column?
#   digits    Significant digits applied to columns of class "numeric".
#   ...       Further arguments forwarded to datatable().
#
# Returns the datatable widget, with significant-digit formatting applied to
# the "numeric" columns when there is at least one.
print_table <- function(table, rownames = FALSE, digits = 3, ...) {
  # Coerce once, so that the data shown and the column detection below refer
  # to the same columns. Inspecting the raw input would walk a matrix element
  # by element and yield column indices that do not exist in the widget.
  tbl <- data.frame(table)
  widget <- datatable(
    tbl,
    rownames = rownames,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      dom = "<<t>Bp>",
      buttons = c("copy", "excel", "pdf", "print")
    ),
    ...
  )
  # Only columns whose class is exactly "numeric" are formatted; integer and
  # other classes are left untouched. vapply() always returns a logical
  # vector, even with zero columns or multi-class columns.
  is_num <- vapply(
    tbl,
    function(col) identical(class(col), "numeric"),
    logical(1)
  )
  num_cols <- as.numeric(which(is_num))
  if (length(num_cols) > 0) {
    formatSignif(widget, columns = num_cols, digits = digits)
  } else {
    widget
  }
}

Tiago Olivoto

2024-12-13