Utilities for handling rows and columns — utils_rows

add_cols(): Add one or more columns to an existing data frame. If the specified .before or .after columns do not exist, columns are appended at the end of the data. Return a data frame with all the original columns in .data plus the columns declared in .... In add_cols() columns in .data are available for the expressions. So, it is possible to add a column based on existing data.
add_rows(): Add one or more rows to an existing data frame. If the specified .before or .after rows do not exist, rows are appended at the end of the data. Return a data frame with all the original rows in .data plus the rows declared in the ... argument.
add_row_id(): Add a column with the row id as the first column in .data.
add_prefix() and add_suffix() add prefixes and suffixes, respectively, to the variable names selected in the ... argument.
all_pairs(): Get all the possible pairs between the levels of a factor.
colnames_to_lower(): Translate all column names to lower case.
colnames_to_upper(): Translate all column names to upper case.
colnames_to_title(): Translate all column names to title case.
column_exists(): Checks if a column exists in a data frame. Return a logical value.
column_to_first(): Move columns to first positions in .data.
column_to_last(): Move columns to last positions in .data.
column_to_rownames(): Move a column of .data to its row names.
rownames_to_column(): Move the row names of .data to a new column.
remove_rownames(): Remove the row names of .data.
concatenate(): Concatenate columns of a data frame. If drop = TRUE then the existing variables are dropped. If pull = TRUE then the concatenated variable is pulled out to a vector. This is especially useful when using concatenate() to add columns to a data frame with add_cols().
get_levels(): Get the levels of a factor variable.
get_levels_comb(): Get the combination of the levels of a factor.
get_level_size(): Get the size of each level of a factor variable.
remove_cols(): Remove one or more columns from a data frame.
remove_rows(): Remove one or more rows from a data frame.
reorder_cols(): Reorder columns in a data frame.
select_cols(): Select one or more columns from a data frame.
select_first_col(): Select first variable, possibly with an offset.
select_last_col(): Select last variable, possibly with an offset.
select_numeric_cols(): Select all the numeric columns of a data frame.
select_non_numeric_cols(): Select all the non-numeric columns of a data frame.
select_rows(): Select one or more rows from a data frame.
tidy_colnames(): Tidy up column names with tidy_strings().

Usage

add_cols(.data, ..., .before = NULL, .after = NULL)

add_rows(.data, ..., .before = NULL, .after = NULL)

add_row_id(.data, var = "row_id")

all_pairs(.data, levels)

add_prefix(.data, ..., prefix, sep = "_")

add_suffix(.data, ..., suffix, sep = "_")

colnames_to_lower(.data)

colnames_to_upper(.data)

colnames_to_title(.data)

column_to_first(.data, ...)

column_to_last(.data, ...)

column_to_rownames(.data, var = "rowname")

rownames_to_column(.data, var = "rowname")

remove_rownames(.data, ...)

column_exists(.data, cols)

concatenate(
  .data,
  ...,
  prefix = NULL,
  suffix = NULL,
  new_var = new_var,
  sep = "_",
  drop = FALSE,
  pull = FALSE,
  .before = NULL,
  .after = NULL
)

get_levels(.data, ...)

get_levels_comb(.data, ...)

get_level_size(.data, ...)

reorder_cols(.data, ..., .before = NULL, .after = NULL)

remove_cols(.data, ...)

remove_rows(.data, ...)

select_first_col(.data, offset = NULL)

select_last_col(.data, offset = NULL)

select_numeric_cols(.data)

select_non_numeric_cols(.data)

select_cols(.data, ...)

select_rows(.data, ...)

tidy_colnames(.data, sep = "_")

Arguments

.data

A data frame

...

The argument depends on the function used.

For add_cols() and add_rows(), ... is name-value pairs. All values must have one element for each row in .data when using add_cols() or one element for each column in .data when using add_rows(). Values of length 1 will be recycled when using add_cols().
For remove_cols() and select_cols(), ... is the column name or column index of the variable(s) to be dropped or selected, respectively.
For add_prefix() and add_suffix(), ... is the column name to add the prefix or suffix, respectively. Select helpers are allowed.
For column_to_first() and column_to_last(), ... is the column name or column index of the variable(s) to be moved to first or last in .data.
For remove_rows() and select_rows(), ... is an integer row value.
For concatenate(), ... is the unquoted variable names to be concatenated.
For get_levels(), get_levels_comb(), and get_level_size(), ... is the unquoted variable names to get the levels, levels combinations and levels size, respectively.

.before, .after

For add_cols(), concatenate(), and reorder_cols(), one-based column index or column name where to add the new columns, default: .after last column. For add_rows(), one-based row index where to add the new rows, default: .after last row.

var

Name of column to use for rownames.

levels

The levels of a factor or a numeric vector.

prefix, suffix

The prefix and suffix used in add_prefix() and add_suffix(), respectively.

sep

The separator to appear when using concatenate(), add_prefix(), or add_suffix(). Defaults to "_".

cols

A quoted variable name to check if it exists in .data.

new_var

The name of the new variable containing the concatenated values. Defaults to new_var.

drop

Logical argument. If TRUE keeps the new variable new_var and drops the existing ones. Defaults to FALSE.

pull

Logical argument. If TRUE, returns the last column (on the assumption that's the column you've created most recently), as a vector.

offset

Set it to n to select the nth variable from the end (for select_last_col()) or from the beginning (for select_first_col()).

Author

Tiago Olivoto tiagoolivoto@gmail.com

Examples

# \donttest{
library(metan)

################# Adding columns #################
# Variables x and y .after last column
data_ge %>%
  add_cols(x = 10,
           y = 30)
#> # A tibble: 420 × 7
#>    ENV   GEN   REP      GY    HM     x     y
#>    <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
#>  1 E1    G1    1      2.17  44.9    10    30
#>  2 E1    G1    2      2.50  46.9    10    30
#>  3 E1    G1    3      2.43  47.8    10    30
#>  4 E1    G2    1      3.21  45.2    10    30
#>  5 E1    G2    2      2.93  45.3    10    30
#>  6 E1    G2    3      2.56  45.5    10    30
#>  7 E1    G3    1      2.77  46.7    10    30
#>  8 E1    G3    2      3.62  43.2    10    30
#>  9 E1    G3    3      2.28  47.8    10    30
#> 10 E1    G4    1      2.36  47.9    10    30
#> # ℹ 410 more rows
# Variables x and y .before the variable GEN
data_ge %>%
  add_cols(x = 10,
           y = 30,
           .before = GEN)
#> # A tibble: 420 × 7
#>    ENV       x     y GEN   REP      GY    HM
#>    <fct> <dbl> <dbl> <fct> <fct> <dbl> <dbl>
#>  1 E1       10    30 G1    1      2.17  44.9
#>  2 E1       10    30 G1    2      2.50  46.9
#>  3 E1       10    30 G1    3      2.43  47.8
#>  4 E1       10    30 G2    1      3.21  45.2
#>  5 E1       10    30 G2    2      2.93  45.3
#>  6 E1       10    30 G2    3      2.56  45.5
#>  7 E1       10    30 G3    1      2.77  46.7
#>  8 E1       10    30 G3    2      3.62  43.2
#>  9 E1       10    30 G3    3      2.28  47.8
#> 10 E1       10    30 G4    1      2.36  47.9
#> # ℹ 410 more rows

# Creating a new variable based on the existing ones.
data_ge %>%
  add_cols(GY2 = GY^2,
           GY2_HM = GY2 + HM,
           .after = GY)
#> # A tibble: 420 × 7
#>    ENV   GEN   REP      GY   GY2 GY2_HM    HM
#>    <fct> <fct> <fct> <dbl> <dbl>  <dbl> <dbl>
#>  1 E1    G1    1      2.17  4.70   49.6  44.9
#>  2 E1    G1    2      2.50  6.27   53.2  46.9
#>  3 E1    G1    3      2.43  5.89   53.6  47.8
#>  4 E1    G2    1      3.21 10.3    55.5  45.2
#>  5 E1    G2    2      2.93  8.60   53.9  45.3
#>  6 E1    G2    3      2.56  6.58   52.1  45.5
#>  7 E1    G3    1      2.77  7.67   54.4  46.7
#>  8 E1    G3    2      3.62 13.1    56.3  43.2
#>  9 E1    G3    3      2.28  5.18   52.9  47.8
#> 10 E1    G4    1      2.36  5.57   53.5  47.9
#> # ℹ 410 more rows

############### Reordering columns ###############
reorder_cols(data_ge2, NKR, .before = ENV)
#> # A tibble: 156 × 18
#>      NKR ENV   GEN   REP      PH    EH    EP    EL    ED    CL    CD    CW    KW
#>    <dbl> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  36.6 A1    H1    1      2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.
#>  2  31.4 A1    H1    2      2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.
#>  3  31.8 A1    H1    3      2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.
#>  4  32.8 A1    H10   1      2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.
#>  5  28   A1    H10   2      2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.
#>  6  32.8 A1    H10   3      2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.
#>  7  34.6 A1    H11   1      2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.
#>  8  34.4 A1    H11   2      2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.
#>  9  34.8 A1    H11   3      2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.
#> 10  31.6 A1    H12   1      2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.
#> # ℹ 146 more rows
#> # ℹ 5 more variables: NR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
reorder_cols(data_ge2, where(is.factor), .after = last_col())
#> # A tibble: 156 × 18
#>       PH    EH    EP    EL    ED    CL    CD    CW    KW    NR   NKR  CDED  PERK
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.  15.6  36.6 0.538  89.6
#>  2  2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.  16    31.4 0.551  89.5
#>  3  2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.  17.2  31.8 0.561  89.7
#>  4  2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.  15.6  32.8 0.586  87.9
#>  5  2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.  17.6  28   0.607  89.7
#>  6  2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.  16.8  32.8 0.577  88.5
#>  7  2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.  16.8  34.6 0.594  89.1
#>  8  2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.  13.6  34.4 0.608  88.3
#>  9  2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.  15.2  34.8 0.576  89.0
#> 10  2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.  14.8  31.6 0.597  88.7
#> # ℹ 146 more rows
#> # ℹ 5 more variables: TKW <dbl>, NKE <dbl>, ENV <fct>, GEN <fct>, REP <fct>

######## Selecting and removing columns ##########
select_cols(data_ge2, GEN, REP)
#> # A tibble: 156 × 2
#>    GEN   REP  
#>    <fct> <fct>
#>  1 H1    1    
#>  2 H1    2    
#>  3 H1    3    
#>  4 H10   1    
#>  5 H10   2    
#>  6 H10   3    
#>  7 H11   1    
#>  8 H11   2    
#>  9 H11   3    
#> 10 H12   1    
#> # ℹ 146 more rows
remove_cols(data_ge2, GEN, REP)
#> # A tibble: 156 × 16
#>    ENV      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR   NKR  CDED
#>    <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1     2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.  15.6  36.6 0.538
#>  2 A1     2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.  16    31.4 0.551
#>  3 A1     2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.  17.2  31.8 0.561
#>  4 A1     2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.  15.6  32.8 0.586
#>  5 A1     2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.  17.6  28   0.607
#>  6 A1     2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.  16.8  32.8 0.577
#>  7 A1     2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.  16.8  34.6 0.594
#>  8 A1     2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.  13.6  34.4 0.608
#>  9 A1     2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.  15.2  34.8 0.576
#> 10 A1     2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.  14.8  31.6 0.597
#> # ℹ 146 more rows
#> # ℹ 3 more variables: PERK <dbl>, TKW <dbl>, NKE <dbl>

########## Selecting and removing rows ###########
select_rows(data_ge2, 2:3)
#> # A tibble: 2 × 18
#>   ENV   GEN   REP      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR
#>   <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1    H1    2      2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.  16  
#> 2 A1    H1    3      2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.  17.2
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
remove_rows(data_ge2, 2:3)
#> # A tibble: 154 × 18
#>    ENV   GEN   REP      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR
#>    <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    1      2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.  15.6
#>  2 A1    H10   1      2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.  15.6
#>  3 A1    H10   2      2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.  17.6
#>  4 A1    H10   3      2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.  16.8
#>  5 A1    H11   1      2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.  16.8
#>  6 A1    H11   2      2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.  13.6
#>  7 A1    H11   3      2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.  15.2
#>  8 A1    H12   1      2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.  14.8
#>  9 A1    H12   2      2.56  1.56 0.616  15.7  49.9  29.9  16.2  24.0  188.  17.2
#> 10 A1    H12   3      2.79  1.53 0.546  15.0  52.7  31.4  15.2  32.9  193.  20  
#> # ℹ 144 more rows
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>

########### Concatenating columns ################
concatenate(data_ge, ENV, GEN, REP)
#> # A tibble: 420 × 6
#>    ENV   GEN   REP      GY    HM new_var
#>    <fct> <fct> <fct> <dbl> <dbl> <chr>  
#>  1 E1    G1    1      2.17  44.9 E1_G1_1
#>  2 E1    G1    2      2.50  46.9 E1_G1_2
#>  3 E1    G1    3      2.43  47.8 E1_G1_3
#>  4 E1    G2    1      3.21  45.2 E1_G2_1
#>  5 E1    G2    2      2.93  45.3 E1_G2_2
#>  6 E1    G2    3      2.56  45.5 E1_G2_3
#>  7 E1    G3    1      2.77  46.7 E1_G3_1
#>  8 E1    G3    2      3.62  43.2 E1_G3_2
#>  9 E1    G3    3      2.28  47.8 E1_G3_3
#> 10 E1    G4    1      2.36  47.9 E1_G4_1
#> # ℹ 410 more rows
concatenate(data_ge, ENV, GEN, REP, drop = TRUE)
#> # A tibble: 420 × 1
#>    new_var
#>    <chr>  
#>  1 E1_G1_1
#>  2 E1_G1_2
#>  3 E1_G1_3
#>  4 E1_G2_1
#>  5 E1_G2_2
#>  6 E1_G2_3
#>  7 E1_G3_1
#>  8 E1_G3_2
#>  9 E1_G3_3
#> 10 E1_G4_1
#> # ℹ 410 more rows

# Combine with add_cols() and replace_string()
data_ge2 %>%
 add_cols(ENV_GEN = concatenate(., ENV, GEN, pull = TRUE),
          .after = GEN) %>%
 replace_string(ENV_GEN,
                pattern = "H",
                replacement = "HYB_")
#> # A tibble: 156 × 19
#>    ENV   GEN   ENV_GEN   REP      PH    EH    EP    EL    ED    CL    CD    CW
#>    <fct> <fct> <chr>     <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    A1_HYB_1  1      2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1
#>  2 A1    H1    A1_HYB_1  2      2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4
#>  3 A1    H1    A1_HYB_1  3      2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0
#>  4 A1    H10   A1_HYB_10 1      2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2
#>  5 A1    H10   A1_HYB_10 2      2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7
#>  6 A1    H10   A1_HYB_10 3      2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8
#>  7 A1    H11   A1_HYB_11 1      2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2
#>  8 A1    H11   A1_HYB_11 2      2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1
#>  9 A1    H11   A1_HYB_11 3      2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5
#> 10 A1    H12   A1_HYB_12 1      2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1
#> # ℹ 146 more rows
#> # ℹ 7 more variables: KW <dbl>, NR <dbl>, NKR <dbl>, CDED <dbl>, PERK <dbl>,
#> #   TKW <dbl>, NKE <dbl>

# Use prefixes and suffixes
concatenate(data_ge2, REP, prefix = "REP", new_var = REP)
#> # A tibble: 156 × 18
#>    ENV   GEN   REP      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR
#>    <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    REP_1  2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.  15.6
#>  2 A1    H1    REP_2  2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.  16  
#>  3 A1    H1    REP_3  2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.  17.2
#>  4 A1    H10   REP_1  2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.  15.6
#>  5 A1    H10   REP_2  2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.  17.6
#>  6 A1    H10   REP_3  2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.  16.8
#>  7 A1    H11   REP_1  2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.  16.8
#>  8 A1    H11   REP_2  2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.  13.6
#>  9 A1    H11   REP_3  2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.  15.2
#> 10 A1    H12   REP_1  2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.  14.8
#> # ℹ 146 more rows
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>

# Use prefixes and suffixes (applied here to the columns PH, EH, EP, and EL)
add_prefix(data_ge2, PH, EH, EP, EL, prefix = "EAR")
#> # A tibble: 156 × 18
#>    ENV   GEN   REP   EAR_PH EAR_EH EAR_EP EAR_EL    ED    CL    CD    CW    KW
#>    <fct> <fct> <fct>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    1       2.61   1.71  0.658   16.1  52.2  28.1  16.3  25.1  217.
#>  2 A1    H1    2       2.87   1.76  0.628   14.2  50.3  27.6  14.5  21.4  184.
#>  3 A1    H1    3       2.68   1.58  0.591   16.0  50.7  28.4  16.4  24.0  208.
#>  4 A1    H10   1       2.83   1.64  0.581   16.7  54.1  31.7  17.4  26.2  194.
#>  5 A1    H10   2       2.79   1.71  0.616   14.9  52.7  32.0  15.5  20.7  176.
#>  6 A1    H10   3       2.72   1.51  0.554   16.7  52.7  30.4  17.5  26.8  207.
#>  7 A1    H11   1       2.75   1.51  0.549   17.4  51.7  30.6  18.0  26.2  217.
#>  8 A1    H11   2       2.72   1.56  0.573   16.7  47.2  28.7  17.2  24.1  181.
#>  9 A1    H11   3       2.77   1.67  0.600   15.8  47.9  27.6  16.4  20.5  166.
#> 10 A1    H12   1       2.73   1.54  0.563   14.9  47.5  28.2  15.5  20.1  161.
#> # ℹ 146 more rows
#> # ℹ 6 more variables: NR <dbl>, NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>,
#> #   NKE <dbl>
add_suffix(data_ge2, PH, EH, EP, EL, suffix = "EAR", sep = ".")
#> # A tibble: 156 × 18
#>    ENV   GEN   REP   PH.EAR EH.EAR EP.EAR EL.EAR    ED    CL    CD    CW    KW
#>    <fct> <fct> <fct>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    1       2.61   1.71  0.658   16.1  52.2  28.1  16.3  25.1  217.
#>  2 A1    H1    2       2.87   1.76  0.628   14.2  50.3  27.6  14.5  21.4  184.
#>  3 A1    H1    3       2.68   1.58  0.591   16.0  50.7  28.4  16.4  24.0  208.
#>  4 A1    H10   1       2.83   1.64  0.581   16.7  54.1  31.7  17.4  26.2  194.
#>  5 A1    H10   2       2.79   1.71  0.616   14.9  52.7  32.0  15.5  20.7  176.
#>  6 A1    H10   3       2.72   1.51  0.554   16.7  52.7  30.4  17.5  26.8  207.
#>  7 A1    H11   1       2.75   1.51  0.549   17.4  51.7  30.6  18.0  26.2  217.
#>  8 A1    H11   2       2.72   1.56  0.573   16.7  47.2  28.7  17.2  24.1  181.
#>  9 A1    H11   3       2.77   1.67  0.600   15.8  47.9  27.6  16.4  20.5  166.
#> 10 A1    H12   1       2.73   1.54  0.563   14.9  47.5  28.2  15.5  20.1  161.
#> # ℹ 146 more rows
#> # ℹ 6 more variables: NR <dbl>, NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>,
#> #   NKE <dbl>

# Use prefixes and suffixes (colnames)
concatenate(data_ge2, REP, prefix = "REP", new_var = REP)
#> # A tibble: 156 × 18
#>    ENV   GEN   REP      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR
#>    <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    REP_1  2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.  15.6
#>  2 A1    H1    REP_2  2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.  16  
#>  3 A1    H1    REP_3  2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.  17.2
#>  4 A1    H10   REP_1  2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.  15.6
#>  5 A1    H10   REP_2  2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.  17.6
#>  6 A1    H10   REP_3  2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.  16.8
#>  7 A1    H11   REP_1  2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.  16.8
#>  8 A1    H11   REP_2  2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.  13.6
#>  9 A1    H11   REP_3  2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.  15.2
#> 10 A1    H12   REP_1  2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.  14.8
#> # ℹ 146 more rows
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>


########### formatting column names ##############
# Creating data with messy column names
df <- head(data_ge, 3)
colnames(df) <- c("Env", "gen", "Rep", "GY", "hm")
df
#> # A tibble: 3 × 5
#>   Env   gen   Rep      GY    hm
#>   <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1    G1    1      2.17  44.9
#> 2 E1    G1    2      2.50  46.9
#> 3 E1    G1    3      2.43  47.8
colnames_to_lower(df)
#> # A tibble: 3 × 5
#>   env   gen   rep      gy    hm
#>   <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1    G1    1      2.17  44.9
#> 2 E1    G1    2      2.50  46.9
#> 3 E1    G1    3      2.43  47.8
colnames_to_upper(df)
#> # A tibble: 3 × 5
#>   ENV   GEN   REP      GY    HM
#>   <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1    G1    1      2.17  44.9
#> 2 E1    G1    2      2.50  46.9
#> 3 E1    G1    3      2.43  47.8
colnames_to_title(df)
#> # A tibble: 3 × 5
#>   Env   Gen   Rep      Gy    Hm
#>   <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1    G1    1      2.17  44.9
#> 2 E1    G1    2      2.50  46.9
#> 3 E1    G1    3      2.43  47.8


################### Adding rows ##################
data_ge %>%
  add_rows(GY = 10.3,
           HM = 100.11,
           .after = 1)
#> # A tibble: 421 × 5
#>    ENV   GEN   REP      GY    HM
#>    <fct> <fct> <fct> <dbl> <dbl>
#>  1 E1    G1    1      2.17  44.9
#>  2 NA    NA    NA    10.3  100. 
#>  3 E1    G1    2      2.50  46.9
#>  4 E1    G1    3      2.43  47.8
#>  5 E1    G2    1      3.21  45.2
#>  6 E1    G2    2      2.93  45.3
#>  7 E1    G2    3      2.56  45.5
#>  8 E1    G3    1      2.77  46.7
#>  9 E1    G3    2      3.62  43.2
#> 10 E1    G3    3      2.28  47.8
#> # ℹ 411 more rows

########## checking if a column exists ###########
column_exists(data_g, "GEN")
#> [1] TRUE

####### get the levels, level combinations and size of levels ########
get_levels(data_g, GEN)
#>  [1] "H1"  "H10" "H11" "H12" "H13" "H2"  "H3"  "H4"  "H5"  "H6"  "H7"  "H8" 
#> [13] "H9" 
get_levels_comb(data_ge, ENV, GEN)
#> # A tibble: 140 × 2
#>    ENV   GEN  
#>    <fct> <fct>
#>  1 E1    G1   
#>  2 E10   G1   
#>  3 E11   G1   
#>  4 E12   G1   
#>  5 E13   G1   
#>  6 E14   G1   
#>  7 E2    G1   
#>  8 E3    G1   
#>  9 E4    G1   
#> 10 E5    G1   
#> # ℹ 130 more rows
get_level_size(data_g, GEN)
#> # A tibble: 13 × 17
#>    GEN     REP    PH    EH    EP    EL    ED    CL    CD    CW    KW    NR   NKR
#>    <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#>  1 H1        3     3     3     3     3     3     3     3     3     3     3     3
#>  2 H10       3     3     3     3     3     3     3     3     3     3     3     3
#>  3 H11       3     3     3     3     3     3     3     3     3     3     3     3
#>  4 H12       3     3     3     3     3     3     3     3     3     3     3     3
#>  5 H13       3     3     3     3     3     3     3     3     3     3     3     3
#>  6 H2        3     3     3     3     3     3     3     3     3     3     3     3
#>  7 H3        3     3     3     3     3     3     3     3     3     3     3     3
#>  8 H4        3     3     3     3     3     3     3     3     3     3     3     3
#>  9 H5        3     3     3     3     3     3     3     3     3     3     3     3
#> 10 H6        3     3     3     3     3     3     3     3     3     3     3     3
#> 11 H7        3     3     3     3     3     3     3     3     3     3     3     3
#> 12 H8        3     3     3     3     3     3     3     3     3     3     3     3
#> 13 H9        3     3     3     3     3     3     3     3     3     3     3     3
#> # ℹ 4 more variables: CDED <int>, PERK <int>, TKW <int>, NKE <int>

############## all possible pairs ################
all_pairs(data_g, GEN)
#>     V1  V2
#> 1   H1 H10
#> 2   H1 H11
#> 3   H1 H12
#> 4   H1 H13
#> 5   H1  H2
#> 6   H1  H3
#> 7   H1  H4
#> 8   H1  H5
#> 9   H1  H6
#> 10  H1  H7
#> 11  H1  H8
#> 12  H1  H9
#> 13 H10 H11
#> 14 H10 H12
#> 15 H10 H13
#> 16 H10  H2
#> 17 H10  H3
#> 18 H10  H4
#> 19 H10  H5
#> 20 H10  H6
#> 21 H10  H7
#> 22 H10  H8
#> 23 H10  H9
#> 24 H11 H12
#> 25 H11 H13
#> 26 H11  H2
#> 27 H11  H3
#> 28 H11  H4
#> 29 H11  H5
#> 30 H11  H6
#> 31 H11  H7
#> 32 H11  H8
#> 33 H11  H9
#> 34 H12 H13
#> 35 H12  H2
#> 36 H12  H3
#> 37 H12  H4
#> 38 H12  H5
#> 39 H12  H6
#> 40 H12  H7
#> 41 H12  H8
#> 42 H12  H9
#> 43 H13  H2
#> 44 H13  H3
#> 45 H13  H4
#> 46 H13  H5
#> 47 H13  H6
#> 48 H13  H7
#> 49 H13  H8
#> 50 H13  H9
#> 51  H2  H3
#> 52  H2  H4
#> 53  H2  H5
#> 54  H2  H6
#> 55  H2  H7
#> 56  H2  H8
#> 57  H2  H9
#> 58  H3  H4
#> 59  H3  H5
#> 60  H3  H6
#> 61  H3  H7
#> 62  H3  H8
#> 63  H3  H9
#> 64  H4  H5
#> 65  H4  H6
#> 66  H4  H7
#> 67  H4  H8
#> 68  H4  H9
#> 69  H5  H6
#> 70  H5  H7
#> 71  H5  H8
#> 72  H5  H9
#> 73  H6  H7
#> 74  H6  H8
#> 75  H6  H9
#> 76  H7  H8
#> 77  H7  H9
#> 78  H8  H9

########## select numeric variables only #########
select_numeric_cols(data_g)
#> # A tibble: 39 × 15
#>       PH    EH    EP    EL    ED    CL    CD    CW    KW    NR   NKR  CDED  PERK
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  2.11 1.05  0.497  15.7  49.9  30.5  16.6  28.6  164.  15.6  31.2 0.612  85.1
#>  2  2.20 1.09  0.492  13.7  49.2  30.5  14.7  22.3  130.  16.4  24.8 0.619  85.2
#>  3  2.29 1.15  0.502  15.1  52.6  31.7  16.2  29.6  176.  15.6  29.2 0.603  85.9
#>  4  1.79 0.888 0.514  13.9  44.1  26.2  15.0  12.9  116.  14.8  33   0.596  89.8
#>  5  2.05 1.03  0.504  13.6  43.9  23.5  14.4  11.5  118.  16    32.4 0.535  91.1
#>  6  2.27 1.11  0.491  14.5  43.7  24.6  16.1  12.5  128.  15.2  34.6 0.566  90.7
#>  7  1.71 0.808 0.489  15.5  45.2  25.0  16.7  15.2  140.  15.6  36   0.552  90.3
#>  8  2.09 1.06  0.509  12.2  46.9  26.5  14.3  13.5  114.  16.8  26.2 0.566  89.3
#>  9  2.5  1.44  0.577  15.0  49.0  27.5  15.2  19.4  168.  16.4  35   0.562  89.6
#> 10  2.52 1.52  0.601  14.4  49.2  28.4  15    18.2  153.  16.4  32   0.577  89.2
#> # ℹ 29 more rows
#> # ℹ 2 more variables: TKW <dbl>, NKE <dbl>
select_non_numeric_cols(data_g)
#> # A tibble: 39 × 2
#>    GEN   REP  
#>    <fct> <fct>
#>  1 H1    1    
#>  2 H1    2    
#>  3 H1    3    
#>  4 H10   1    
#>  5 H10   2    
#>  6 H10   3    
#>  7 H11   1    
#>  8 H11   2    
#>  9 H11   3    
#> 10 H12   1    
#> # ℹ 29 more rows
# }