add_cols()
: Add one or more columns to an existing data frame. If the specified `.before`
or `.after`
columns do not exist, the new columns are appended at the end of the data. Returns a data frame with all the original columns in `.data`
plus the columns declared in `...`.
In `add_cols()`, the columns in `.data`
are available for the expressions, so it is possible to add a column based on existing data.
add_rows()
: Add one or more rows to an existing data frame. If the specified `.before`
or `.after`
rows do not exist, the new rows are appended at the end of the data. Returns a data frame with all the original rows in `.data`
plus the rows declared in the `...`
argument.
add_row_id()
: Add a column with the row id as the first column in.data
.add_prefix()
andadd_suffix()
add prefixes and suffixes, respectively, in variable names selected in...
argument.all_pairs()
: Get all the possible pairs between the levels of a factor.colnames_to_lower()
: Translate all column names to lower case.colnames_to_upper()
: Translate all column names to upper case.colnames_to_title()
: Translate all column names to title case.column_exists()
: Checks if a column exists in a data frame. Returns a logical value.
column_to_first()
: Move columns to the first positions in `.data`.
column_to_last()
: Move columns to the last positions in `.data`.
column_to_rownames()
: Move a column of `.data`
to its row names.
rownames_to_column()
: Move the row names of.data
to a new column.remove_rownames()
: Remove the row names of.data
.concatenate()
: Concatenate columns of a data frame. If `drop = TRUE`,
the existing variables are dropped. If `pull = TRUE`,
the concatenated variable is pulled out as a vector. This is especially useful when using `concatenate()`
to add columns to a data frame with `add_cols()`.
get_levels()
: Get the levels of a factor variable.get_levels_comb()
: Get the combination of the levels of a factor.get_level_size()
: Get the size of each level of a factor variable.remove_cols()
: Remove one or more columns from a data frame.remove_rows()
: Remove one or more rows from a data frame.reorder_cols()
: Reorder columns in a data frame.select_cols()
: Select one or more columns from a data frame.select_first_col()
: Select first variable, possibly with an offset.select_last_col()
: Select last variable, possibly with an offset.select_numeric_cols()
: Select all the numeric columns of a data frame.select_non_numeric_cols()
: Select all the non-numeric columns of a data frame.select_rows()
: Select one or more rows from a data frame.tidy_colnames()
: Tidy up column names withtidy_strings()
.
Usage
add_cols(.data, ..., .before = NULL, .after = NULL)
add_rows(.data, ..., .before = NULL, .after = NULL)
add_row_id(.data, var = "row_id")
all_pairs(.data, levels)
add_prefix(.data, ..., prefix, sep = "_")
add_suffix(.data, ..., suffix, sep = "_")
colnames_to_lower(.data)
colnames_to_upper(.data)
colnames_to_title(.data)
column_to_first(.data, ...)
column_to_last(.data, ...)
column_to_rownames(.data, var = "rowname")
rownames_to_column(.data, var = "rowname")
remove_rownames(.data, ...)
column_exists(.data, cols)
concatenate(
.data,
...,
prefix = NULL,
suffix = NULL,
new_var = new_var,
sep = "_",
drop = FALSE,
pull = FALSE,
.before = NULL,
.after = NULL
)
get_levels(.data, ...)
get_levels_comb(.data, ...)
get_level_size(.data, ...)
reorder_cols(.data, ..., .before = NULL, .after = NULL)
remove_cols(.data, ...)
remove_rows(.data, ...)
select_first_col(.data, offset = NULL)
select_last_col(.data, offset = NULL)
select_numeric_cols(.data)
select_non_numeric_cols(.data)
select_cols(.data, ...)
select_rows(.data, ...)
tidy_colnames(.data, sep = "_")
Arguments
- .data
A data frame
- ...
The argument depends on the function used.
For `add_cols()` and `add_rows()`, `...` is a set of name-value pairs. All values must have one element for each row in `.data`
when using `add_cols()`, or one element for each column in `.data`
when using `add_rows()`. Values of length 1 will be recycled when using `add_cols()`
.
For `remove_cols()` and `select_cols()`, `...`
is the column name or column index of the variable(s) to be dropped or selected, respectively.
For
add_prefix()
andadd_suffix()
,...
is the column name to add the prefix or suffix, respectively. Select helpers are allowed.For
`column_to_first()` and `column_to_last()`, `...`
is the column name or column index of the variable(s) to be moved to the first or last position in `.data`
.For
remove_rows()
andselect_rows()
,...
is an integer row value.For
concatenate()
,...
is the unquoted variable names to be concatenated.For
`get_levels()`, `get_levels_comb()`, and `get_level_size()`, `...`
is the unquoted variable names for which to get the levels, level combinations, and level sizes, respectively.
- .before, .after
For `add_cols()`, `concatenate()`, and `reorder_cols()`, the one-based column index or column name where the new (or moved) columns are placed; by default, they are placed after the last column. For `add_rows()`, the one-based row index where the new rows are added; by default, they are placed after the last row.
- var
Name of column to use for rownames.
- levels
The levels of a factor or a numeric vector.
- prefix, suffix
The prefix and suffix used in
add_prefix()
andadd_suffix()
, respectively.- sep
The separator to use with `concatenate()`, `add_prefix()`, or `add_suffix()`. Defaults to `"_"`.
- cols
A quoted variable name to check if it exists in
.data
.- new_var
The name of the new variable containing the concatenated values. Defaults to
new_var
.- drop
Logical argument. If `TRUE`, keeps the new variable `new_var` and drops the existing ones. Defaults to `FALSE`.
- pull
Logical argument. If
TRUE
, returns the last column (on the assumption that's the column you've created most recently), as a vector.- offset
Set it to n to select the nth variable from the end (for `select_last_col()`) or from the beginning (for `select_first_col()`).
Author
Tiago Olivoto tiagoolivoto@gmail.com
Examples
# \donttest{
library(metan)
################# Adding columns #################
# Variables x and y .after last column
data_ge %>%
add_cols(x = 10,
y = 30)
#> # A tibble: 420 × 7
#> ENV GEN REP GY HM x y
#> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 E1 G1 1 2.17 44.9 10 30
#> 2 E1 G1 2 2.50 46.9 10 30
#> 3 E1 G1 3 2.43 47.8 10 30
#> 4 E1 G2 1 3.21 45.2 10 30
#> 5 E1 G2 2 2.93 45.3 10 30
#> 6 E1 G2 3 2.56 45.5 10 30
#> 7 E1 G3 1 2.77 46.7 10 30
#> 8 E1 G3 2 3.62 43.2 10 30
#> 9 E1 G3 3 2.28 47.8 10 30
#> 10 E1 G4 1 2.36 47.9 10 30
#> # ℹ 410 more rows
# Variables x and y .before the variable GEN
data_ge %>%
add_cols(x = 10,
y = 30,
.before = GEN)
#> # A tibble: 420 × 7
#> ENV x y GEN REP GY HM
#> <fct> <dbl> <dbl> <fct> <fct> <dbl> <dbl>
#> 1 E1 10 30 G1 1 2.17 44.9
#> 2 E1 10 30 G1 2 2.50 46.9
#> 3 E1 10 30 G1 3 2.43 47.8
#> 4 E1 10 30 G2 1 3.21 45.2
#> 5 E1 10 30 G2 2 2.93 45.3
#> 6 E1 10 30 G2 3 2.56 45.5
#> 7 E1 10 30 G3 1 2.77 46.7
#> 8 E1 10 30 G3 2 3.62 43.2
#> 9 E1 10 30 G3 3 2.28 47.8
#> 10 E1 10 30 G4 1 2.36 47.9
#> # ℹ 410 more rows
# Creating a new variable based on the existing ones.
data_ge %>%
add_cols(GY2 = GY^2,
GY2_HM = GY2 + HM,
.after = GY)
#> # A tibble: 420 × 7
#> ENV GEN REP GY GY2 GY2_HM HM
#> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 E1 G1 1 2.17 4.70 49.6 44.9
#> 2 E1 G1 2 2.50 6.27 53.2 46.9
#> 3 E1 G1 3 2.43 5.89 53.6 47.8
#> 4 E1 G2 1 3.21 10.3 55.5 45.2
#> 5 E1 G2 2 2.93 8.60 53.9 45.3
#> 6 E1 G2 3 2.56 6.58 52.1 45.5
#> 7 E1 G3 1 2.77 7.67 54.4 46.7
#> 8 E1 G3 2 3.62 13.1 56.3 43.2
#> 9 E1 G3 3 2.28 5.18 52.9 47.8
#> 10 E1 G4 1 2.36 5.57 53.5 47.9
#> # ℹ 410 more rows
############### Reordering columns ###############
reorder_cols(data_ge2, NKR, .before = ENV)
#> # A tibble: 156 × 18
#> NKR ENV GEN REP PH EH EP EL ED CL CD CW KW
#> <dbl> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 36.6 A1 H1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217.
#> 2 31.4 A1 H1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184.
#> 3 31.8 A1 H1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208.
#> 4 32.8 A1 H10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194.
#> 5 28 A1 H10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176.
#> 6 32.8 A1 H10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207.
#> 7 34.6 A1 H11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217.
#> 8 34.4 A1 H11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181.
#> 9 34.8 A1 H11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166.
#> 10 31.6 A1 H12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161.
#> # ℹ 146 more rows
#> # ℹ 5 more variables: NR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
reorder_cols(data_ge2, where(is.factor), .after = last_col())
#> # A tibble: 156 × 18
#> PH EH EP EL ED CL CD CW KW NR NKR CDED PERK
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6 36.6 0.538 89.6
#> 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16 31.4 0.551 89.5
#> 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2 31.8 0.561 89.7
#> 4 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6 32.8 0.586 87.9
#> 5 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6 28 0.607 89.7
#> 6 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8 32.8 0.577 88.5
#> 7 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8 34.6 0.594 89.1
#> 8 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6 34.4 0.608 88.3
#> 9 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2 34.8 0.576 89.0
#> 10 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8 31.6 0.597 88.7
#> # ℹ 146 more rows
#> # ℹ 5 more variables: TKW <dbl>, NKE <dbl>, ENV <fct>, GEN <fct>, REP <fct>
######## Selecting and removing columns ##########
select_cols(data_ge2, GEN, REP)
#> # A tibble: 156 × 2
#> GEN REP
#> <fct> <fct>
#> 1 H1 1
#> 2 H1 2
#> 3 H1 3
#> 4 H10 1
#> 5 H10 2
#> 6 H10 3
#> 7 H11 1
#> 8 H11 2
#> 9 H11 3
#> 10 H12 1
#> # ℹ 146 more rows
remove_cols(data_ge2, GEN, REP)
#> # A tibble: 156 × 16
#> ENV PH EH EP EL ED CL CD CW KW NR NKR CDED
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6 36.6 0.538
#> 2 A1 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16 31.4 0.551
#> 3 A1 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2 31.8 0.561
#> 4 A1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6 32.8 0.586
#> 5 A1 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6 28 0.607
#> 6 A1 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8 32.8 0.577
#> 7 A1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8 34.6 0.594
#> 8 A1 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6 34.4 0.608
#> 9 A1 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2 34.8 0.576
#> 10 A1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8 31.6 0.597
#> # ℹ 146 more rows
#> # ℹ 3 more variables: PERK <dbl>, TKW <dbl>, NKE <dbl>
########## Selecting and removing rows ###########
select_rows(data_ge2, 2:3)
#> # A tibble: 2 × 18
#> ENV GEN REP PH EH EP EL ED CL CD CW KW NR
#> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16
#> 2 A1 H1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
remove_rows(data_ge2, 2:3)
#> # A tibble: 154 × 18
#> ENV GEN REP PH EH EP EL ED CL CD CW KW NR
#> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6
#> 2 A1 H10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6
#> 3 A1 H10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6
#> 4 A1 H10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8
#> 5 A1 H11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8
#> 6 A1 H11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6
#> 7 A1 H11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2
#> 8 A1 H12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8
#> 9 A1 H12 2 2.56 1.56 0.616 15.7 49.9 29.9 16.2 24.0 188. 17.2
#> 10 A1 H12 3 2.79 1.53 0.546 15.0 52.7 31.4 15.2 32.9 193. 20
#> # ℹ 144 more rows
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
########### Concatenating columns ################
concatenate(data_ge, ENV, GEN, REP)
#> # A tibble: 420 × 6
#> ENV GEN REP GY HM new_var
#> <fct> <fct> <fct> <dbl> <dbl> <chr>
#> 1 E1 G1 1 2.17 44.9 E1_G1_1
#> 2 E1 G1 2 2.50 46.9 E1_G1_2
#> 3 E1 G1 3 2.43 47.8 E1_G1_3
#> 4 E1 G2 1 3.21 45.2 E1_G2_1
#> 5 E1 G2 2 2.93 45.3 E1_G2_2
#> 6 E1 G2 3 2.56 45.5 E1_G2_3
#> 7 E1 G3 1 2.77 46.7 E1_G3_1
#> 8 E1 G3 2 3.62 43.2 E1_G3_2
#> 9 E1 G3 3 2.28 47.8 E1_G3_3
#> 10 E1 G4 1 2.36 47.9 E1_G4_1
#> # ℹ 410 more rows
concatenate(data_ge, ENV, GEN, REP, drop = TRUE)
#> # A tibble: 420 × 1
#> new_var
#> <chr>
#> 1 E1_G1_1
#> 2 E1_G1_2
#> 3 E1_G1_3
#> 4 E1_G2_1
#> 5 E1_G2_2
#> 6 E1_G2_3
#> 7 E1_G3_1
#> 8 E1_G3_2
#> 9 E1_G3_3
#> 10 E1_G4_1
#> # ℹ 410 more rows
# Combine with add_cols() and replace_string()
data_ge2 %>%
add_cols(ENV_GEN = concatenate(., ENV, GEN, pull = TRUE),
.after = GEN) %>%
replace_string(ENV_GEN,
pattern = "H",
replacement = "HYB_")
#> # A tibble: 156 × 19
#> ENV GEN ENV_GEN REP PH EH EP EL ED CL CD CW
#> <fct> <fct> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 A1_HYB_1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1
#> 2 A1 H1 A1_HYB_1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4
#> 3 A1 H1 A1_HYB_1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0
#> 4 A1 H10 A1_HYB_10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2
#> 5 A1 H10 A1_HYB_10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7
#> 6 A1 H10 A1_HYB_10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8
#> 7 A1 H11 A1_HYB_11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2
#> 8 A1 H11 A1_HYB_11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1
#> 9 A1 H11 A1_HYB_11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5
#> 10 A1 H12 A1_HYB_12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1
#> # ℹ 146 more rows
#> # ℹ 7 more variables: KW <dbl>, NR <dbl>, NKR <dbl>, CDED <dbl>, PERK <dbl>,
#> # TKW <dbl>, NKE <dbl>
# Use prefixes and suffixes
concatenate(data_ge2, REP, prefix = "REP", new_var = REP)
#> # A tibble: 156 × 18
#> ENV GEN REP PH EH EP EL ED CL CD CW KW NR
#> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 REP_1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6
#> 2 A1 H1 REP_2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16
#> 3 A1 H1 REP_3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2
#> 4 A1 H10 REP_1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6
#> 5 A1 H10 REP_2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6
#> 6 A1 H10 REP_3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8
#> 7 A1 H11 REP_1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8
#> 8 A1 H11 REP_2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6
#> 9 A1 H11 REP_3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2
#> 10 A1 H12 REP_1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8
#> # ℹ 146 more rows
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
# Add a prefix or suffix to selected column names (here PH, EH, EP, and EL)
add_prefix(data_ge2, PH, EH, EP, EL, prefix = "EAR")
#> # A tibble: 156 × 18
#> ENV GEN REP EAR_PH EAR_EH EAR_EP EAR_EL ED CL CD CW KW
#> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217.
#> 2 A1 H1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184.
#> 3 A1 H1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208.
#> 4 A1 H10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194.
#> 5 A1 H10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176.
#> 6 A1 H10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207.
#> 7 A1 H11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217.
#> 8 A1 H11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181.
#> 9 A1 H11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166.
#> 10 A1 H12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161.
#> # ℹ 146 more rows
#> # ℹ 6 more variables: NR <dbl>, NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>,
#> # NKE <dbl>
add_suffix(data_ge2, PH, EH, EP, EL, suffix = "EAR", sep = ".")
#> # A tibble: 156 × 18
#> ENV GEN REP PH.EAR EH.EAR EP.EAR EL.EAR ED CL CD CW KW
#> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217.
#> 2 A1 H1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184.
#> 3 A1 H1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208.
#> 4 A1 H10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194.
#> 5 A1 H10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176.
#> 6 A1 H10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207.
#> 7 A1 H11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217.
#> 8 A1 H11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181.
#> 9 A1 H11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166.
#> 10 A1 H12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161.
#> # ℹ 146 more rows
#> # ℹ 6 more variables: NR <dbl>, NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>,
#> # NKE <dbl>
# Use prefixes and suffixes (colnames)
concatenate(data_ge2, REP, prefix = "REP", new_var = REP)
#> # A tibble: 156 × 18
#> ENV GEN REP PH EH EP EL ED CL CD CW KW NR
#> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 A1 H1 REP_1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6
#> 2 A1 H1 REP_2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16
#> 3 A1 H1 REP_3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2
#> 4 A1 H10 REP_1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6
#> 5 A1 H10 REP_2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6
#> 6 A1 H10 REP_3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8
#> 7 A1 H11 REP_1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8
#> 8 A1 H11 REP_2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6
#> 9 A1 H11 REP_3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2
#> 10 A1 H12 REP_1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8
#> # ℹ 146 more rows
#> # ℹ 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>
########### Formatting column names ##############
# Creating data with messy column names
df <- head(data_ge, 3)
colnames(df) <- c("Env", "gen", "Rep", "GY", "hm")
df
#> # A tibble: 3 × 5
#> Env gen Rep GY hm
#> <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1 G1 1 2.17 44.9
#> 2 E1 G1 2 2.50 46.9
#> 3 E1 G1 3 2.43 47.8
colnames_to_lower(df)
#> # A tibble: 3 × 5
#> env gen rep gy hm
#> <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1 G1 1 2.17 44.9
#> 2 E1 G1 2 2.50 46.9
#> 3 E1 G1 3 2.43 47.8
colnames_to_upper(df)
#> # A tibble: 3 × 5
#> ENV GEN REP GY HM
#> <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1 G1 1 2.17 44.9
#> 2 E1 G1 2 2.50 46.9
#> 3 E1 G1 3 2.43 47.8
colnames_to_title(df)
#> # A tibble: 3 × 5
#> Env Gen Rep Gy Hm
#> <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1 G1 1 2.17 44.9
#> 2 E1 G1 2 2.50 46.9
#> 3 E1 G1 3 2.43 47.8
################### Adding rows ##################
data_ge %>%
add_rows(GY = 10.3,
HM = 100.11,
.after = 1)
#> # A tibble: 421 × 5
#> ENV GEN REP GY HM
#> <fct> <fct> <fct> <dbl> <dbl>
#> 1 E1 G1 1 2.17 44.9
#> 2 NA NA NA 10.3 100.
#> 3 E1 G1 2 2.50 46.9
#> 4 E1 G1 3 2.43 47.8
#> 5 E1 G2 1 3.21 45.2
#> 6 E1 G2 2 2.93 45.3
#> 7 E1 G2 3 2.56 45.5
#> 8 E1 G3 1 2.77 46.7
#> 9 E1 G3 2 3.62 43.2
#> 10 E1 G3 3 2.28 47.8
#> # ℹ 411 more rows
########## checking if a column exists ###########
column_exists(data_g, "GEN")
#> [1] TRUE
####### get the levels, level combinations and size of levels ########
get_levels(data_g, GEN)
#> [1] "H1" "H10" "H11" "H12" "H13" "H2" "H3" "H4" "H5" "H6" "H7" "H8"
#> [13] "H9"
get_levels_comb(data_ge, ENV, GEN)
#> # A tibble: 140 × 2
#> ENV GEN
#> <fct> <fct>
#> 1 E1 G1
#> 2 E10 G1
#> 3 E11 G1
#> 4 E12 G1
#> 5 E13 G1
#> 6 E14 G1
#> 7 E2 G1
#> 8 E3 G1
#> 9 E4 G1
#> 10 E5 G1
#> # ℹ 130 more rows
get_level_size(data_g, GEN)
#> # A tibble: 13 × 17
#> GEN REP PH EH EP EL ED CL CD CW KW NR NKR
#> <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 H1 3 3 3 3 3 3 3 3 3 3 3 3
#> 2 H10 3 3 3 3 3 3 3 3 3 3 3 3
#> 3 H11 3 3 3 3 3 3 3 3 3 3 3 3
#> 4 H12 3 3 3 3 3 3 3 3 3 3 3 3
#> 5 H13 3 3 3 3 3 3 3 3 3 3 3 3
#> 6 H2 3 3 3 3 3 3 3 3 3 3 3 3
#> 7 H3 3 3 3 3 3 3 3 3 3 3 3 3
#> 8 H4 3 3 3 3 3 3 3 3 3 3 3 3
#> 9 H5 3 3 3 3 3 3 3 3 3 3 3 3
#> 10 H6 3 3 3 3 3 3 3 3 3 3 3 3
#> 11 H7 3 3 3 3 3 3 3 3 3 3 3 3
#> 12 H8 3 3 3 3 3 3 3 3 3 3 3 3
#> 13 H9 3 3 3 3 3 3 3 3 3 3 3 3
#> # ℹ 4 more variables: CDED <int>, PERK <int>, TKW <int>, NKE <int>
############## all possible pairs ################
all_pairs(data_g, GEN)
#> V1 V2
#> 1 H1 H10
#> 2 H1 H11
#> 3 H1 H12
#> 4 H1 H13
#> 5 H1 H2
#> 6 H1 H3
#> 7 H1 H4
#> 8 H1 H5
#> 9 H1 H6
#> 10 H1 H7
#> 11 H1 H8
#> 12 H1 H9
#> 13 H10 H11
#> 14 H10 H12
#> 15 H10 H13
#> 16 H10 H2
#> 17 H10 H3
#> 18 H10 H4
#> 19 H10 H5
#> 20 H10 H6
#> 21 H10 H7
#> 22 H10 H8
#> 23 H10 H9
#> 24 H11 H12
#> 25 H11 H13
#> 26 H11 H2
#> 27 H11 H3
#> 28 H11 H4
#> 29 H11 H5
#> 30 H11 H6
#> 31 H11 H7
#> 32 H11 H8
#> 33 H11 H9
#> 34 H12 H13
#> 35 H12 H2
#> 36 H12 H3
#> 37 H12 H4
#> 38 H12 H5
#> 39 H12 H6
#> 40 H12 H7
#> 41 H12 H8
#> 42 H12 H9
#> 43 H13 H2
#> 44 H13 H3
#> 45 H13 H4
#> 46 H13 H5
#> 47 H13 H6
#> 48 H13 H7
#> 49 H13 H8
#> 50 H13 H9
#> 51 H2 H3
#> 52 H2 H4
#> 53 H2 H5
#> 54 H2 H6
#> 55 H2 H7
#> 56 H2 H8
#> 57 H2 H9
#> 58 H3 H4
#> 59 H3 H5
#> 60 H3 H6
#> 61 H3 H7
#> 62 H3 H8
#> 63 H3 H9
#> 64 H4 H5
#> 65 H4 H6
#> 66 H4 H7
#> 67 H4 H8
#> 68 H4 H9
#> 69 H5 H6
#> 70 H5 H7
#> 71 H5 H8
#> 72 H5 H9
#> 73 H6 H7
#> 74 H6 H8
#> 75 H6 H9
#> 76 H7 H8
#> 77 H7 H9
#> 78 H8 H9
########## select numeric variables only #########
select_numeric_cols(data_g)
#> # A tibble: 39 × 15
#> PH EH EP EL ED CL CD CW KW NR NKR CDED PERK
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2.11 1.05 0.497 15.7 49.9 30.5 16.6 28.6 164. 15.6 31.2 0.612 85.1
#> 2 2.20 1.09 0.492 13.7 49.2 30.5 14.7 22.3 130. 16.4 24.8 0.619 85.2
#> 3 2.29 1.15 0.502 15.1 52.6 31.7 16.2 29.6 176. 15.6 29.2 0.603 85.9
#> 4 1.79 0.888 0.514 13.9 44.1 26.2 15.0 12.9 116. 14.8 33 0.596 89.8
#> 5 2.05 1.03 0.504 13.6 43.9 23.5 14.4 11.5 118. 16 32.4 0.535 91.1
#> 6 2.27 1.11 0.491 14.5 43.7 24.6 16.1 12.5 128. 15.2 34.6 0.566 90.7
#> 7 1.71 0.808 0.489 15.5 45.2 25.0 16.7 15.2 140. 15.6 36 0.552 90.3
#> 8 2.09 1.06 0.509 12.2 46.9 26.5 14.3 13.5 114. 16.8 26.2 0.566 89.3
#> 9 2.5 1.44 0.577 15.0 49.0 27.5 15.2 19.4 168. 16.4 35 0.562 89.6
#> 10 2.52 1.52 0.601 14.4 49.2 28.4 15 18.2 153. 16.4 32 0.577 89.2
#> # ℹ 29 more rows
#> # ℹ 2 more variables: TKW <dbl>, NKE <dbl>
select_non_numeric_cols(data_g)
#> # A tibble: 39 × 2
#> GEN REP
#> <fct> <fct>
#> 1 H1 1
#> 2 H1 2
#> 3 H1 3
#> 4 H10 1
#> 5 H10 2
#> 6 H10 3
#> 7 H11 1
#> 8 H11 2
#> 9 H11 3
#> 10 H12 1
#> # ℹ 29 more rows
# }