Subsets either or both of the 'Variables' and 'Categories' elements of a data
dictionary. Rows are kept if their values satisfy the condition.
This is a wrapper function analogous to `dplyr::filter()`.
data_dict_filter(
data_dict,
filter_var = NULL,
filter_cat = NULL,
filter_all = NULL
)
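data_dict: A list of data frame(s) representing the metadata to be filtered.
filter_var: Filter expression, given as a character string (e.g. "name == 'gndr'"), applied to the columns of the 'Variables' element of the data dictionary.
filter_cat: Filter expression, given as a character string (e.g. "missing == TRUE"), applied to the columns of the 'Categories' element of the data dictionary.
filter_all: Filter expression, given as a character string, applied to both the 'Variables' and 'Categories' elements of the data dictionary; the columns it refers to must exist in both elements.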
A list of data frame(s) identifying a workable data dictionary structure.
A data dictionary contains the list of variables in a dataset and metadata
about the variables and can be associated with a dataset. A data dictionary
object is a list of data frame(s) named 'Variables' (required) and
'Categories' (if any). To be usable in any function, the data frame
'Variables' must contain at least the `name` column, with all unique and
non-missing entries, and the data frame 'Categories' must contain at least
the `variable` and `name` columns, with unique combinations of `variable`
and `name`.
{
  library(dplyr)

  # Build the working data dictionary from the example shipped in
  # madshapR_examples: every element of the list receives an extra
  # 'table' column that names the dataset it is associated with.
  data_dict <- lapply(
    madshapR_examples$`data_dictionary_example`,
    function(element) mutate(element, table = "dataset")
  )

  # Example 1 ----
  # Filter on a column of the 'Variables' element.
  by_variable <- data_dict_filter(data_dict, filter_var = "name == 'gndr'")
  glimpse(by_variable)

  # Example 2 ----
  # Filter on a column of the 'Categories' element.
  by_category <- data_dict_filter(data_dict, filter_cat = "missing == TRUE")
  glimpse(by_category)

  # Example 3 ----
  # Filter on a column across all elements at once. The column has to be
  # present in both 'Variables' and 'Categories', with the same meaning in
  # each of them.
  by_all <- data_dict_filter(data_dict, filter_all = "table == 'dataset'")
  glimpse(by_all)
}
#> List of 2
#> $ Variables : tibble [1 × 9] (S3: tbl_df/tbl/data.frame)
#> ..$ index : num 2
#> ..$ name : chr "gndr"
#> ..$ label:en : chr "gndr"
#> ..$ description:en : chr "gender of the participant"
#> ..$ valueType : chr "integer"
#> ..$ unit : chr NA
#> ..$ datacollection::type : chr "declared"
#> ..$ datacollection::level: chr "high"
#> ..$ table : chr "dataset"
#> $ Categories: tibble [3 × 5] (S3: tbl_df/tbl/data.frame)
#> ..$ variable: chr [1:3] "gndr" "gndr" "gndr"
#> ..$ name : chr [1:3] "1" "2" "-77"
#> ..$ label:en: chr [1:3] "Male" "Female" "Don’t want to answer"
#> ..$ missing : logi [1:3] FALSE FALSE TRUE
#> ..$ table : chr [1:3] "dataset" "dataset" "dataset"
#> List of 2
#> $ Variables : tibble [9 × 9] (S3: tbl_df/tbl/data.frame)
#> ..$ index : num [1:9] 1 2 3 4 5 6 7 8 9
#> ..$ name : chr [1:9] "part_id" "gndr" "height" "weight_ms" ...
#> ..$ label:en : chr [1:9] "id of the participant" "gndr" "height" "weight_ms" ...
#> ..$ description:en : chr [1:9] "id of the participant" "gender of the participant" "height of the participant" "weight of the participant - measured" ...
#> ..$ valueType : chr [1:9] "text" "integer" "integer" "integer" ...
#> ..$ unit : chr [1:9] NA NA "cm" "kg" ...
#> ..$ datacollection::type : chr [1:9] "automatic" "declared" "declared" "measured" ...
#> ..$ datacollection::level: chr [1:9] "high" "high" "moderate" "moderate" ...
#> ..$ table : chr [1:9] "dataset" "dataset" "dataset" "dataset" ...
#> $ Categories: tibble [6 × 5] (S3: tbl_df/tbl/data.frame)
#> ..$ variable: chr [1:6] "gndr" "weight_ms" "weight_ms" "prg_ever" ...
#> ..$ name : chr [1:6] "-77" "-88" "-99" "8" ...
#> ..$ label:en: chr [1:6] "Don’t want to answer" "Don’t want to answer" "Don’t know" "Don’t want to answer" ...
#> ..$ missing : logi [1:6] TRUE TRUE TRUE TRUE TRUE TRUE
#> ..$ table : chr [1:6] "dataset" "dataset" "dataset" "dataset" ...
#> List of 2
#> $ Variables : tibble [9 × 9] (S3: tbl_df/tbl/data.frame)
#> ..$ index : num [1:9] 1 2 3 4 5 6 7 8 9
#> ..$ name : chr [1:9] "part_id" "gndr" "height" "weight_ms" ...
#> ..$ label:en : chr [1:9] "id of the participant" "gndr" "height" "weight_ms" ...
#> ..$ description:en : chr [1:9] "id of the participant" "gender of the participant" "height of the participant" "weight of the participant - measured" ...
#> ..$ valueType : chr [1:9] "text" "integer" "integer" "integer" ...
#> ..$ unit : chr [1:9] NA NA "cm" "kg" ...
#> ..$ datacollection::type : chr [1:9] "automatic" "declared" "declared" "measured" ...
#> ..$ datacollection::level: chr [1:9] "high" "high" "moderate" "moderate" ...
#> ..$ table : chr [1:9] "dataset" "dataset" "dataset" "dataset" ...
#> $ Categories: tibble [11 × 5] (S3: tbl_df/tbl/data.frame)
#> ..$ variable: chr [1:11] "gndr" "gndr" "gndr" "weight_ms" ...
#> ..$ name : chr [1:11] "1" "2" "-77" "-88" ...
#> ..$ label:en: chr [1:11] "Male" "Female" "Don’t want to answer" "Don’t want to answer" ...
#> ..$ missing : logi [1:11] FALSE FALSE TRUE TRUE TRUE FALSE ...
#> ..$ table : chr [1:11] "dataset" "dataset" "dataset" "dataset" ...