Skip to contents
library(STAVE)

Example workflow

Input data take the form of three tables. We will use the pre-loaded example data here; if you are using your own data, it needs to match this input format:

library(STAVE)

data("example_input")

Create a STAVE object and append the data:

# create new object
s <- STAVE_object$new()

# append data using a member function
s$append_data(studies_dataframe = example_input$studies,
              surveys_dataframe = example_input$surveys,
              counts_dataframe = example_input$counts)
#> data correctly appended

# check how many studies and surveys are now loaded
s
#> Studies: 7
#> Surveys: 24

Once data are loaded, we can always view the different tables using get functions. However, we cannot alter the values directly.

s$get_studies() |> head()
#> # A tibble: 6 × 6
#>   study_id            study_name       study_type authors publication_year url  
#>   <chr>               <chr>            <chr>      <chr>              <dbl> <chr>
#> 1 wwarn_10297_Nelson  wwarn_10297_Nel… peer_revi… Nelson              1000 http…
#> 2 wwarn_10814_Dama    wwarn_10814_Dama peer_revi… Dama                1000 http…
#> 3 wwarn_10992_Mallick wwarn_10992_Mal… peer_revi… Mallick             1000 http…
#> 4 wwarn_11208_Kunasol wwarn_11208_Kun… peer_revi… Kunasol             1000 http…
#> 5 wwarn_11435_Henry   wwarn_11435_Hen… peer_revi… Henry               1000 http…
#> 6 wwarn_11720_Ould    wwarn_11720_Ould peer_revi… Ould                1000 http…
s$get_surveys() |> head()
#> # A tibble: 6 × 11
#>   study_key    survey_id country_name site_name latitude longitude spatial_notes
#>   <chr>        <chr>     <chr>        <chr>        <dbl>     <dbl> <chr>        
#> 1 wwarn_10297… wwarn_10… Thailand     Kanchana…     15.3     98.5  wwarn lat an…
#> 2 wwarn_10814… wwarn_10… Mali         Koulikoro     12.6     -8.14 wwarn lat an…
#> 3 wwarn_10992… wwarn_10… India        Chhattis…     20.1     80.8  wwarn lat an…
#> 4 wwarn_10992… wwarn_10… India        Goa           15.3     74.1  wwarn lat an…
#> 5 wwarn_10992… wwarn_10… India        Gujarat       23.0     73.6  wwarn lat an…
#> 6 wwarn_10992… wwarn_10… India        Heilongj…     88.3     27.2  wwarn lat an…
#> # ℹ 4 more variables: collection_start <chr>, collection_end <chr>,
#> #   collection_day <chr>, time_notes <chr>
s$get_counts() |> head()
#> # A tibble: 6 × 4
#>   survey_key                           variant_string variant_num total_num
#>   <chr>                                <chr>                <dbl>     <dbl>
#> 1 wwarn_10297_Nelson_Sangkhlaburi_2002 mdr1:184:F              27        49
#> 2 wwarn_10297_Nelson_Sangkhlaburi_2002 mdr1:86:Y                4        49
#> 3 wwarn_10814_Dama_Bamako_2014         crt:76:T               130       170
#> 4 wwarn_10814_Dama_Bamako_2014         mdr1:86:Y               46       158
#> 5 wwarn_10992_Mallick_Assam_2002       crt:76:T                26        26
#> 6 wwarn_10992_Mallick_Assam_2002       mdr1:86:Y               19        25

We can calculate the prevalence of any variant using get_prevalence():

p <- s$get_prevalence("mdr1:184:F")
p
#> # A tibble: 24 × 21
#>    study_id       study_name study_type authors publication_year url   survey_id
#>    <chr>          <chr>      <chr>      <chr>              <dbl> <chr> <chr>    
#>  1 wwarn_10297_N… wwarn_102… peer_revi… Nelson              1000 http… wwarn_10…
#>  2 wwarn_10814_D… wwarn_108… peer_revi… Dama                1000 http… wwarn_10…
#>  3 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#>  4 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#>  5 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#>  6 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#>  7 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#>  8 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#>  9 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#> 10 wwarn_10992_M… wwarn_109… peer_revi… Mallick             1000 http… wwarn_10…
#> # ℹ 14 more rows
#> # ℹ 14 more variables: country_name <chr>, site_name <chr>, latitude <dbl>,
#> #   longitude <dbl>, spatial_notes <chr>, collection_start <chr>,
#> #   collection_end <chr>, collection_day <chr>, time_notes <chr>,
#> #   numerator <dbl>, denominator <dbl>, prevalence <dbl>,
#> #   prevalence_lower <dbl>, prevalence_upper <dbl>

We can also return a list of all variants in the data:

s$get_variants()
#> [1] "crt:76:T"   "k13:469:F"  "k13:469:Y"  "k13:539:T"  "k13:580:Y" 
#> [6] "k13:675:V"  "mdr1:184:F" "mdr1:86:Y"

Finally, we can selectively drop studies from the data using their study ID:

s$drop_study(drop_study_id = "wwarn_10297_Nelson")
s
#> Studies: 6
#> Surveys: 23