Skip to content

Commit

Permalink
fix workflow according to new ingest functions
Browse files Browse the repository at this point in the history
  • Loading branch information
langbart committed Oct 13, 2024
1 parent 7d3c21b commit d8cd0ea
Show file tree
Hide file tree
Showing 15 changed files with 135 additions and 59 deletions.
20 changes: 9 additions & 11 deletions .github/workflows/data-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ jobs:
continue-on-error: true
run: Rscript -e 'peskas.timor.data.pipeline::ingest_rfish_table()'

ingest-preprocess-v1-v3-landings:
name: Ingest legacy and updated landings
ingest-preprocess-landings-v1-v3:
name: Ingest and preprocess legacy and ongoing landings
needs: build-container
runs-on: ubuntu-20.04
container:
Expand All @@ -69,16 +69,14 @@ jobs:
- name: Set env to production
if: endsWith(github.ref, '/main')
run: echo "R_CONFIG_ACTIVE=production" >> $GITHUB_ENV
- name: Call ingest_legacy_landings()
run: Rscript -e 'peskas.timor.data.pipeline::ingest_legacy_landings()'
- name: Call ingest_landings()
run: Rscript -e 'peskas.timor.data.pipeline::ingest_landings_v1v3()'
- name: Call preprocess_legacy_landings()
run: Rscript -e 'peskas.timor.data.pipeline::preprocess_legacy_landings()'
- name: Call ingest_updated_landings()
run: Rscript -e 'peskas.timor.data.pipeline::ingest_updated_landings()'
- name: Call preprocess_updated_landings()
run: Rscript -e 'peskas.timor.data.pipeline::preprocess_updated_landings()'

ingest-preprocess-v1-landings-step1:
ingest-preprocess-v2-landings-step1:
name: "Ingest-process landings 1/2"
needs: build-container
runs-on: ubuntu-20.04
Expand All @@ -91,14 +89,14 @@ jobs:
- name: Set env to production
if: endsWith(github.ref, '/main')
run: echo "R_CONFIG_ACTIVE=production" >> $GITHUB_ENV
- name: Call ingest_landings()
run: Rscript -e 'peskas.timor.data.pipeline::ingest_landings()'
- name: Call ingest_landings_v2()
run: Rscript -e 'peskas.timor.data.pipeline::ingest_landings_v2()'
- name: Call preprocess_landings_step_1()
run: Rscript -e 'peskas.timor.data.pipeline::preprocess_landings_step_1()'

ingest-preprocess-v1-landings-step2:
ingest-preprocess-v2-landings-step2:
name: "Ingest-process landings 2/2"
needs: "ingest-preprocess-v1-landings-step1"
needs: ingest-preprocess-v1-landings-step1
runs-on: ubuntu-20.04
container:
image: docker.pkg.github.com/worldfishcenter/peskas.timor.data.pipeline/r-runner-peskas-timor
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ export(get_validated_landings)
export(get_validation_sheet)
export(ingest_complete_tracks)
export(ingest_kepler_tracks)
export(ingest_landings)
export(ingest_landings_v1v3)
export(ingest_landings_v2)
export(ingest_metadata_tables)
export(ingest_pds_map)
export(ingest_pds_matched_trips)
Expand Down
76 changes: 70 additions & 6 deletions R/ingest-landings.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' uploads it to cloud storage services.
#'
#' This function downloads the survey metadata and responses for multiple landing surveys
#' (landings_1, landings_2, landings_3). It then uploads this information to cloud
#' (landings_1, landings_3). It then uploads this information to cloud
#' services. File names contain a versioning string that includes the date-time and,
#' if available, the first 7 digits of the git commit sha, accomplished using [add_version()].
#'
Expand All @@ -17,9 +17,6 @@
#' landings_1:
#' file_prefix:
#' asset_id:
#' landings_2:
#' file_prefix:
#' asset_id:
#' landings_3:
#' file_prefix:
#' asset_id:
Expand All @@ -42,13 +39,80 @@
#' @return No output. This function is used for its side effects
#' @export
#'
ingest_landings <- function(log_threshold = logger::DEBUG) {
ingest_landings_v1v3 <- function(log_threshold = logger::DEBUG) {
logger::log_threshold(log_threshold)

pars <- read_config()

# Create a vector of landing numbers
landing_numbers <- 1:3
landing_numbers <- c(1, 3)

# Use purrr::map to apply get_kobo_data() to each landing number
file_lists <- purrr::map(landing_numbers, function(i) {
survey_name <- paste0("landings_", i)
logger::log_info("Retrieving data for {survey_name}...")

get_kobo_data(
prefix = pars$surveys[[survey_name]]$file_prefix,
uname = pars$surveys$kobo_username,
pwd = pars$surveys$kobo_password,
assetid = pars$surveys[[survey_name]]$asset_id
)
})

# Combine all file lists
all_files <- unlist(file_lists, recursive = FALSE)

logger::log_info("Uploading files to cloud...")
# Iterate over multiple storage providers if there are more than one
purrr::walk(pars$storage, ~ upload_cloud_file(all_files, .$key, .$options))
logger::log_success("File upload succeeded")
}

#' Ingest V2 Landings Survey data
#'
#' Downloads landings information for multiple surveys collected using Kobo Toolbox and
#' uploads it to cloud storage services.
#'
#' This function downloads the survey metadata and responses for landing surveys v2.
#' It then uploads this information to cloud services. File names contain a
#' versioning string that includes the date-time and,
#' if available, the first 7 digits of the git commit sha, accomplished using [add_version()].
#'
#' The parameters needed in `conf.yml` are:
#'
#' ```
#' surveys:
#' kobo_username:
#' kobo_password:
#' landings_2:
#' file_prefix:
#' asset_id:
#' storage:
#' storage_name:
#' key:
#' options:
#' project:
#' bucket:
#' service_account_key:
#' ```
#'
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold The (standard Apache logj4) log level used as a
#' threshold for the logging infrastructure. See [logger::log_levels] for more details
#'
#' @keywords workflow
#'
#' @return No output. This function is used for its side effects
#' @export
#'
ingest_landings_v2 <- function(log_threshold = logger::DEBUG) {
logger::log_threshold(log_threshold)

pars <- read_config()

landing_numbers <- 2

# Use purrr::map to apply get_kobo_data() to each landing number
file_lists <- purrr::map(landing_numbers, function(i) {
Expand Down
8 changes: 4 additions & 4 deletions R/preprocess-landings.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold
#' @inheritParams ingest_landings
#' @inheritParams ingest_landings_v1v3
#' @keywords workflow
#' @return no outputs. This function is used for it's side effects
#' @export
Expand Down Expand Up @@ -133,7 +133,7 @@ preprocess_updated_landings <- function(log_threshold = logger::DEBUG) {
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold
#' @inheritParams ingest_landings
#' @inheritParams ingest_landings_v1v3
#' @keywords workflow
#' @return no outputs. This function is used for it's side effects
#' @export
Expand Down Expand Up @@ -225,7 +225,7 @@ preprocess_landings_step_1 <- function(log_threshold = logger::DEBUG) {
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold
#' @inheritParams ingest_landings
#' @inheritParams ingest_landings_v1v3
#' @keywords workflow
#' @return no outputs. This function is used for it's side effects
#' @export
Expand Down Expand Up @@ -333,7 +333,7 @@ preprocess_landings_step_2 <- function(log_threshold = logger::DEBUG) {
#' Progress through the function is tracked using the package *logger*.
#'
#' @param log_threshold
#' @inheritParams ingest_landings
#' @inheritParams ingest_landings_v1v3
#' @keywords workflow
#' @return no outputs. This funcrion is used for it's side effects
#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/validate-landings.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' that are `hrs`, `method` and `k`.
#'
#' @param log_threshold
#' @inheritParams ingest_landings
#' @inheritParams ingest_landings_v1v3
#' @inheritParams validate_surveys_time
#' @inheritParams validate_catch_price
#' @keywords workflow
Expand Down
1 change: 0 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ reference:
desc: Functions that help retrieving survey data
- contents:
- matches("survey")
- get_host_url
- subtitle: PDS
desc: Functions that help retrieving Pelagic Data System data
- contents:
Expand Down
4 changes: 0 additions & 4 deletions man/estimate_fishery_indicators.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/format_public_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/get_validated_landings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 4 additions & 7 deletions man/ingest_landings.Rd → man/ingest_landings_v1v3.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions man/ingest_landings_v2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/ingest_metadata_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/ingest_validation_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/model_indicators.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions man/preprocess_metadata_tables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d8cd0ea

Please sign in to comment.