From adbbb143dca82cb041e9daec2a2008279153cf3f Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 4 Oct 2023 12:36:24 +0200 Subject: [PATCH] add explenation for get_example_data --- README.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0594bbb..c65d772 100644 --- a/README.md +++ b/README.md @@ -66,10 +66,70 @@ See also the [get started](https://metagenome-atlas.readthedocs.io/en/latest/usa + + +## Use this code for your project + +First, clone this git repository. +### Copy atlas files to your local machine. +I made some handy scripts to copy the most important atlas output files from a server to your local machine. +As the output files might change between different versions of atlas, I use the file [`atlas_output_files.yaml`](atlas_output_files.yaml) +to specify them. Check which atlas version is the closest to the atlas version you used. + +You can run `get_atlas_files.py` or `get_atlas_files.R` to do this. + +The Python script asks for the following information and stores it in `.connection_details.yaml`. +``` + "output_dir": 'atlas_data', + "atlas_version": "v2.17", + "username": "me", + "server": "myserver.server.com", + "base_path_server": '/home/user/my_atlas_run', + "private_key_path": None # "C:/Users/User/.ssh/id_rsa" +``` + +For the R script you need to hard-code these values into the script. + +:warning: Some atlas output files might be very large, e.g. the gene catalog. + + +### Use files specified in the `atlas_output_files.yaml` + +This might be a complicated but generic way to access the atlas files. 
+You can also simply copy the path specified in the `atlas_output_files.yaml` + +#### In R you can use +``` R + +data_dir <- "atlas_data" # path specified as output_dir in the get_atlas_files script +atlas_version <- "v2.17" +file_config_files <- "../atlas_output_files.yaml" + +files <- yaml::yaml.load_file(file_config_files)[[atlas_version]] + +for (key1 in names(files)) { + value1 <- files[[key1]] + if (is.character(value1)) { + # It's a direct path + files[[key1]] <- file.path(data_dir, value1) + } else if (is.list(value1)) { + # It's a nested list, go deeper + for (key2 in names(value1)) { + value2 <- value1[[key2]] + files[[key1]][[key2]] <- file.path(data_dir, value2) + } + } +} + + +taxonomy_file <- files[["genomes"]][["taxonomy"]] +tree_file <- files[["genomes"]][["tree_bacteria"]] + +``` + +