From adbbb143dca82cb041e9daec2a2008279153cf3f Mon Sep 17 00:00:00 2001
From: Silas Kieser <SilasK@users.noreply.github.com>
Date: Wed, 4 Oct 2023 12:36:24 +0200
Subject: [PATCH] add explanation for get_example_data

---
 README.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 0594bbb..c65d772 100644
--- a/README.md
+++ b/README.md
@@ -66,10 +66,70 @@ See also the [get started](https://metagenome-atlas.readthedocs.io/en/latest/usa
 
 <!--
 [This cool report](http://htmlpreview.github.io/?https://github.com/metagenome-atlas/Tutorial/blob/master/Example/Results/Summary.html):sparkles: shows the most interesting output of Atlas.
-
-
 Metagenome-Atlas produces a lot of other outputs from the QC and assembly steps. They are  summarized reports such as these ones:
 - [QC_report](https://metagenome-atlas.readthedocs.io/en/latest/_static/QC_report.html)
 - [assembly report](https://metagenome-atlas.readthedocs.io/en/latest/_static/assembly_report.html).
 -->
 
+
+
+## Use this code for your project
+
+First, clone this git repository.
+### Copy atlas files to your local machine.
+I made some handy scripts to copy the most important atlas output files from a server to your local machine.
+As the output files might change between different versions of atlas I use the file [`atlas_output_files.yaml`](atlas_output_files.yaml)
+to specify them. Check which atlas version is the closest to the atlas version you used.
+
+You can run `get_atlas_files.py` or `get_atlas_files.R` to do this. 
+
+The Python script asks for the following information and stores it in `.connection_details.yaml`.
+```
+    "output_dir": 'atlas_data',
+    "atlas_version": "v2.17",
+    "username": "me",
+    "server": "myserver.server.com",
+    "base_path_server": '/home/user/my_atlas_run',
+    "private_key_path": None # "C:/Users/User/.ssh/id_rsa"
+```
+
+For the R script, you need to hard-code these values in the script itself.
+
+:warning: Some output atlas files might be very large, e.g. the gene catalog.
+
+
+### Use files specified in the `atlas_output_files.yaml`
+
+This may look complicated, but it is a generic way to access the atlas files.
+You can also simply copy the paths specified in `atlas_output_files.yaml`.
+
+#### In R you can use
+``` R
+
+data_dir <- "atlas_data" # path specified as output_dir in the get_atlas_files script
+atlas_version <- "v2.17"
+file_config_files <- "../atlas_output_files.yaml"
+
+files <- yaml::yaml.load_file(file_config_files)[[atlas_version]]
+
+for (key1 in names(files)) {
+  value1 <- files[[key1]]
+  if (is.character(value1)) {
+    # It's a direct path
+    files[[key1]] <- file.path(data_dir, value1)
+  } else if (is.list(value1)) {
+    # It's a nested list, go deeper
+    for (key2 in names(value1)) {
+      value2 <- value1[[key2]]
+      files[[key1]][[key2]] <- file.path(data_dir, value2)
+    }
+  }
+}
+
+
+taxonomy_file <- files[["genomes"]][["taxonomy"]]
+tree_file <- files[["genomes"]][["tree_bacteria"]]
+
+```
+
+