From 5ac7d0a4701d8d983b574b2000805bcc72e8de1d Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 17 Jun 2024 11:03:21 -0700 Subject: [PATCH 01/13] Create policies and admin section --- _quarto.yml | 4 ++-- .../images/budget-threshold-alerts.png | Bin .../images/choose-budget-type.png | Bin .../images/cost-explorer-usage.png | Bin {policies-usage => policies-admin}/index.md | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) rename {policies-usage => policies-admin}/images/budget-threshold-alerts.png (100%) rename {policies-usage => policies-admin}/images/choose-budget-type.png (100%) rename {policies-usage => policies-admin}/images/cost-explorer-usage.png (100%) rename {policies-usage => policies-admin}/index.md (98%) diff --git a/_quarto.yml b/_quarto.yml index 6ed8429f..d4ee50e8 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -122,8 +122,8 @@ website: contents: - text: "Workshop Setup" href: workshops/setup.md - - text: "Policies & Usage Costs" - href: policies-usage/index.md + - text: "Policies & Administration" + href: policies-admin/index.md - section: "In Development" href: in-development/index.qmd contents: diff --git a/policies-usage/images/budget-threshold-alerts.png b/policies-admin/images/budget-threshold-alerts.png similarity index 100% rename from policies-usage/images/budget-threshold-alerts.png rename to policies-admin/images/budget-threshold-alerts.png diff --git a/policies-usage/images/choose-budget-type.png b/policies-admin/images/choose-budget-type.png similarity index 100% rename from policies-usage/images/choose-budget-type.png rename to policies-admin/images/choose-budget-type.png diff --git a/policies-usage/images/cost-explorer-usage.png b/policies-admin/images/cost-explorer-usage.png similarity index 100% rename from policies-usage/images/cost-explorer-usage.png rename to policies-admin/images/cost-explorer-usage.png diff --git a/policies-usage/index.md b/policies-admin/index.md similarity index 98% rename from policies-usage/index.md 
rename to policies-admin/index.md index 4651483b..7f153d01 100644 --- a/policies-usage/index.md +++ b/policies-admin/index.md @@ -1,5 +1,5 @@ --- -title: "Policies & Usage Costs" +title: "Policies & Administration" --- ## NASA Openscapes Policies From 17b23eacf810aa1f0324c1e7688620de11eb8924 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 17 Jun 2024 11:06:42 -0700 Subject: [PATCH 02/13] Move 'adding-people' to policies-admin section --- .../add-folks-to-2i2c-github-teams.qmd | 0 .../images/github-add-to-team.png | Bin .../images/github-notification-banner-part2.png | Bin .../images/github-notification-banner.png | Bin .../images/github-notification-email.png | Bin .../images/github-pending-member.png | Bin .../images/google-form-gh-username.png | Bin .../images/google-form-notifications.png | Bin .../images/google-form-view-response.png | Bin .../images/google-form.png | Bin 10 files changed, 0 insertions(+), 0 deletions(-) rename {leading-workshops => policies-admin}/add-folks-to-2i2c-github-teams.qmd (100%) rename {leading-workshops => policies-admin}/images/github-add-to-team.png (100%) rename {leading-workshops => policies-admin}/images/github-notification-banner-part2.png (100%) rename {leading-workshops => policies-admin}/images/github-notification-banner.png (100%) rename {leading-workshops => policies-admin}/images/github-notification-email.png (100%) rename {leading-workshops => policies-admin}/images/github-pending-member.png (100%) rename {leading-workshops => policies-admin}/images/google-form-gh-username.png (100%) rename {leading-workshops => policies-admin}/images/google-form-notifications.png (100%) rename {leading-workshops => policies-admin}/images/google-form-view-response.png (100%) rename {leading-workshops => policies-admin}/images/google-form.png (100%) diff --git a/leading-workshops/add-folks-to-2i2c-github-teams.qmd b/policies-admin/add-folks-to-2i2c-github-teams.qmd similarity index 100% rename from 
leading-workshops/add-folks-to-2i2c-github-teams.qmd rename to policies-admin/add-folks-to-2i2c-github-teams.qmd diff --git a/leading-workshops/images/github-add-to-team.png b/policies-admin/images/github-add-to-team.png similarity index 100% rename from leading-workshops/images/github-add-to-team.png rename to policies-admin/images/github-add-to-team.png diff --git a/leading-workshops/images/github-notification-banner-part2.png b/policies-admin/images/github-notification-banner-part2.png similarity index 100% rename from leading-workshops/images/github-notification-banner-part2.png rename to policies-admin/images/github-notification-banner-part2.png diff --git a/leading-workshops/images/github-notification-banner.png b/policies-admin/images/github-notification-banner.png similarity index 100% rename from leading-workshops/images/github-notification-banner.png rename to policies-admin/images/github-notification-banner.png diff --git a/leading-workshops/images/github-notification-email.png b/policies-admin/images/github-notification-email.png similarity index 100% rename from leading-workshops/images/github-notification-email.png rename to policies-admin/images/github-notification-email.png diff --git a/leading-workshops/images/github-pending-member.png b/policies-admin/images/github-pending-member.png similarity index 100% rename from leading-workshops/images/github-pending-member.png rename to policies-admin/images/github-pending-member.png diff --git a/leading-workshops/images/google-form-gh-username.png b/policies-admin/images/google-form-gh-username.png similarity index 100% rename from leading-workshops/images/google-form-gh-username.png rename to policies-admin/images/google-form-gh-username.png diff --git a/leading-workshops/images/google-form-notifications.png b/policies-admin/images/google-form-notifications.png similarity index 100% rename from leading-workshops/images/google-form-notifications.png rename to 
policies-admin/images/google-form-notifications.png diff --git a/leading-workshops/images/google-form-view-response.png b/policies-admin/images/google-form-view-response.png similarity index 100% rename from leading-workshops/images/google-form-view-response.png rename to policies-admin/images/google-form-view-response.png diff --git a/leading-workshops/images/google-form.png b/policies-admin/images/google-form.png similarity index 100% rename from leading-workshops/images/google-form.png rename to policies-admin/images/google-form.png From 9b6077642c669a5b5d9691f3e61b4409762d7976 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 17 Jun 2024 11:07:23 -0700 Subject: [PATCH 03/13] add code-workspace file to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8d1050b8..3b5a80c3 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ examples/NSIDC/data external/ how-tos/test.nc +earthdata-cloud-cookbook.code-workspace From 406b60383de726e6881f53ff4150922bcf0db139 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Mon, 17 Jun 2024 11:11:20 -0700 Subject: [PATCH 04/13] move leading workshops to admin chapter --- .../index.qmd => policies-admin/leading-workshops.qmd | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename leading-workshops/index.qmd => policies-admin/leading-workshops.qmd (100%) diff --git a/leading-workshops/index.qmd b/policies-admin/leading-workshops.qmd similarity index 100% rename from leading-workshops/index.qmd rename to policies-admin/leading-workshops.qmd From 8b98eb4a453d18094e6f87bf0551b365f268da66 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 15:23:20 -0700 Subject: [PATCH 05/13] Move hub access and Leading Workshops to policies/admin --- _quarto.yml | 14 +++++++------- policies-admin/index.md | 5 +++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index d4ee50e8..da92d3a6 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ 
-112,18 +112,18 @@ website: href: tutorials/fair-workflow-geoweaver-demo.ipynb - text: "MATLAB Access NetCDF" href: tutorials/matlab.qmd - - section: "Leading Workshops" - href: leading-workshops/index.qmd - contents: - - text: "2i2c Hub access" - href: leading-workshops/add-folks-to-2i2c-github-teams.qmd - section: "Workshops & Hackathons" href: workshops/index.qmd contents: - text: "Workshop Setup" href: workshops/setup.md - - text: "Policies & Administration" - href: policies-admin/index.md + - section: "Policies & Administration" + href: policies-admin/index.md + contents: + - text: "2i2c Hub access" + href: policies-admin/add-folks-to-2i2c-github-teams.qmd + - text: "Leading workshops" + href: policies-admin/leading-workshops.qmd - section: "In Development" href: in-development/index.qmd contents: diff --git a/policies-admin/index.md b/policies-admin/index.md index 7f153d01..2f3b5485 100644 --- a/policies-admin/index.md +++ b/policies-admin/index.md @@ -2,6 +2,11 @@ title: "Policies & Administration" --- +## Leading Workshops + +See the [leading workshops](leading-workshops.qmd) page for how to get set up +to lead a workshop using the NASA-Openscapes JupyterHub. + ## NASA Openscapes Policies Our current access policies are found here: From 87e9c9e4a99ea02da29dde277476ab09dd8bd82d Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 15:23:50 -0700 Subject: [PATCH 06/13] Add callout re new workshop access --- .../add-folks-to-2i2c-github-teams.qmd | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/policies-admin/add-folks-to-2i2c-github-teams.qmd b/policies-admin/add-folks-to-2i2c-github-teams.qmd index 7375ef64..9e392ad9 100644 --- a/policies-admin/add-folks-to-2i2c-github-teams.qmd +++ b/policies-admin/add-folks-to-2i2c-github-teams.qmd @@ -2,14 +2,6 @@ title: "How to Add Folks to the 2i2c Hub" --- - - We use GitHub Teams to manage access to the 2i2c Hub. 
There are three different patterns of access: @@ -118,21 +110,28 @@ email add a new row - in part so that we as admins knew if someone had already f 3. You can also check your email that you use for GitHub and look for an invitation from GitHub and NASA-Openscapes -## Adding workshop participants or Champions cohorts as a batch +## Adding Champions cohorts as a batch + +Participants in the Champions program workshops are given Openscapes +2i2c JupyterHub access, as are participants in certain workshops run by NASA Openscapes Mentors. -Participants in workshops run by NASA Openscapes Mentors are given Openscapes -2i2c JupyterHub access, as are participants in Champions Cohorts. +::: {.callout-note} +We have a newly developed process for giving people short-term access to the hub for workshops, with low +overhead for instructors and participants. This process +removes the need to add people to a GitHub team, and gives participants "just in time" access to a special workshop hub with a +username and shared workshop-specific password. Policies and instructions for this are currently being developed. +::: -We use a dedicated GitHub Organization - [nasa-openscapes-workshops](https://github.com/nasa-openscapes-workshops) - to manage access, with [GitHub Teams](https://github.com/orgs/nasa-openscapes-workshops/teams) for workshops and Champions Cohorts. +We use a dedicated GitHub Organization - [nasa-openscapes-workshops](https://github.com/nasa-openscapes-workshops) - to manage access, with [GitHub Teams](https://github.com/orgs/nasa-openscapes-workshops/teams) for Champions Cohorts and certain workshops. ### 1. Create a team in [nasa-openscapes-workshops](https://github.com/orgs/nasa-openscapes-workshops/teams) There are several teams in this organization; the `AdminTeam` team is for members who have permission to create teams and add members to teams. 
+- If this is for a new champions cohort, name the team `nasa-champions-yyyy` - If this is for a one-off workshop, name the team `[workshop-name]-[workshop-date]`, with workshop date in the format `yyyy-mm-dd` -- If this is for a new champions cohort, name the team `nasa-champions-yyyy` ### 2. Add team name to workshop registry From 5323d3d5519363c86b435e2c0ba953da9af3a5cf Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 15:57:35 -0700 Subject: [PATCH 07/13] Add policies from https://github.com/NASA-Openscapes/2i2cAccessPolicies --- _quarto.yml | 2 + policies-admin/data-policies.qmd | 207 +++++++++++++++++++++++++++++++ policies-admin/index.md | 45 ++++++- 3 files changed, 249 insertions(+), 5 deletions(-) create mode 100644 policies-admin/data-policies.qmd diff --git a/_quarto.yml b/_quarto.yml index da92d3a6..6c883022 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -120,6 +120,8 @@ website: - section: "Policies & Administration" href: policies-admin/index.md contents: + - text: "Data storage policies" + href: policies-admin/data-policies.qmd - text: "2i2c Hub access" href: policies-admin/add-folks-to-2i2c-github-teams.qmd - text: "Leading workshops" diff --git a/policies-admin/data-policies.qmd b/policies-admin/data-policies.qmd new file mode 100644 index 00000000..63f948f2 --- /dev/null +++ b/policies-admin/data-policies.qmd @@ -0,0 +1,207 @@ +--- +title: "Data Storage in the NASA Openscapes Hub" +--- + +Storing large amounts of data in the cloud can incur significant ongoing costs if not done optimally. We are charged daily for data stored in our Hub. We are developing technical strategies and policies to reduce storage costs that will keep the Openscapes 2i2c Hub a shared resource for us all to use, while also providing reusable strategies for other admins. 
+ +The Hub uses an [EC2](https://aws.amazon.com/ec2/) compute instance, with the +`$HOME` directory (`/home/jovyan/` in python images and `/home/rstudio/` in R +images) mounted to [AWS Elastic File System (EFS)](https://aws.amazon.com/efs/) +storage. This drive is really handy because it is persistent across server +restarts and is a great place to store your code. However, the `$HOME` directory +should not be used to store data, as it is very expensive, and can also be quite +slow to read from and write to. + +To that end, the Hub provides every user access to two [AWS +S3](https://aws.amazon.com/s3/) buckets - a "scratch" bucket for short-term +storage, and a "persistent" bucket for longer-term storage. AWS S3 buckets are +like online storage containers, accessible through the internet, where you can +store and retrieve files. S3 buckets have fast read/write, and storage costs are +relatively inexpensive compared to storing in your `$HOME` directory. All major +cloud providers provide a similar storage service - S3 is Amazon's version, while +Google provides "Google Cloud Storage", and Microsoft provides "Azure Blob Storage". + +These buckets are accessible only when you are working inside the Hub; you can +access them using the environment variables: + +- `$SCRATCH_BUCKET` pointing to `s3://openscapeshub-scratch/[your-username]` + - Scratch buckets are designed for storage of temporary files, e.g. + intermediate results. Objects stored in a scratch bucket are removed after + 7 days from their creation. +- `$PERSISTENT_BUCKET` pointing to `s3://openscapeshub-persistent/[your-username]` + - Persistent buckets are designed for storing data that is consistently used + throughout the lifetime of a project. There is no automatic purging of + objects in persistent buckets, so it is the responsibility of the Hub + admin and/or Hub users to delete objects when they are no longer needed to + minimize cloud billing costs. 
+ +### Using S3 Bucket Storage + +Please see the short tutorial in the Earthdata Cloud Cookbook on +[Using S3 Bucket Storage in NASA-Openscapes Hub](/how-tos/using-s3-storage.html). + +### Data retention and archiving policy + +User `$HOME` directories will be retained for six months after their last use. +After a home directory has been idle for six months, it will be [archived to our +"archive" S3 bucket, and removed](#how-to-archive-old-home-directories-admin). If a +user requests their archive back, an admin can restore it for them. + +Once a user's home directory archive has been sitting in the archive for an +additional six months, it will be permanently removed from the archive. After +this it can no longer be retrieved. + +In addition to these policies, admins will keep an eye on the +[Home Directory Usage Dashboard](https://grafana.openscapes.2i2c.cloud/d/bd232539-52d0-4435-8a62-fe637dc822be/home-directory-usage-dashboard?orgId=1) +in Grafana. When a user's home directory increases in size to over 100GB, we +will contact them and work with them to reduce the size of their home directory +- by removing large unnecessary files, and moving the rest to the appropriate S3 +bucket (e.g., `$PERSISTENT_BUCKET`). + +## The `_shared` directory + +[The `_shared` directory](https://infrastructure.2i2c.org/topic/infrastructure/storage-layer/#shared-directories) +is a place where instructors can put workshop materials +for participants to access. It is mounted as `/home/jovyan/shared`, and is _read +only_ for all users. For those with admin access to the Hub, it is also mounted +as a writeable directory as `/home/jovyan/shared-readwrite`. + +This directory will follow the same policies as users' home directories: after +six months, contents will be archived to the "archive" S3 bucket (more below). +After an additional six months, the archive will be deleted. 
+ +### How to archive old home directories (admin) + +To start, you will need to be an admin of the Openscapes Jupyterhub so that +the `allusers` directory is mounted in your home directory. This will contain +all users' home directories, and you will have full read-write access. + +#### Finding large `$HOME` directories + +Look at the [Home Directory Usage +Dashboard](https://grafana.openscapes.2i2c.cloud/d/bd232539-52d0-4435-8a62-fe637dc822be/home-directory-usage-dashboard?orgId=1) +in Grafana to see the directories that haven't been used in a long time and/or +are very large. + +You can also view and sort users' directories by size in the Hub with the +following command, though this takes a while because it has to summarize _a lot_ +of files and directories. This will show the 30 largest home directories: + +``` +du -h --max-depth=1 /home/jovyan/allusers/ | sort -hr | head -n 30 +``` + +#### Authenticate with S3 archive bucket + +We have created an AWS IAM user called `archive-homedirs` with appropriate +permissions to write to the `openscapeshub-prod-homedirs-archive` bucket. +Get access keys for this user from the AWS console, and use these keys to +authenticate in the Hub: + +In the terminal, type: + +``` +awsv2 configure +``` + +Enter the access key and secret key at the prompts, and set default region to +`us-west-2`. + +You will also need to temporarily unset some AWS environment variables that have +been configured to authenticate with NASA S3 storage. 
(These will be reset the next +time you log in): + +``` +unset AWS_ROLE_ARN +unset AWS_WEB_IDENTITY_TOKEN_FILE +``` + +Test to make sure you can access the archive bucket: + +``` +# test s3 access: +awsv2 s3 ls s3://openscapeshub-prod-homedirs-archive/archives/ +touch test123.txt +awsv2 s3 mv test123.txt s3://openscapeshub-prod-homedirs-archive/archives/ +awsv2 s3 rm s3://openscapeshub-prod-homedirs-archive/archives/test123.txt +``` + +#### Setting up and running the archive script + +We use a [python script](https://github.com/NASA-Openscapes/2i2cAccessPolicies/blob/main/scripts/archive-home-dirs.py), [developed by +@yuvipanda](https://github.com/2i2c-org/features/issues/32), that reproducibly +archives a list of users' directories into a specified S3 bucket. + +Copy the script into your home directory in the Hub. + +In the Hub as of 2024-05-17, a couple of dependencies for the script are +missing; you can install them before running the script: + +``` +pip install escapism + +# I had solver errors with pigz so needed to use the classic solver. +# Also, the installation of pigz required a machine with >= 3.7GB memory +conda install pigz --solver classic +``` + +Create a text file, with one username per line, of users' home directories you +would like to archive to s3. It will look like: + +``` +username1 +username2 +# etc... +``` + +Finally, run the script from the terminal, changing the parameter values as required: + +``` +python3 archive-home-dirs.py \ + --archive-name="archive-$(date +'%Y-%m-%d')" \ + --basedir=/home/jovyan/allusers/ \ + --bucket-name=openscapeshub-prod-homedirs-archive \ + --object-prefix="archives/" \ + --usernames-file=users-to-archive.txt \ + --temp-path=/home/jovyan/archive-staging/ +``` + +Omitted in the above example, but available to use, is the `--delete` flag, +which will delete the users' home directory once the archive is completed. 
+ +If you don't use the `--delete` flag, first verify that the archive was successfully +completed and then remove the user's home directory manually. + +##### Archiving the shared directory + +You can use the same script to archive directories in the `shared` directory, by modifying +the inputs slightly: + +- Set `--basedir=/home/jovyan/shared/`, (or `--basedir=/home/jovyan/shared-readwrite/` + if you want to be able to use the `--delete` flag). +- Create a file with a list of directories in the `shared` directory you want to archive, + and pass it to the `--usernames-file` argument. +- Set `--object-prefix="archives/_shared/"` to put the archives in the `_shared` subdirectory + in the archive bucket. + +E.g.: + +``` +python3 archive-home-dirs.py \ + --archive-name="archive-$(date +'%Y-%m-%d')" \ + --basedir=/home/jovyan/shared/ \ + --bucket-name=openscapeshub-prod-homedirs-archive \ + --object-prefix="archives/_shared/" \ + --usernames-file=/home/jovyan/shared-to-archive.txt \ + --temp-path=/home/jovyan/archive-staging/ +``` + +By default, archives (`.tar.gz`) are created in your `/tmp` directory before +upload to the S3 bucket. The `/tmp` directory is cleared out when you shut down +the Hub. However, `/tmp` has limited space (80GB shared by up to four users on a +single node), so if you are archiving many large directories, you will likely +need to specify a location in your `$HOME` directory by passing a path to the +`--temp-path` argument. The script will endeavour to clean up after itself and +remove the `tar.gz` file after uploading, but double check that directory +when you are finished or you may have copies of all of the other user +directories in your own `$HOME`! 
\ No newline at end of file diff --git a/policies-admin/index.md b/policies-admin/index.md index 2f3b5485..6f08da93 100644 --- a/policies-admin/index.md +++ b/policies-admin/index.md @@ -2,14 +2,49 @@ title: "Policies & Administration" --- -## Leading Workshops +## Access Policies -See the [leading workshops](leading-workshops.qmd) page for how to get set up -to lead a workshop using the NASA-Openscapes JupyterHub. +### Introduction -## NASA Openscapes Policies +A key objective of NASA Openscapes is to minimize “the time to science” for researchers. Cloud infrastructure can facilitate shortening this time. We use a 2i2c-managed JupyterHub, which lets us work in the cloud next to NASA Earthdata in AWS US-West-2. The purpose of the JupyterHub is to provide initial, exploratory experiences accessing NASA Earthdata in the cloud. It is not meant to be a long-term solution to support on-going science work or software development. For those users that decide working in the Cloud is advantageous and want to move there, we support a migration from the Hub to their own environment through Coiled.io. -Our current access policies are found here: +**Hub Management:** [2i2c](http://2i2c.org/) is a nonprofit that designs, develops, and operates JupyterHubs in the cloud for research and education, including NASA Openscapes. 2i2c ensures that Hubs are cloud-vendor agnostic and are built using open-source software such as JupyterHub and Kubernetes. + +**User Management and Access:** 2i2c manages users through GitHub Teams within the NASA-Openscapes GitHub organization. This requires new users to accept an invitation from NASA-Openscapes. Following that acceptance, the user can then log on to the 2i2c Hub with their Github credentials. Using the NASA Openscapes Hub, the only software requirement to launch the Hub are access to a computer and the internet. 
+ +**Hub Location and Right to Replicate:** Our Openscapes JupyterHub is built on top of AWS and is in-region with NASA Earthdata (AWS US-West-2). 2i2c gives users the [right to replicate](https://2i2c.org/right-to-replicate/) their infrastructure. This means that our Hub could be replicated on GoogleEarthEngine or Microsoft Azure, or ported to another AWS region. + +With this setup, we have flexibility to support a diverse range of user needs. The 2i2c Openscapes Hub has been used by the NASA-Openscapes Mentors and other NASA DAAC staff internally as a testing ground for developing cloud tutorials and workflows, but also externally in the research community for workshops like those for science teams and “Hackathons”, a term used here to describe multi-day events with split time for teaching and helping researchers implement concepts into their research projects. + +*This section drew from the ‘Solution’ section of the White Paper entitled, “[The Value of Hosted JupyterHubs in enabling Open NASA Earth Science in the Cloud](https://zenodo.org/records/7667299#.Y_Zxt3bMJPY)” (Nickles, et.al, 2022).* + +### Obtaining Access to the NASA Openscapes Hub + +Access is controlled by the NASA Openscapes Team, who oversee the management of the Hub and Cloud costs. The first step to gaining access to the NASA Openscapes 2i2c Hub is to request access via [this form](https://forms.gle/sLM9szAYN2mq6SbL9). + +Our JupyterHub users are managed in three GitHub Teams: + +* [Long-term access](https://github.com/orgs/nasa-openscapes-workshops/teams/longtermaccess-2i2c): This access is for NASA Openscapes mentors and team, DAAC staff and others who request a longer-term engagement +* NASA Openscapes Champions: This access is for teams that participate in the NASA Openscapes Champions Program. These teams have access for up to a year as they migrate their workflows to the Cloud. 
* Workshops and Hack-a-thons: This provides short-term access of up to one month to participants of NASA Earthdata workshops. Participants may be removed at any time and have no expectation of on-going storage in their home directories. + +Instructions for admins on how to add people to the hub can be found [here](add-folks-to-2i2c-github-teams.qmd). + +### Allowable Uses of 2i2c Hub + +Users who join these GitHub teams agree to use the NASA Openscapes Hub only for work on NASA EarthData related activities. Generally, the recommended instance size is the smallest instance (1.9GB RAM and up to 3.75 CPUs). + +Running large or parallel jobs over large geographic bounding boxes or over long temporal extents should be cleared with the NASA Openscapes Team by submitting an issue to this repo. + +### Removal From the NASA Openscapes Hub + +The NASA Openscapes Hub is a shared, limited resource that incurs real costs. Users are granted access in the terms above and are removed at the end of those limits. Users that haven’t accessed the Hub in more than six months are also removed for security purposes. + +We will do our best to alert users before they lose access to the NASA Openscapes Hub. However, we reserve the right to remove users at any time for any reason. Users that violate the terms of access or incur large Cloud costs without prior permission from the NASA Openscapes Team will be removed immediately. 
+ +## Data storage policies + +[Policies on data storage and use of the `HOME` directory can be found here](data-policies.qmd) ## AWS Cost Explorer From 0da0ed02e5aee9a345f93b2c6acb86388a24d9b6 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 22:58:32 -0700 Subject: [PATCH 08/13] Structure headings for cost management --- policies-admin/index.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/policies-admin/index.md b/policies-admin/index.md index 6f08da93..7c067706 100644 --- a/policies-admin/index.md +++ b/policies-admin/index.md @@ -44,9 +44,13 @@ We will do our best to alert users before they lose access to the NASA Openscape ## Data storage policies -[Policies on data storage and use of the `HOME` directory can be found here](data-policies.qmd) +Policies on data storage and use of the `HOME` directory can be found in the [data policies page](data-policies.qmd). -## AWS Cost Explorer +## Monitoring cloud usage costs + +### AWS Cost Explorer + + *This is a work in progress, currently with minimal steps and screenshots that we will augment.* @@ -54,13 +58,13 @@ AWS Cost Explorer lets you examine how much your usage costs. When using Credits ![AWS Cost Explorer. Charge type == "Usage"](images/cost-explorer-usage.png){fig-align="center" width="437"} -## AWS Budgeting Alerts +### AWS Budgeting Alerts *This is a work in progress, currently with minimal steps and screenshots that we will augment.* There are two types of alerts we set up. -### Budgeting alerts +#### Budgeting alerts When adding new Cloud credits to our AWS account, we also create a budget and alerts (received via email) as we spend our credits. These are some beginning notes (credit and thank you to Joe Kennedy!). @@ -72,7 +76,7 @@ Exclude Credits and Refunds, include Discounts. You can elect to receive emails We set these up at 50, 75, 90, 95% of the total budget, and we will receive emails at those percentages. 
The thinking is that we will need to request more credits starting at 50-75%, and then make sure we have them in hand by 90-95%. -### Threshold alerts +#### Threshold alerts We can also set up email alerts at certain dollar amounts. From e32246487901a0b90312647a86f0b1e520f6c3f4 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 23:04:42 -0700 Subject: [PATCH 09/13] Use qmd instead of md --- _quarto.yml | 6 +++--- policies-admin/{index.md => index.qmd} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename policies-admin/{index.md => index.qmd} (100%) diff --git a/_quarto.yml b/_quarto.yml index 6c883022..203371c5 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -118,12 +118,12 @@ website: - text: "Workshop Setup" href: workshops/setup.md - section: "Policies & Administration" - href: policies-admin/index.md + href: policies-admin/index.qmd contents: - - text: "Data storage policies" - href: policies-admin/data-policies.qmd - text: "2i2c Hub access" href: policies-admin/add-folks-to-2i2c-github-teams.qmd + - text: "Data storage policies" + href: policies-admin/data-policies.qmd - text: "Leading workshops" href: policies-admin/leading-workshops.qmd - section: "In Development" diff --git a/policies-admin/index.md b/policies-admin/index.qmd similarity index 100% rename from policies-admin/index.md rename to policies-admin/index.qmd From 4c1bc585236c963ebb25ad4d0854baea78d96f16 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 23:09:25 -0700 Subject: [PATCH 10/13] Add TODO for leading workshops --- policies-admin/leading-workshops.qmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/policies-admin/leading-workshops.qmd b/policies-admin/leading-workshops.qmd index 5c2dea19..812e1220 100644 --- a/policies-admin/leading-workshops.qmd +++ b/policies-admin/leading-workshops.qmd @@ -8,6 +8,8 @@ This section, under development, will include information for workshop prep, set ## Using the Openscapes 2i2c Hub in a workshop + + - Check with 
Luis that the Hub image has the packages you need - Reach out to 2i2c a month in advance via email `support at 2i2c.freshdesk.com` (example below) to tell them about the workshop date, start and end times, \# of participants, anticipated level of resources to be used. - [Add participants to the 2i2c Hub](add-folks-to-2i2c-github-teams.qmd) via a GitHub Team From b0d4640d0d5bc05f299484cf61546c012ca97ee0 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Tue, 18 Jun 2024 23:09:44 -0700 Subject: [PATCH 11/13] Redo section headings --- policies-admin/data-policies.qmd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/policies-admin/data-policies.qmd b/policies-admin/data-policies.qmd index 63f948f2..2a6218a7 100644 --- a/policies-admin/data-policies.qmd +++ b/policies-admin/data-policies.qmd @@ -40,7 +40,7 @@ access them using the environment variables: Please see the short tutorial in the Earthdata Cloud Cookbook on [Using S3 Bucket Storage in NASA-Openscapes Hub](/how-tos/using-s3-storage.html). -### Data retention and archiving policy +## Data retention and archiving policy User `$HOME` directories will be retained for six months after their last use. After a home directory has been idle for six months, it will be [archived to our @@ -58,7 +58,7 @@ will contact them and work with them to reduce the size of their home directory - by removing large unnecessary files, and moving the rest to the appropriate S3 bucket (e.g., `$PERSISTENT_BUCKET`). -## The `_shared` directory +### The `_shared` directory [The `_shared` directory](https://infrastructure.2i2c.org/topic/infrastructure/storage-layer/#shared-directories) is a place where instructors can put workshop materials @@ -70,13 +70,13 @@ This directory will follow the same policies as users' home directories: after six months, contents will be archived to the "archive" S3 bucket (more below). After an additional six months, the archive will be deleted. 
-### How to archive old home directories (admin) +## How to archive old home directories (admin) To start, you will need to be an admin of the Openscapes Jupyterhub so that the `allusers` directory is mounted in your home directory. This will contain all users' home directories, and you will have full read-write access. -#### Finding large `$HOME` directories +### Finding large `$HOME` directories Look at the [Home Directory Usage Dashboard](https://grafana.openscapes.2i2c.cloud/d/bd232539-52d0-4435-8a62-fe637dc822be/home-directory-usage-dashboard?orgId=1) @@ -91,7 +91,7 @@ of files and directories. This will show the 30 largest home directories: du -h --max-depth=1 /home/jovyan/allusers/ | sort -hr | head -n 30 ``` -#### Authenticate with S3 archive bucket +### Authenticate with S3 archive bucket We have created an AWS IAM user called `archive-homedirs` with appropriate permissions to write to the `openscapeshub-prod-homedirs-archive` bucket. @@ -126,7 +126,7 @@ awsv2 s3 mv test123.txt s3://openscapeshub-prod-homedirs-archive/archives/ awsv2 s3 rm s3://openscapeshub-prod-homedirs-archive/archives/test123.txt ``` -#### Setting up and running the archive script +### Setting up and running the archive script We use a [python script](https://github.com/NASA-Openscapes/2i2cAccessPolicies/blob/main/scripts/archive-home-dirs.py), [developed by @yuvipanda](https://github.com/2i2c-org/features/issues/32), that reproducibly @@ -172,7 +172,7 @@ which will delete the users' home directory once the archive is completed. If you don't use the `--delete` flag, first verify that the archive was successfully completed and then remove the user's home directory manually. -##### Archiving the shared directory +### Archiving the shared directory You can use the same script to archive directories in the `shared` directory, by modifying the inputs slightly: @@ -204,4 +204,4 @@ need to specify a location in your `$HOME` directory by passing a path to the `--temp-path` argument. 
The script will endeavour to clean up after itself and remove the `tar.gz` file after uploading, but double check that directory when you are finished or you may have copies of all of the other user -directories in your own `$HOME`! \ No newline at end of file +directories in your own `$HOME`! From de7909e45f0da79baba013ecab189acb38c22024 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Wed, 19 Jun 2024 09:56:52 -0700 Subject: [PATCH 12/13] Add links to policies in sample email --- policies-admin/add-folks-to-2i2c-github-teams.qmd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/policies-admin/add-folks-to-2i2c-github-teams.qmd b/policies-admin/add-folks-to-2i2c-github-teams.qmd index 9e392ad9..bccffcc8 100644 --- a/policies-admin/add-folks-to-2i2c-github-teams.qmd +++ b/policies-admin/add-folks-to-2i2c-github-teams.qmd @@ -82,9 +82,10 @@ Go back to the Google form response and grab their email address. Send the follo > > Hi \[FIRST NAME\], > -> I have added you to the NASA Openscapes 2i2c Jupyter Hub. Here is the link to the hub:   +> I have added you to the NASA Openscapes 2i2c Jupyter Hub. Here is the link to the hub: > -> There is a getting started guide in the NASA Earthdata Cloud Cookbook here: +> There is a getting started guide in the NASA Earthdata Cloud Cookbook here: . +> You can see policies for hub use here: , and policies and best practices for data storage here > > We'd love to know about the kind of work you are doing on the hub. Please add a brief description of your progress as you go at . We will follow up in the next few months.  
> From 3ee7cc5d1bf32b254e41971e421bb4c39cebde35 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Wed, 19 Jun 2024 14:28:15 -0700 Subject: [PATCH 13/13] Apply suggestions from code review Co-authored-by: Julia Stewart Lowndes --- policies-admin/index.qmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/policies-admin/index.qmd b/policies-admin/index.qmd index 7c067706..bbe74d3a 100644 --- a/policies-admin/index.qmd +++ b/policies-admin/index.qmd @@ -26,7 +26,7 @@ Our JupyterHub users are managed in three GitHub Teams: * [Long-term access](https://github.com/orgs/nasa-openscapes-workshops/teams/longtermaccess-2i2c): This access is for NASA Openscapes mentors and team, DAAC staff and others who request a longer-term engagement * NASA Openscapes Champions: This access is for teams that participate in the NASA Openscapes Champions Program. These teams have access for up to a year as they migrate their workflows to the Cloud. -* Workshops and Hack-a-thons: This provides short term access of up to 1-month to participants of NASA Earthdata workshops. Participants will be removed at any time and have no expectation of on-going storage in their home directories. +* Workshops and Hackathons: This provides short-term access of up to 1 month to participants of NASA Earthdata workshops. Participants may be removed at any time and have no expectation of ongoing storage in their home directories. Instructions for admins on how to add people to the hub can be found [here](add-folks-to-2i2c-github-teams.qmd). @@ -34,7 +34,7 @@ Instructions for admins on how to add people to the hub can be found [here](add- Users who join these GitHub teams agree to use the NASA Openscapes Hub only for work on NASA EarthData related activities. Generally, recommended instance size is the smallest instance (1.9GB RAM and up to 3.75 CPUs). 
-Run large or parallel jobs over large geographic bounding boxes or over long temporal extents should be cleared with the NASA Openscapes Team by submitting an issue to this repo. +Running large or parallel jobs over large geographic bounding boxes or over long temporal extents should be cleared with the NASA Openscapes Team by submitting an issue to the [2i2cAccessPolicies repository](https://github.com/NASA-Openscapes/2i2cAccessPolicies). ### Removal From the NASA Openscapes Hub