commit loop over lucidrains

Agora-Lab-AI · Sep 15, 2023 · 430d586 · 430d586
1 parent 7e52be4
commit 430d586
Show file tree

Hide file tree

Showing 1,088 changed files with 2,909,037 additions and 51 deletions.
diff --git a/.gitignore b/.gitignore
@@ -23,6 +23,7 @@ var/
 wheels/
 share/python-wheels/
 *.egg-info/
+lucidrains_repositories
 .installed.cfg
 *.egg
 MANIFEST

diff --git a/lucidrains_repositories/ANANSE-master/.codeclimate.yml b/lucidrains_repositories/ANANSE-master/.codeclimate.yml
@@ -0,0 +1,30 @@
+version: '2'
+
+plugins:
+  duplication:  # default: 2 (raised to 3 here)
+    enabled: true
+    config:
+      count_threshold: 3
+
+checks:
+  method-complexity:  # default: 5 (raised to 15 here)
+    config:
+      threshold: 15
+  file-lines:  # default: 250 (check switched off)
+    enabled: false
+  argument-count:  # default: 4 (check switched off)
+    enabled: false
+
+exclude_patterns:
+  - '.*'
+  - 'LICENSE'
+  - 'MANIFEST.in'
+  - '*.md'
+  - 'ananse/_version.py'
+  - 'ananse/db/'
+  - 'docs/'
+  - '*.yml'
+  - '*.yaml'
+  - 'setup.*'
+  - 'tests/'
+  - 'versioneer.py'
diff --git a/lucidrains_repositories/ANANSE-master/.gitattributes b/lucidrains_repositories/ANANSE-master/.gitattributes
@@ -0,0 +1 @@
+ananse/_version.py export-subst
diff --git a/lucidrains_repositories/ANANSE-master/.github/workflows/continuousdeployment.yml b/lucidrains_repositories/ANANSE-master/.github/workflows/continuousdeployment.yml
@@ -0,0 +1,34 @@
+name: continuous-deployment
+
+# Creates a GitHub release whenever a tag starting with "v" is pushed.
+on:
+  push:
+    tags:
+      - 'v*'
+
+jobs:
+  build-and-deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - uses: actions/setup-python@v1
+      with:
+        python-version: "3.7"  # quoted so YAML keeps it a string, not a float
+
+    - name: Get version from tag
+      id: tag_name  # output: steps.tag_name.outputs.current_version (ref minus "refs/tags/v")
+      run: |
+        echo "current_version=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
+      shell: bash
+
+    - name: Create Release
+      uses: actions/create-release@latest
+      env:
+        GITHUB_TOKEN: ${{ secrets.AccessToken }}  # repository secret named "AccessToken"
+      with:
+        tag_name: ${{ github.ref }}
+        release_name: Release ${{ github.ref }}
+        draft: false
+        prerelease: false
diff --git a/lucidrains_repositories/ANANSE-master/.github/workflows/continuousintegration.yml b/lucidrains_repositories/ANANSE-master/.github/workflows/continuousintegration.yml
@@ -0,0 +1,50 @@
+name: continuous-integration
+
+on: [ push, pull_request ]  # lint and test on every push and pull request
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    defaults:
+      run:
+        shell: bash -l {0}  # login shell; presumably needed for conda activation - confirm
+    strategy:
+      matrix:
+        python-version: ["3.8"]  # ["3.7", "3.8"]; quoted so versions stay strings
+        os: [ubuntu-latest]  # [ubuntu-latest, macos-latest]
+      fail-fast: false
+
+    env:  # TODO: switch to "ananse/ tests/"
+      TARGETS: "ananse/commands/__init__.py ananse/commands/enhancer_binding.py ananse/commands/network.py ananse/__init__.py ananse/enhancer_binding.py ananse/distributions.py ananse/network.py ananse/utils.py tests/"
+
+    steps:
+    - name: checkout 👀
+      uses: actions/checkout@v2
+
+    - name: Install dependencies 🔨
+      uses: conda-incubator/setup-miniconda@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+        mamba-version: "*"
+        channels: bioconda,conda-forge,defaults
+        channel-priority: true
+        activate-environment: ananse_dev
+        environment-file: requirements.yaml
+
+    - name: Code formatting (black) 🖤
+      run: |
+        black --check $(echo $TARGETS)
+
+    - name: Code formatting (flake8) 🎱
+      run: |
+        flake8 $(echo $TARGETS)
+
+    - name: Run tests 🧬
+      run: |
+        pytest -vv --disable-pytest-warnings \
+        --cov=./ --cov-report=xml
+
+    - name: Upload code coverage ☂️
+      uses: paambaati/codeclimate-action@v2.7.5  # NOTE(review): ref was garbled to "[email protected]"; name/version reconstructed - confirm
+      env:
+        CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
diff --git a/lucidrains_repositories/ANANSE-master/.gitignore b/lucidrains_repositories/ANANSE-master/.gitignore
@@ -0,0 +1,27 @@
+tests/data/
+tests/output/
+tests/**/*pyc
+.cache/
+.idea/
+.vscode/
+build/
+dist/
+site/
+__pycache__
+**/.ipynb_checkpoints/
+dask-worker-space/
+*egg-info
+bld.bat
+build.sh
+pushgit.sh
+pullgit.sh
+tomaster.sh
+conda_build.sh
+**/*.DS_Store
+
+# code coverage results
+.coverage
+coverage.xml
+
+# temporary dirs
+testdata
diff --git a/lucidrains_repositories/ANANSE-master/CHANGELOG.md b/lucidrains_repositories/ANANSE-master/CHANGELOG.md
@@ -0,0 +1,38 @@
+# Changelog
+
+Here, the changes to `ANANSE` will be summarized.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
+
+## [Unreleased]
+
+### Added
+
+### Changed
+
+### Removed
+
+### Fixed
+
+
+## [0.3.0] - 2021-07-14
+
+### Added
+
+- `ananse view` command to view the `binding.h5` file that is now produced by `ananse binding`.
+- Support for a region/table file as input to `ananse binding`.
+- In-built support for mouse.
+- Warning with information if a species other than human or mouse is used.
+- Warning if annotation files don't match for `ananse influence`.
+- Improved logging messages.
+- Better checking of input files.
+
+### Changed
+
+- `ananse binding` produces a HDF5 file (`binding.h5`) which is much smaller on disk.
+- Better memory performance of `ananse network`.
+- Removed threshold for differential network in `ananse influence`.
+
+### Fixed
+
+- Gene names don't get capitalized in `ananse influence` (#87).
diff --git a/lucidrains_repositories/ANANSE-master/LICENSE b/lucidrains_repositories/ANANSE-master/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/lucidrains_repositories/ANANSE-master/MANIFEST.in b/lucidrains_repositories/ANANSE-master/MANIFEST.in
@@ -0,0 +1,10 @@
+include README.md
+include LICENSE
+include scripts/**
+graft ananse/db/**
+include ananse/db/tfs.txt
+include ananse/db/hg38.genes.bed
+include ananse/db/hg19.genes.bed
+include ananse/db/lovering.tfs.xlsx
+include versioneer.py
+include ananse/_version.py
diff --git a/lucidrains_repositories/ANANSE-master/README.md b/lucidrains_repositories/ANANSE-master/README.md
@@ -0,0 +1,117 @@
+# ANANSE: ANalysis Algorithm for Networks Specified by Enhancers
+[![bioconda-badge](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io)
+[![Anaconda-Server Badge](https://anaconda.org/bioconda/ananse/badges/version.svg)](https://anaconda.org/bioconda/ananse)
+[![Anaconda-Server Badge](https://anaconda.org/bioconda/ananse/badges/downloads.svg)](https://anaconda.org/bioconda/ananse)
+
+[![Documentation Status](https://readthedocs.org/projects/anansepy/badge/?version=master)](https://anansepy.readthedocs.io/en/master/?badge=master)
+[![Anaconda-Server Badge](https://anaconda.org/bioconda/ananse/badges/license.svg)](https://anaconda.org/bioconda/ananse)
+[![DOI:10.1101/2020.06.05.135798](http://img.shields.io/badge/DOI-10.1101/2020.06.05.135798-B31B1B.svg)](https://doi.org/10.1101/2020.06.05.135798)
+
+[![Maintainability](https://api.codeclimate.com/v1/badges/875df8c40fec66d68b1f/maintainability)](https://codeclimate.com/github/vanheeringen-lab/ANANSE/maintainability)
+[![Test Coverage](https://api.codeclimate.com/v1/badges/875df8c40fec66d68b1f/test_coverage)](https://codeclimate.com/github/vanheeringen-lab/ANANSE/test_coverage)
+### Prediction of key transcription factors in cell fate determination using enhancer networks
+ANANSE is a computational approach to infer enhancer-based gene regulatory networks (GRNs) and to use these GRNs to identify the key transcription factors in cell fate determination. You can use it to generate a shortlist of transcription factors for trans-differentiation experiments, but also to generate cell type-specific gene regulatory networks or to study transcription regulation during development and differentiation. It is written in Python and it contains three command-line scripts: `ananse binding`, `ananse network`, and `ananse influence`. A graphical overview of the tools is shown below.
+
+![](docs/img/Fig2.png)
+
+## Quick start
+
+Read the **[full ANANSE documentation](https://anansepy.readthedocs.io/en/master/)** for detailed installation instructions and usage examples. For documentation on the **development version** see [here](https://anansepy.readthedocs.io/en/develop/).
+
+### Installation
+
+The most straightforward way to install ANANSE is via conda using the bioconda channel.
+
+#### 1. If you have not used bioconda before, first set up the necessary channels (in this order!). You only have to do this once.
+
+```
+$ conda config --add channels defaults
+$ conda config --add channels bioconda
+$ conda config --add channels conda-forge
+```
+
+#### 2. Install ANANSE from bioconda
+
+``` 
+# Create an environment called ananse with all dependencies
+$ conda create -n ananse ananse
+
+# Activate the environment
+$ conda activate ananse
+```
+
+Don't forget to activate the environment with `conda activate ananse` whenever you want to use ANANSE.
+
+#### 3. Using the development version
+
+This is the latest version, but it may not always be stable.
+
+```
+# Activate the environment
+$ conda activate ananse
+
+# Install development version
+$ pip install git+https://github.com/vanheeringen-lab/ANANSE.git@develop
+```
+
+### Usage
+
+
+
+The three command-line tools (`binding`, `network` and `influence`) can be used separately, but are designed to work together. In general, for a full ANANSE analysis, you would infer binding and calculate the GRN for two (or more) different cell types and then use `ananse influence` to determine influential TFs for the transition from one cell type to the other.
+
+Before you can use the ANANSE tools, you have to install your genome with corresponding annotation using [genomepy](https://github.com/vanheeringen-lab/genomepy). For instance, to use `hg38`:
+
+```
+genomepy install hg38 --annotation
+```
+
+
+#### Genome-wide prediction of transcription factor binding: ananse binding
+
+To predict binding, you need ATAC-seq and/or H3K27ac ChIP-seq data as BAM files. Using both types of data will give the most accurate results; however, either of the two will also work. ANANSE will automatically choose the relevant model depending on which data you use as input. If you have human data, mapped to `hg38`, you can use a more advanced model based on 
+
+```
+ananse binding -A <ATAC.bam> -H <H3k27ac.bam> -o out
+```
+
+
+#### Gene regulatory network inference: ananse network
+
+To create a gene regulatory network you will need a binding prediction from `ananse binding` and one or more files with gene expression quantification. Each file should have the **gene** identifier in the first column and a column with `TPM` as the header. You can use, for instance, the `quant.sf` from salmon or the `abundances.tsv` from kallisto, converted to gene-level TPMs with [tximport](https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html). Here we will run `ananse network` with 4 threads:
+
+```
+ananse network -b out/binding.h5 -e <gene_tpm.txt> -o network.txt -n 4
+```
+
+#### Transcription factor influence score: ananse influence
+
+To calculate the influence score, you will need two network files from `ananse network` and a differential expression file. The differential expression file can be generated with DESeq2, where you use the *source* cell type as the reference. This means that up-regulated genes (log2 fold change > 0) will have a higher expression in the *target* cell type.
+
+```
+ananse influence -s source.network.txt -t target.network.txt -d source2target.de.tsv -o source2target.out.txt -n 4 -p
+```
+
+## Development installation
+
+* Clone the repo from git.
+* Checkout the `develop` branch.
+* Install a development environment with conda: `conda env create -n ananse_dev -f requirements.yaml`.
+* Activate the environment with `conda activate ananse_dev`.
+* Install ANANSE with `python setup.py develop`.
+
+## Citation
+
+> ANANSE: an enhancer network-based computational approach for predicting key transcription factors in cell fate determination 
+> Quan Xu, Georgios Georgiou, Siebren Frölich, Maarten van der Sande, Gert Jan C Veenstra, Huiqing Zhou, Simon J van Heeringen
+> Nucleic Acids Research, gkab598, https://doi.org/10.1093/nar/gkab598
+
+
+## Help and Support
+
+* The preferred way to get support is through the [Github issues page](https://github.com/vanheeringen-lab/ANANSE/issues).
+
+## License
+
+  - **[MIT license](http://opensource.org/licenses/mit-license.php)** [![Anaconda-Server Badge](https://anaconda.org/qxuchn/ananse/badges/license.svg)](https://anaconda.org/qxuchn/ananse)
+  - Copyright 2020 © <a href="https://github.com/vanheeringen-lab" target="_blank">vanheeringen-lab</a>.
diff --git a/lucidrains_repositories/ANANSE-master/ananse/__init__.py b/lucidrains_repositories/ANANSE-master/ananse/__init__.py
@@ -0,0 +1,22 @@
+from ._version import get_versions
+import os
+import sys
+from loguru import logger
+
+# Remove default logger
+logger.remove()
+# Add logger
+logger.add(sys.stderr, format="{time} | {level} | {message}", level="INFO")
+
+# This is here to prevent very high memory usage on numpy import.
+# On a machine with many cores, just importing numpy can result in up to
+# 8GB of (virtual) memory. This wreaks havoc on management of the dask
+# workers.
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["OPENBLAS_NUM_THREADS"] = "1"
+os.environ["MKL_NUM_THREADS"] = "1"
+os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
+os.environ["NUMEXPR_NUM_THREADS"] = "1"
+
+__version__ = get_versions()["version"]
+del get_versions