Skip to content

Commit

Permalink
download cltk deps
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Oct 15, 2021
1 parent cc2011c commit de325eb
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
python dl_cltk_models.py
pip install coverage
- name: Run collectstatic
run: python manage.py collectstatic --settings=djangobaseproject.settings.pg_local
Expand Down
23 changes: 23 additions & 0 deletions dl_cltk_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from cltk.dependency.stanza import StanzaWrapper
from cltk.data.fetch import FetchCorpus


def download_stanza_model(iso_code):
    """Fetch the Stanza model for one language via CLTK's ``StanzaWrapper``.

    Args:
        iso_code: Language code passed to ``StanzaWrapper`` as ``language``.
    """
    # The wrapper instance is discarded; it is built only for its side effect.
    # NOTE(review): presumably the constructor downloads the model when it is
    # missing and ``interactive=False`` suppresses the confirmation prompt so
    # the script can run unattended -- confirm against the CLTK docs.
    wrapper_options = {
        "language": iso_code,
        "interactive": False,
        "silent": False,
    }
    StanzaWrapper(**wrapper_options)

    done_message = f"Finished downloading Stanza for '{iso_code}'."
    print(done_message)


def download_cltk_models_repo(iso_code):
    """Import the CLTK models corpus for a language, plus extras for Latin.

    The corpus ``"<iso_code>_models_cltk"`` is always imported. When
    ``iso_code`` is ``"lat"``, the ``cltk_lat_lewis_elementary_lexicon``
    corpus is imported as well. Progress is reported on stdout.

    Args:
        iso_code: Language code passed to ``FetchCorpus`` as ``language`` and
            used to build the models corpus name.
    """
    print(f"Going to download CLTK models for '{iso_code}'.")

    fetcher = FetchCorpus(language=iso_code)

    # Collect the corpus names first, then import them in order.
    wanted_corpora = [f"{iso_code}_models_cltk"]
    if iso_code == "lat":
        wanted_corpora.append("cltk_lat_lewis_elementary_lexicon")

    for name in wanted_corpora:
        fetcher.import_corpus(corpus_name=name)

    print(f"Finished downloading CLTK models for '{iso_code}'.")


if __name__ == "__main__":
    # Script entry point: only Latin ("lat") resources are fetched.
    iso_code = "lat"
    # Order matters only in that it mirrors the original: CLTK corpora first,
    # then the Stanza model.
    for download_step in (download_cltk_models_repo, download_stanza_model):
        download_step(iso_code)

0 comments on commit de325eb

Please sign in to comment.