Skip to content

Commit

Permalink
global: bump python, update ci, convert to pyproject.toml
Browse files Browse the repository at this point in the history
  • Loading branch information
DonHaul committed Oct 15, 2024
1 parent c046c9c commit bc3b9b3
Show file tree
Hide file tree
Showing 9 changed files with 1,140 additions and 210 deletions.
115 changes: 45 additions & 70 deletions .github/workflows/build-and-release.yml
Original file line number Diff line number Diff line change
@@ -1,75 +1,50 @@
name: Build and release
name: Pull request master (lint and test)

on:
push:
branches: [master]
pull_request:
branches: [master]
push:
branches: [master]
pull_request:
branches: [master]

env:
REGISTRY: registry.cern.ch
IMAGE: cern-sis/inspirehep/refextract

jobs:
test_and_publish:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0

- name: Install dependencies
run: sudo apt-get update && sudo apt-get install poppler-utils

- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install python dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel
pip install -e .[docs,tests]
- name: Show python dependencies
run: |
pip freeze
- name: Run tests
run: |
./run-tests.sh
- name: Build package
if: ${{ success() && github.event_name == 'push' }}
run: |
python setup.py sdist bdist_wheel
push_to_registry_and_deploy:
runs-on: ubuntu-20.04
needs: [test_and_publish]
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Pre-commit check
uses: pre-commit/[email protected]
- name: Build Image
id: build
uses: cern-sis/gh-workflows/.github/actions/docker-build@v6
with:
registry: ${{ env.REGISTRY }}
image: ${{ env.IMAGE }}
cache: false
username: ${{ secrets.HARBOR_USERNAME }}
password: ${{ secrets.HARBOR_PASSWORD }}

- name: Deploy QA
uses: cern-sis/gh-workflows/.github/actions/kubernetes-project-new-images@v6
with:
event-type: update
images: ${{ env.REGISTRY }}/${{ env.IMAGE }}@${{ steps.build.outputs.image-digest }}
token: ${{ secrets.PAT_FIRE_EVENTS_ON_CERN_SIS_KUBERNETES }}
# Job: lint the checked-out tree with pre-commit, then build the
# `refextract-tests` image stage and run pytest inside that image.
# NOTE(review): indentation appears flattened in this view; the nesting under
# `jobs:` must be restored for this to be valid workflow YAML.
lint_and_test:
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@v4
with:
# Check out the ref that triggered the workflow run.
ref: ${{ github.ref }}
- name: Lint - Pre-commit check
# NOTE(review): "[email protected]" below looks like an e-mail-obfuscation
# artifact rather than a valid `owner/repo@ref` pin - restore the real ref.
uses: pre-commit/[email protected]
- name: Prep Build
uses: docker/setup-buildx-action@v3
- name: Build Docker image
# Builds only up to the `refextract-tests` stage and tags it `refextract`.
run: docker build --target refextract-tests -t refextract .
# The folded scalar below joins into one command line: the image entrypoint is
# overridden with `poetry`, and `run pytest` is passed as its arguments.
- name: Run tests
run: >
docker run
--entrypoint poetry
refextract
run pytest
# Job: builds the image through the shared docker-build action and hands the
# resulting image digest to the QA deploy action.
# Gated twice: it needs lint_and_test to pass, and it only runs for push events
# on refs/heads/master (so pull requests never reach it).
push_and_deploy:
runs-on: ubuntu-latest
needs: [lint_and_test]
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
steps:
# NOTE(review): this job lists no checkout step; presumably the docker-build
# action fetches the sources itself - confirm.
- name: Build Image
id: build
uses: cern-sis/gh-workflows/.github/actions/docker-build@v6
with:
registry: ${{ env.REGISTRY }}
# presumably the Dockerfile stage to build (the runtime stage is named
# `refextract`) - confirm against the action's inputs
stage: refextract
image: ${{ env.IMAGE }}
cache: false
username: ${{ secrets.HARBOR_USERNAME }}
password: ${{ secrets.HARBOR_PASSWORD }}
- name: Deploy QA
uses: cern-sis/gh-workflows/.github/actions/kubernetes-project-new-images@v6
with:
event-type: update
# Image reference is pinned by the digest output of the `build` step above.
images: ${{ env.REGISTRY }}/${{ env.IMAGE }}@${{ steps.build.outputs.image-digest }}
token: ${{ secrets.PAT_FIRE_EVENTS_ON_CERN_SIS_KUBERNETES }}
23 changes: 19 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
FROM python:3.8
RUN apt update && apt install poppler-utils -y
COPY setup.py setup.cfg README.rst ./
# Runtime stage: Python 3.11.6 on the slim Debian bullseye base image.
FROM python:3.11.6-slim-bullseye AS refextract

ARG APP_HOME=/refextract
WORKDIR ${APP_HOME}

# System packages required by the application: poppler-utils and libmagic1.
# apt-get is used instead of apt because apt's command-line interface is not
# intended for scripts; the package lists are removed in the same layer so
# they do not end up in the image.
RUN apt-get update \
    && apt-get install -y poppler-utils libmagic1 \
    && rm -rf /var/lib/apt/lists/*

# With more than one source, the COPY destination must be a directory and end
# with "/", hence the explicit trailing slash after ${APP_HOME}.
COPY poetry.lock pyproject.toml README.rst ${APP_HOME}/

RUN pip install --no-cache-dir poetry
# Install into the image's interpreter (no virtualenv) and restrict the
# install to the main dependency group.
# NOTE(review): poetry is unpinned and the package sources are copied only
# after this step; newer Poetry releases may refuse to install the root
# project here - consider pinning poetry or passing --no-root.
RUN poetry config virtualenvs.create false \
    && poetry install --only main
COPY refextract refextract/
RUN python setup.py install


ENV PROMETHEUS_MULTIPROC_DIR='/tmp'
ENTRYPOINT exec gunicorn -b :5000 --access-logfile - --error-logfile - refextract.app:app --timeout 650

# Test stage: extends the `refextract` runtime stage with the test suite.
FROM refextract AS refextract-tests

COPY tests tests/
# No `--only` filter here, so dependency groups beyond `main` (the dev group
# with pytest etc.) are installed as well.
RUN poetry install
1,012 changes: 1,012 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Core package metadata read by Poetry.
[tool.poetry]
name = "refextract"
version = "0.1.0"
description = "Small library for extracting references used in scholarly communication."
readme = "README.rst"
homepage = "https://github.com/inspirehep/refextract"
license = "GPL-2.0-or-later"
authors = [
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation placeholder,
# not a real address - restore the actual contact before publishing.
"CERN <[email protected]>"
]
# Trove classifiers shown on the package index.
# The Python version entries must agree with the `python` constraint declared
# under [tool.poetry.dependencies] (">=3.11,<4"); the former 3.6/3.7/3.8
# entries advertised interpreters the package can no longer be installed on.
classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Information Analysis",
    "Topic :: Software Development :: Libraries",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Utilities",
]


# Runtime dependencies (Poetry's implicit "main" group).
[tool.poetry.dependencies]
python = ">=3.11,<4"
# NOTE(review): the ">=X,~=1.0" style pairs below combine an explicit lower
# bound with a compatible-release clause; presumably the second clause only
# caps the major version - confirm and simplify if so.
PyPDF2 = ">=1.26.0,~=1.0"
six = ">=1.10.0,~=1.0"
unidecode = ">=1.0.22,~=1.0"
Flask = ">=2.0.3"
# Upper bound only: no webargs release newer than 5.4.0 is accepted.
webargs = "<=5.4.0"
prometheus-flask-exporter = ">=0.20.1,~=0.20"
gunicorn = "^23.0.0"
python-magic = "^0.4.27"
inspire-utils = "^3.0.61"
requests = "^2.32.3"
# Exact pin: only this urllib3 version satisfies the constraint.
urllib3 = "1.26.12"


# Development-only dependencies: test runner, coverage plugin and mocking
# helpers. Keys are listed alphabetically.
[tool.poetry.group.dev.dependencies]
mock = "^5.1.0"
pytest = "^8.3.3"
pytest-cov = "^5.0.0"
responses = "^0.10.15"

# Coverage measurement is limited to files matching refextract/*.py.
[tool.coverage.run]
include = ["refextract/*.py"]

# Default pytest flags: collect coverage for the refextract package and print
# a terminal report of missing lines, omitting fully covered files.
[tool.pytest.ini_options]
addopts = "--cov=refextract --cov-report=term-missing:skip-covered"

# Build backend declaration: the package is built with poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
3 changes: 2 additions & 1 deletion refextract/references/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ def extract_references_from_file(path,
extracted_texkeys_urls = extract_texkeys_and_urls_from_pdf(path)
if len(extracted_texkeys_urls) == len(parsed_refs):
parsed_refs_updated = []
for ref, ref_texkey_urls in zip(parsed_refs, extracted_texkeys_urls):
for ref, ref_texkey_urls in zip(parsed_refs, extracted_texkeys_urls,
strict=False):
update_reference_with_urls(ref, ref_texkey_urls.get('urls', []))
if ref.get('url'):
ref['url'] = dedupe_list(ref['url'])
Expand Down
2 changes: 1 addition & 1 deletion ruff.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
target-version = "py38"
target-version = "py311"
[lint.flake8-tidy-imports]
ban-relative-imports = "all"

Expand Down
35 changes: 0 additions & 35 deletions setup.cfg

This file was deleted.

97 changes: 0 additions & 97 deletions setup.py

This file was deleted.

4 changes: 2 additions & 2 deletions tests/test_kbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_get_kbs_caches_journal_dict():
# the cache is reused, so identity of the cache elements doesn't change
assert all(
cached_first is cached_second for (cached_first, cached_second)
in zip(first_cache["journals"], second_cache["journals"])
in zip(first_cache["journals"], second_cache["journals"], strict=False)
)


Expand All @@ -55,7 +55,7 @@ def test_get_kbs_invalidates_cache_if_input_changes():
# the cache is invalidated, so identity of the cache elements changes
assert all(
cached_first is not cached_second for (cached_first, cached_second)
in zip(first_cache["journals"], second_cache["journals"])
in zip(first_cache["journals"], second_cache["journals"], strict=False)
)
assert len(second_cache["journals"]) == 3
assert second_cache["journals"][-1] == ["JOURNAL OF TESTING", "J TEST"]

0 comments on commit bc3b9b3

Please sign in to comment.