Skip to content

Commit

Permalink
Use py-rattler to fetch repodata in proxy mode
Browse files Browse the repository at this point in the history
rattler is very efficient at downloading repodata
  • Loading branch information
beenje committed Dec 1, 2023
1 parent 0b49467 commit 8921261
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 6 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ dependencies:
- pytest-asyncio
- pytest-timeout
- pydantic >=2
- py-rattler
- pip:
- git+https://github.com/jupyter-server/jupyter_releaser.git@v2
1 change: 1 addition & 0 deletions quetz/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class Config:
ConfigEntry("package_unpack_threads", int, 1),
ConfigEntry("frontend_dir", str, default=""),
ConfigEntry("redirect_http_to_https", bool, False),
ConfigEntry("rattler_cache_dir", str, default="rattler_cache"),
],
),
ConfigSection(
Expand Down
51 changes: 45 additions & 6 deletions quetz/tasks/mirror.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import asyncio
import contextlib
import json
import logging
import os
import shutil
from concurrent.futures import ThreadPoolExecutor
from http.client import IncompleteRead
from pathlib import Path, PurePath
from tempfile import SpooledTemporaryFile
from typing import List

import requests
from fastapi import HTTPException, status
from rattler import Channel, ChannelConfig, Platform, fetch_repo_data
from tenacity import TryAgain, retry
from tenacity.after import after_log
from tenacity.stop import stop_after_attempt
Expand Down Expand Up @@ -59,6 +62,11 @@ def __init__(self, host, session):
def open(self, path):
    """Return a ``RemoteFile`` for ``path``, built from this object's host and session."""
    remote = RemoteFile(self.host, path, self.session)
    return remote

@property
def rattler_channel(self):
    """Return a rattler ``Channel`` built from ``self.host``.

    The last segment of the host URL is used as the channel name and
    everything before it is passed to ``ChannelConfig``.
    """
    # Split with plain string operations instead of pathlib: PurePath
    # collapses the "//" that follows the URL scheme (and would use
    # backslashes on Windows), which corrupts the base URL.
    base_url, _, channel_name = self.host.rstrip("/").rpartition("/")
    # A host without any "/" previously yielded "." as its parent; keep that.
    return Channel(channel_name, ChannelConfig(base_url or "."))


class RemoteServerError(Exception):
    """Plain ``Exception`` subclass that adds no behavior of its own.

    Presumably raised for failures on the remote server side; the raise
    sites are not visible here.
    """
Expand Down Expand Up @@ -106,6 +114,32 @@ def json(self):
return json.load(self.file)


def download_repodata(repository: RemoteRepository, channel: str, platform: str):
    """Fetch the repodata of one platform through rattler and return its bytes.

    rattler downloads the repodata of ``repository.rattler_channel`` into a
    ``<rattler_cache_dir>/<channel>/<platform>`` directory; the cached JSON
    file is then read back from disk.

    Args:
        repository: remote repository whose ``rattler_channel`` is fetched.
        channel: channel name, used as a cache sub-directory.
        platform: platform name, used for the fetch and as a cache
            sub-directory.

    Returns:
        The raw content (bytes) of the cached repodata JSON file.

    Raises:
        RemoteFileNotFound: if the cache holds no repodata file after the fetch.
        Exception: any error raised while fetching is logged and re-raised.
    """
    cache_path = Path(Config().general_rattler_cache_dir) / channel / platform
    logger.debug(f"Fetching {platform} repodata from {repository.rattler_channel}")
    try:
        asyncio.run(
            fetch_repo_data(
                channels=[repository.rattler_channel],
                platforms=[Platform(platform)],
                cache_path=cache_path,
                callback=None,
            )
        )
    except Exception as e:
        logger.error(f"Failed to fetch repodata: {e}")
        raise

    # NOTE(review): rattler appears to store a "<key>.info.json" cache-state
    # file next to the "<key>.json" repodata; "*.json" matches both, so the
    # state files are filtered out. Sorting makes the pick deterministic
    # (glob order is arbitrary).
    candidates = sorted(
        entry
        for entry in cache_path.glob("*.json")
        if not entry.name.endswith(".info.json")
    )
    if not candidates:
        logger.error(f"No json file found in rattler cache: {cache_path}")
        raise RemoteFileNotFound
    return candidates[0].read_bytes()


def download_remote_file(
repository: RemoteRepository, pkgstore: PackageStore, channel: str, path: str
):
Expand All @@ -122,13 +156,18 @@ def download_remote_file(
# Acquire a lock to prevent multiple concurrent downloads of the same file
with pkgstore.create_download_lock(channel, path):
logger.debug(f"Downloading {path} from {channel} to pkgstore")
remote_file = repository.open(path)
data_stream = remote_file.file

if path.endswith('.json'):
add_static_file(data_stream.read(), channel, None, path, pkgstore)
if path.endswith("/repodata.json"):
platform = str(PurePath(path).parent)
repodata = download_repodata(repository, channel, platform)
add_static_file(repodata, channel, None, path, pkgstore)
else:
pkgstore.add_package(data_stream, channel, path)
remote_file = repository.open(path)
data_stream = remote_file.file

if path.endswith('.json'):
add_static_file(data_stream.read(), channel, None, path, pkgstore)
else:
pkgstore.add_package(data_stream, channel, path)

pkgstore.delete_download_lock(channel, path)

Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ install_requires =
python-multipart
pydantic>=2.0.0
pyyaml
py-rattler
requests
sqlalchemy
sqlalchemy-utils
Expand Down

0 comments on commit 8921261

Please sign in to comment.