Skip to content

Commit

Permalink
Ability to download artist image from Wikimedia Commons
Browse files Browse the repository at this point in the history
Refs: #143
  • Loading branch information
orontee committed Jul 12, 2023
1 parent be14609 commit d780691
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 5 deletions.
4 changes: 2 additions & 2 deletions argos/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def __init__(
self._image_dir = Path(xdg.BaseDirectory.save_cache_path("argos/images"))
self._ongoing_task: Optional[asyncio.Task[None]] = None

def get_image_filepath(self, image_uri: str) -> Optional[Path]:
if image_uri == "":
def get_image_filepath(self, image_uri: Optional[str]) -> Optional[Path]:
if image_uri is None or image_uri == "":
filename = None
elif image_uri.startswith("/local/"):
filename = Path(image_uri).parts[-1]
Expand Down
96 changes: 93 additions & 3 deletions argos/info.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import gettext
import hashlib
import logging
import urllib.parse
from enum import Enum
from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Sequence, Tuple
from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Sequence, Tuple

import aiohttp
from gi.repository import GLib, GObject
Expand All @@ -24,6 +25,7 @@

# Root of the MusicBrainz web service (the "ws/2/" path selects version 2).
_MUSICBRAINZ_BASE_URL: str = "https://musicbrainz.org/ws/2/"
# Root of the Wikidata site.
_WIKIDATA_BASE_URL: str = "https://www.wikidata.org/"
# Root of the Wikimedia Commons upload tree; image URLs are built by joining
# a hash-derived relative path onto it.
_WIKIMEDIA_BASE_URL: str = "https://upload.wikimedia.org/wikipedia/commons/"

# Template for the data source mention; "{}" is the placeholder for the source.
# NOTE(review): `_` is presumably the gettext translation function -- confirm.
_SOURCE_MENTION_TEMPLATE = _("Data source: {}")

Expand All @@ -39,6 +41,8 @@ def _get_wikipedia_base_urls(lang_key: str) -> List[str]:


class WikidataProperty(Enum):
Image = "P18"
# Signature = "P109"
MusicBrainzArtistID = "P434"
MusicBrainzReleaseGroupID = "P436"

Expand Down Expand Up @@ -89,13 +93,13 @@ async def _get_related_mbids(

return release_group_mbid, artist_mbids

async def _get_sitelinks_from_wikidata(
async def _search_wikidata_entity_by_mbid(
self,
session: aiohttp.ClientSession,
mbid: str,
*,
criteria: WikidataProperty,
) -> Optional[Dict[str, Dict[str, str]]]:
) -> Optional[Dict[str, Any]]:
if not mbid:
return None

Expand Down Expand Up @@ -128,9 +132,61 @@ async def _get_sitelinks_from_wikidata(

entities = parsed_resp.get("entities")
entity = entities.get(title) if entities is not None else None
return entity

async def _get_sitelinks_from_wikidata(
    self,
    session: aiohttp.ClientSession,
    mbid: str,
    *,
    criteria: WikidataProperty,
) -> Optional[Dict[str, Dict[str, str]]]:
    """Return the ``sitelinks`` mapping of the Wikidata entity found for ``mbid``.

    Args:
        session: HTTP session forwarded to the entity lookup.
        mbid: Identifier forwarded to the entity lookup.
        criteria: Wikidata property forwarded to the entity lookup.

    Returns:
        The value stored under the entity's ``"sitelinks"`` key, or ``None``
        when no entity is found or the entity has no such key.
    """
    found = await self._search_wikidata_entity_by_mbid(
        session, mbid, criteria=criteria
    )
    # Nothing to extract when the lookup came back empty-handed.
    if found is None:
        return None

    return found.get("sitelinks")

async def _get_image_url_from_wikidata(
    self,
    session: aiohttp.ClientSession,
    mbid: str,
    *,
    criteria: WikidataProperty,
) -> Optional[str]:
    """Return the Wikimedia Commons URL of the image attached to an entity.

    The Wikidata entity is looked up through
    ``_search_wikidata_entity_by_mbid()``. Only the first statement of its
    image property (``WikidataProperty.Image``) is considered.

    Args:
        session: HTTP session forwarded to the entity lookup.
        mbid: Identifier forwarded to the entity lookup.
        criteria: Wikidata property forwarded to the entity lookup.

    Returns:
        The URL of the image file, or ``None`` when no entity, no image
        statement or no usable file name is found.
    """
    entity = await self._search_wikidata_entity_by_mbid(
        session, mbid, criteria=criteria
    )
    claims = entity.get("claims") if entity is not None else None
    statements = (
        claims.get(WikidataProperty.Image.value, []) if claims is not None else []
    )
    statement = statements[0] if statements else {}
    mainsnak = statement.get("mainsnak")
    if mainsnak is None:
        return None

    if mainsnak.get("datatype") != "commonsMedia":
        LOGGER.debug(f"Unsupported data type for artist MBID {mbid}")
        return None

    mainsnak_value = mainsnak.get("datavalue")
    if mainsnak_value is None or mainsnak_value.get("type") != "string":
        LOGGER.debug(f"Unexpected value type for artist MBID {mbid}")
        return None

    filename = mainsnak_value.get("value")
    # A declared "string" datavalue may still lack a usable value; bail out
    # instead of failing on ``None.replace``.
    if not isinstance(filename, str) or not filename:
        LOGGER.debug(f"Unexpected value type for artist MBID {mbid}")
        return None

    safe_filename = filename.replace(" ", "_")
    # The directory components are derived from the MD5 digest of the
    # (unquoted) file name with spaces replaced by underscores. See
    # https://commons.wikimedia.org/wiki/Commons:FAQ#What_are_the_strangely_named_components_in_file_paths?
    digest = hashlib.md5(safe_filename.encode("utf-8")).hexdigest()
    # Percent-encode the file name so that reserved characters ("?", "%",
    # non-ASCII, ...) stay part of the path instead of corrupting the URL.
    quoted_filename = urllib.parse.quote(safe_filename)
    return urllib.parse.urljoin(
        _WIKIMEDIA_BASE_URL, f"{digest[0]}/{digest[0:2]}/{quoted_filename}"
    )

def _build_preferred_abstract_url(
self, sitelinks: Mapping[str, Mapping[str, str]]
) -> Optional[str]:
Expand Down Expand Up @@ -272,6 +328,21 @@ async def _get_artist_abstract(
joined_abstracts = "\n\n".join(raw_abstracts)
return f"{joined_abstracts}\n\n{self._source_with_markup}"

async def _get_artist_image_url(
    self,
    session: aiohttp.ClientSession,
    artist_mbid: str,
) -> Optional[str]:
    """Return the image URL found on Wikidata for ``artist_mbid``.

    Args:
        session: HTTP session forwarded to the Wikidata lookup.
        artist_mbid: Artist identifier; a falsy value short-circuits to
            ``None`` without any lookup.

    Returns:
        Whatever ``_get_image_url_from_wikidata()`` returns when searching
        with ``WikidataProperty.MusicBrainzArtistID``, or ``None``.
    """
    if artist_mbid:
        return await self._get_image_url_from_wikidata(
            session,
            artist_mbid,
            criteria=WikidataProperty.MusicBrainzArtistID,
        )

    return None

async def get_album_information(
self, release_mbid: str
) -> Tuple[Optional[str], Optional[str]]:
Expand Down Expand Up @@ -355,3 +426,22 @@ async def get_artist_information(self, artist_mbid: str) -> Optional[str]:
)

return artist_abstract

async def get_artist_image_url(self, artist_mbid: str) -> Optional[str]:
    """Return the URL of an image for the artist identified by ``artist_mbid``.

    A session is obtained from ``self._http_session_manager`` for the
    duration of the lookup. ``aiohttp.ClientError`` raised by the lookup is
    logged and swallowed, in which case ``None`` is returned.

    Args:
        artist_mbid: Artist identifier; a falsy value yields ``None``
            without opening a session.

    Returns:
        The image URL, or ``None`` when none was found or the request failed.
    """
    if not artist_mbid:
        return None

    # Stays ``None`` when the request fails below.
    artist_image_url: Optional[str] = None
    async with self._http_session_manager.get_session() as session:
        try:
            artist_image_url = await self._get_artist_image_url(session, artist_mbid)
        except aiohttp.ClientError as err:
            LOGGER.error(
                f"Failed to request image for artist MBID {artist_mbid}, {err}"
            )

    LOGGER.debug(f"URL for artist image {artist_image_url}")
    return artist_image_url

0 comments on commit d780691

Please sign in to comment.