Skip to content

Commit

Permalink
Merge pull request #158 from DDMAL/update-language-list
Browse files Browse the repository at this point in the history
Update language list
  • Loading branch information
kunfang98927 authored Sep 24, 2024
2 parents 355d5fb + afb68b1 commit 47c3464
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -1,29 +1,164 @@
"""This module imports possible languages for instrument names from Wikidata."""

import requests
from django.core.management.base import BaseCommand
from VIM.apps.instruments.models import Language

WIKIDATA_URL = "https://www.wikidata.org/w/api.php"


def get_languages_from_wikidata():
    """
    Retrieve the languages known to Wikidata through its `siteinfo` API module.

    The request asks for `meta=siteinfo` with `siprop=languages`. Reference:
    https://www.wikidata.org/wiki/Special:ApiHelp/query%2Bsiteinfo
    The same query can be tried in the API sandbox:
    https://www.wikidata.org/wiki/Special:ApiSandbox#action=query&format=json&prop=&list=&meta=siteinfo&formatversion=2&siprop=languages

    Returns:
        list: One dictionary per language, taken from `query.languages` in the
        JSON reply, e.g.
            [
                {"code": "aa", "bcp47": "aa", "name": "Qafár af"},
                {"code": "aae", "bcp47": "aae", "name": "Arbërisht"},
                ...
            ]
        An empty list is returned when the reply status is not 200 or when
        the reply does not contain the expected keys.
    """
    query_params = {
        "action": "query",
        "format": "json",
        "prop": "",
        "list": "",
        "meta": "siteinfo",
        "formatversion": "2",
        "siprop": "languages",
    }

    response = requests.get(WIKIDATA_URL, params=query_params, timeout=50)

    # Bail out early on any non-OK status so the happy path stays flat.
    if response.status_code != 200:
        print(f"Error: Failed to fetch data. Status code {response.status_code}")
        return []

    payload = response.json()
    query_section = payload.get("query", {})
    return query_section.get("languages", [])


def get_language_details(language_codes):
    """
    Fetches the details of the specified languages from Wikidata using the Wikidata API.

    The API endpoint used is the `languageinfo` module with the `liprop` parameter.
    For more information, see:
    https://www.wikidata.org/w/api.php?action=help&modules=query%2Blanguageinfo
    Example API request in the API sandbox:
    https://www.wikidata.org/wiki/Special:ApiSandbox#action=query&format=json&prop=&list=&meta=languageinfo&formatversion=2&liprop=autonym%7Ccode%7Cname&licode=aa%7Caae

    Args:
        language_codes (iterable of str): Language codes for which details are
            to be fetched. They are joined with "|" into the `licode` parameter.

    Returns:
        dict | None: A dictionary containing language details with the language
        code as the key. For example:
            {
                "aa": {"code": "aa", "autonym": "Qafár af", "name": "Afar"},
                "aae": {"code": "aae", "autonym": "Arbërisht", "name": "Arbëresh"},
                ...
            }
        An empty dictionary is returned when `language_codes` is empty (no
        request is made) or when the reply lacks the expected keys. `None` is
        returned when the reply status is not 200.
    """
    # Materialise once so generators are supported and emptiness can be tested.
    codes = list(language_codes)

    # Nothing to look up: skip the network round-trip instead of sending an
    # empty `licode` value.
    if not codes:
        return {}

    # Define the API parameters to get the language details
    params = {
        "action": "query",
        "format": "json",
        "prop": "",
        "meta": "languageinfo",
        "formatversion": "2",
        "liprop": "code|autonym|name",
        "licode": "|".join(codes),
    }

    # Make the request to the Wikidata API
    response = requests.get(WIKIDATA_URL, params=params, timeout=50)

    # Failure is reported on stdout and signalled to the caller with None.
    if response.status_code != 200:
        print(f"Error: Failed to fetch data. Status code {response.status_code}")
        return None

    # Extract the language details from the response
    data = response.json()
    return data.get("query", {}).get("languageinfo", {})


class Command(BaseCommand):
    """
    The import_languages command populates the database with languages in which instrument
    names can be provided in UMIL. It fetches the language list from Wikidata, retrieves the
    'wikidata_code', 'autonym', and 'en_label', and stores them in the database.
    """

    help = "Imports possible languages for instrument names from Wikidata."

    WIKIDATA_SPARQL_URL = "https://query.wikidata.org/sparql"

    def handle(self, *args, **options):
        """
        Fetch all language codes, look up their details in batches and upsert
        one `Language` row per code.

        Existing rows (matched on `wikidata_code`) have their `en_label` and
        `autonym` refreshed; missing rows are created. Batches for which no
        details come back are skipped.
        """
        # Fetch the list of languages
        languages = get_languages_from_wikidata()
        # Drop entries without a code: a None in the list would make the
        # "|".join(...) inside get_language_details raise TypeError.
        language_codes = [lang["code"] for lang in languages if lang.get("code")]

        self.stdout.write(
            self.style.SUCCESS(
                f"Successfully fetched {len(language_codes)} language codes."
            )
        )

        # Fetch details for specific language codes, 50 at a time
        for start in range(0, len(language_codes), 50):
            language_batch = language_codes[start : start + 50]
            language_details = get_language_details(language_batch)
            if not language_details:
                continue

            for details in language_details.values():
                Language.objects.update_or_create(
                    wikidata_code=details["code"],
                    defaults={
                        "en_label": details["name"],
                        "autonym": details["autonym"],
                    },
                )

        self.stdout.write(
            self.style.SUCCESS(
                f"Successfully imported {Language.objects.count()} languages."
            )
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generated by Django 4.2.5 on 2024-09-23 15:45

from django.db import migrations


class Migration(migrations.Migration):
    """Drop the `wikidata_id` field from the `language` model."""

    # Applied on top of the instruments app's 0004 merge migration.
    dependencies = [("instruments", "0004_merge_20240816_2008")]

    operations = [
        migrations.RemoveField(model_name="language", name="wikidata_id"),
    ]
3 changes: 0 additions & 3 deletions web-app/django/VIM/apps/instruments/models/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ class Language(models.Model):
wikidata_code = models.CharField(
unique=True, blank=False, help_text="Language code in Wikidata"
)
wikidata_id = models.CharField(
unique=True, blank=False, help_text="Language ID (Q number) in Wikidata"
)
en_label = models.CharField(blank=False, help_text="Language label in English")
autonym = models.CharField(
blank=False, help_text="Language label in the language itself"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_active_language_en_label(self) -> str:
language_en = self.request.GET.get("language")
if language_en:
return language_en
return self.request.session.get("active_language_en", "english")
return self.request.session.get("active_language_en", "English")

def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
Expand Down

0 comments on commit 47c3464

Please sign in to comment.