Skip to content

Commit

Permalink
feat: make category a property of a work, not a publication (refs #7)
Browse files Browse the repository at this point in the history
also attempt to infer work category from publications it is in
  • Loading branch information
kevinstadler committed Sep 23, 2024
1 parent 40453a4 commit d7332cb
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 15 deletions.
10 changes: 9 additions & 1 deletion app/publication/[id]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,15 @@ export default async function PublicationPage(props: PublicationPageProps) {
return (
<MainContent className="">
<h1 className="font-bold">{pub.title}</h1>
<h2 className="italic">{pub.categories.join(" / ")}</h2>
<h2 className="italic">
{Array.from(
new Set(
pub.contains.flatMap((t) => {
return t.work.category;
}),
),
).join(" / ")}
</h2>
<div className="flex gap-8">
<PublicationCover className="h-96 grow basis-1/3" publication={pub} />
<div className="grow-[2] basis-2/3">
Expand Down
2 changes: 2 additions & 0 deletions app/search/instantsearch.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ const typesenseInstantsearchAdapter = new TypesenseInstantSearchAdapter({
const searchClient = typesenseInstantsearchAdapter.searchClient as unknown as SearchClient;

const queryArgToRefinementField = {
// the order of elements here determines the order of refinement lists in the UI
category: "contains.work.category" as const,
language: "language" as const,
work: "contains.work.title" as const,
translator: "contains.translators.name" as const,
Expand Down
2 changes: 1 addition & 1 deletion app/works/[category]/[work]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export default function WorksPage(props: WorksPageProps) {
facetingValue={props.params?.work ? decodeURI(props.params.work) : undefined}
filter_by={
// eslint-disable-next-line @typescript-eslint/no-explicit-any
`categories := ${catt(props.params?.category as any)}`
`contains.work.category := ${catt(props.params?.category as any)}`
}
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
path={`works/${props.params?.category}`}
Expand Down
3 changes: 2 additions & 1 deletion lib/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export interface Publication {
title: string;
language: string;
contains: Array<Translation>;
categories: Array<Category>;
// categories: Array<Category>;

// from openrefine: whether this publication contains at least one previously unpublished
// translation
Expand Down Expand Up @@ -46,6 +46,7 @@ export interface BernhardWork {
title: string; // german/french original
gnd?: string;
year?: number; // we get the years from gnd-lookup, so no gnd => no year info
category?: Category;
}

export interface Translator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,18 @@
"stem": false
},
{
"name": "categories",
"name": "year_display",
"type": "string",
"facet": false,
"optional": false,
"index": true,
"sort": false,
"infix": false,
"locale": "",
"stem": false
},
{
"name": "contains.work.category",
"type": "string[]",
"facet": true,
"optional": false,
Expand Down Expand Up @@ -79,7 +90,7 @@
"stem": false
}
],
"default_sorting_field": "",
"default_sorting_field": "year",
"enable_nested_fields": true,
"symbols_to_index": [],
"token_separators": []
Expand Down
47 changes: 37 additions & 10 deletions scripts/tsv-to-json.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def filter(self, record):
def orig(i):
    """Build the record key ``contains orig. <i>`` for the i-th original title.

    The publication-processing loop uses this key to read the i-th
    original-work title out of a publication row.
    """
    return "contains orig. {}".format(i)

def getcategories(pub):
    """Collect the category labels of a publication row.

    Reads ``pub['category 1']`` and ``pub['category 2']`` (in that order),
    splits each value on the space-backslash-space separator, and returns the
    resulting labels as one list. Empty strings and the label ``'prose'`` are
    dropped.
    """
    labels = []
    for column in ('category 1', 'category 2'):
        for label in pub[column].split(' \\ '):
            # skip blanks (empty cells) and the catch-all 'prose' label
            if label == '' or label == 'prose':
                continue
            labels.append(label)
    return labels

# publication year of the original work (looked up via the lobid.org GND database)
def getyear(gnd):
fn = f'gnd/{gnd}.json'
Expand Down Expand Up @@ -95,12 +98,13 @@ def workkey(pub, i):
# used in 2nd pass as a sanity check
pub['origworks'] = []
pub['Signatur'] = pub['Signatur'].strip()
pub_categories = getcategories(pub)

hadBlank = False
for i in range(1, 41):
bwkey = workkey(pub, i)
if bwkey:
origt = pub[orig(i)].strip(' 12345') # in chi_kurz_007 only
origt = pub[orig(i)].strip(' 12345').replace('\n', ' ') # in chi_kurz_007 only
# store for 2nd pass
pub['origworks'].append(origt)

Expand All @@ -112,9 +116,18 @@ def workkey(pub, i):
# did we already see this work? -- use title+gnd as unique id (graphic novels with same title..)
if bwkey in bernhardworks:
bernhardworks[bwkey]['count'] = bernhardworks[bwkey]['count'] + 1
if len(pub_categories) < len(bernhardworks[bwkey]['category']):
for c in pub_categories:
if not c in bernhardworks[bwkey]['category']:
logger.warning(f'could be {c} which it was previously not')
bernhardworks[bwkey]['category'] = pub_categories
elif len(pub_categories) == 1 and len(bernhardworks[bwkey]['category']) == 1 and pub_categories != bernhardworks[bwkey]['category']:
# logger.error(f'{pub["Signatur"]}: unique publication category implies that all works inside it have category "{unique_work_category}", but the following work was already found in a publication with a different unique category: {bernhardworks[bwkey]}')
print(f'''1. *{bernhardworks[bwkey]['title']}*: ist in [{pub["Signatur"]}](https://thomas-bernhard-global.acdh-ch-dev.oeaw.ac.at/publication/{pub["Signatur"]}) das als `{pub_categories[0]}` kategorisiert ist, in anderen Publikationen in denen es enthalten ist sind dagegen `{bernhardworks[bwkey]["category"][0]}`
- [ ] wahrscheinlicher Fix: `{pub["Signatur"]}`'s Kategorie von `{pub_categories[0]}` auf `{bernhardworks[bwkey]["category"][0]}` ändern''')
else:
# new work, write even if we don't know the gnd
bernhardworks[bwkey] = { 'id': str(len(bernhardworks)+1), 'gnd': gnd, 'title': origt, 'year': getyear(gnd) if gnd else None, 'count': 1 }
bernhardworks[bwkey] = { 'id': str(len(bernhardworks)+1), 'gnd': gnd, 'title': origt, 'category': pub_categories, 'year': getyear(gnd) if gnd else None, 'count': 1 }

else:
hadBlank = True
Expand All @@ -140,6 +153,12 @@ def workkey(pub, i):
'gnd': pub[f'{translatorkey} GND'] or None,
# 'wikidata': None
}
for k, v in bernhardworks.items():
if len(v['category']) == 1:
v['category'] = v['category'][0]
else:
print(f"{v['title']} ({v['gnd']}, {v['count']})")
# TODO Brief, Telegramm, Stellungnahme

translations = {}
nrepublications = 0
Expand Down Expand Up @@ -174,7 +193,7 @@ def workkey(pub, i):
'work': work,
# 'work': work['id'],
'translators': worktranslators, #[ t['id'] for t in worktranslators ],
'title': t
'title': t.replace('\n', ' ')
}
worktranslatornames = '+'.join([ t['name'] for t in worktranslators ])
translationkey = work['title'] + worktranslatornames
Expand All @@ -183,7 +202,7 @@ def workkey(pub, i):
nrepublications = nrepublications + 1
newt['id'] = translations[translationkey]['id']
if translations[translationkey] != newt:
logger.warning(f"{pub['Signatur']}: {worktranslatornames}'s translation of '{work['title']}' (GND: {work['gnd']}) was previously published as '{translations[translationkey]['title']}', now found translation titled '{newt['title']}'")
logger.info(f"{pub['Signatur']}: {worktranslatornames}'s translation of '{work['title']}' (GND: {work['gnd']}) was previously published as '{translations[translationkey]['title']}', now found translation titled '{newt['title']}'")
else:
newt['id'] = str(len(translations)+1)
translations[translationkey] = newt
Expand All @@ -196,11 +215,9 @@ def workkey(pub, i):

eltern = [ el.strip() for el in pub['Eltern'].split(' \\ ')] if pub['Eltern'] else None
try:
year = int(pub['year'])
int(pub['year'])
except ValueError:
logger.error(f"{pub['Signatur']} does not have a numeric year ('{pub['year']}')")
# FIXME force
year = int(pub['year'][0:4])
logger.warning(f"{pub['Signatur']} does not have a numeric year ('{pub['year']}')")

assets = [ { 'id': pub['Signatur']} ] if os.path.isfile(f'../public/covers/{pub["Signatur"]}.jpg') else []
if len(pub['more']):
Expand All @@ -213,17 +230,27 @@ def workkey(pub, i):
'later': [],
'more': pub['more'].split(', ') if pub['more'] else None, # TODO
'title': pub['title'],
'year': year,
'year': int(pub['year'][0:4]),
'year_display': pub['year'],
'language': pub['language'],
'contains': ts,
'publisher': publishers[pub['publisher / publication']],
'categories': [c for c in [c for c in pub['category 1'].split(' \\ ')] + [c for c in pub['category 2'].split(' \\ ')] if len(c) and c != 'prose'],
# 'categories': [c for c in [c for c in pub['category 1'].split(' \\ ')] + [c for c in pub['category 2'].split(' \\ ')] if len(c) and c != 'prose'],
'isbn': pub['ISBN'] or None,
'exemplar_suhrkamp_berlin': pub['Exemplar Suhrkamp Berlin (03/2023)'].lower() == 'x',
'exemplar_oeaw': pub['Exemplar ÖAW'].lower() == 'x',
'images': assets
}

categories = ['autobiography', 'novels', 'novellas & short prose', 'adaptations', 'poetry', 'drama & libretti', 'letters, speeches, interviews']
# for p in publications.values():
# if len(p['categories']) == 0:
# logger.warning(f'{p["id"]} has no categories')
# for c in p['categories']:
# if not c in categories:
# logger.warning(f'unknown category: {c}')


# redundantly store children ids in parent
for pub in publications.values():
if pub['parents']:
Expand Down

0 comments on commit d7332cb

Please sign in to comment.