Skip to content

Commit

Permalink
1603_3_12.py (#12, #9): SPARQL break chunks of 2 of 1603:1:51 (langua…
Browse files Browse the repository at this point in the history
…ges); avoid timeout; @todo need actually get the second page
  • Loading branch information
fititnt committed Feb 10, 2022
1 parent 943401b commit f72f864
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 28 deletions.
68 changes: 43 additions & 25 deletions officinam/999999999/0/1603_3_12.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
# Type,
Union
)

import math
import urllib.parse
import requests

Expand Down Expand Up @@ -202,9 +202,9 @@ def _query_linguam(self):
return resultatum

def _query_linguam_limit(self, langpair_full: list):
resultatum = []
# resultatum = []

if len(langpair_full) <= self.linguae_limitibus:
if self.lingua_divisioni < 2:
return langpair_full

# @see https://stackoverflow.com/questions/312443
Expand All @@ -215,11 +215,22 @@ def _query_linguam_limit(self, langpair_full: list):
# yield lst[i:i + n]
# if langpair_full

limited = [langpair_full[i:i + self.linguae_limitibus]
for i in range(
0, len(langpair_full), self.linguae_limitibus)]
# def chunks(lst, n):
# """Yield successive n-sized chunks from lst."""
# for i in range(0, len(lst), n):
# yield lst[i:i + n]
# import math

limited_group = limited[self.linguae_paginarum_limitibus - 1]
divisio_numero = math.ceil(len(langpair_full) / self.lingua_divisioni)

def chunks(l, n):
n = max(1, n)
return (l[i:i+n] for i in range(0, len(l), n))

# limited = list(chunks(langpair_full, self.lingua_divisioni))
limited = list(chunks(langpair_full, divisio_numero))

limited_group = limited[self.lingua_paginae - 1]
# limited = chunks(langpair_full, self.linguae_limitibus)
# raise ValueError(limited_group)
# raise ValueError([limited_group, limited])
Expand All @@ -232,14 +243,14 @@ def est_resultatum_separato(self, resultatum_separato: str):
self.resultatum_separato = resultatum_separato
return self

def est_linguae_paginarum_limitibus(
self, linguae_paginarum_limitibus: Union[str, int]):
self.linguae_paginarum_limitibus = int(linguae_paginarum_limitibus)
def est_lingua_divisioni(
self, lingua_divisioni: Union[str, int]):
self.lingua_divisioni = int(lingua_divisioni)
return self

def est_linguae_limitibus(
self, linguae_limitibus: Union[str, int]):
self.linguae_limitibus = int(linguae_limitibus)
def est_lingua_paginae(
self, lingua_paginae: Union[str, int]):
self.lingua_paginae = int(lingua_paginae)
return self

def est_wikidata_q(self, wikidata_codicem: str):
Expand Down Expand Up @@ -438,23 +449,30 @@ def make_args(self, hxl_output=True):

# linguae, f, pl, (Nominative) https://en.wiktionary.org/wiki/lingua
# pāginārum, f, pl, (Genitive) https://en.wiktionary.org/wiki/pagina
# dīvīsiōnibus, f, pl, (Dative) https://en.wiktionary.org/wiki/divisio
# līmitibus, m, pl, (Dative) https://en.wiktionary.org/wiki/limes#Latin
# //linguae pāginārum līmitibus//

# lingua, f, s, (Nominative) https://en.wiktionary.org/wiki/lingua#Latin
# pāginae, f, s, (Dative) https://en.wiktionary.org/wiki/pagina#Latin
# dīvīsiōnī, f, s, (Dative) https://en.wiktionary.org/wiki/divisio#Latin
neo_codex.add_argument(
'--linguae-limitibus',
help='Number of languages of [1603:1:51] to limit on a query. ' +
'Default: 1000',
dest='linguae_limitibus',
'--lingua-divisioni',
help='For the languages on [1603:1:51], how many divisions ' +
'(or number of chunks) should be done. 1 means no division.' +
'If using more than 1, use --lingua-paginae do paginate the ' +
'Options. Default: 1',
dest='lingua_divisioni',
metavar='',
default="1000",
default="1",
nargs='?'
)

neo_codex.add_argument(
'--linguae-paginarum-limitibus',
help='If using --linguae-limitibus, which pagination of languages '
'return. Starts with 1. Default: 1',
dest='linguae_paginarum_limitibus',
'--lingua-paginae',
help='If --lingua-divisioni different from 1, defines which page '
'of languages to return. Default 1.',
dest='lingua_paginae',
metavar='',
default="1",
nargs='?'
Expand Down Expand Up @@ -493,9 +511,9 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,

# cs1603_3_12.est_verbum_limiti(args.verbum_limiti)
cs1603_3_12.est_resultatum_separato(args.resultatum_separato)
cs1603_3_12.est_linguae_limitibus(args.linguae_limitibus)
cs1603_3_12.est_linguae_paginarum_limitibus(
args.linguae_paginarum_limitibus)
cs1603_3_12.est_lingua_divisioni(args.lingua_divisioni)
cs1603_3_12.est_lingua_paginae(
args.lingua_paginae)

if self.pyargs.actionem_sparql:
# print('oi')
Expand Down
3 changes: 1 addition & 2 deletions officinam/999999999/1603_0_1603.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ PURGATORIA_EXTENSIONEM=( "no1.tm.hxl.csv" "wikiq.tm.hxl.csv" "no11.tm.hxl.csv" "
# PURGATORIA_CONCEPTUM+=( "1603_1_1" )
# PURGATORIA_CONCEPTUM+=( "1603_1_6" )
# PURGATORIA_CONCEPTUM+=( "1603_1_7" )
PURGATORIA_CONCEPTUM+=( "1603_1_51" )
# PURGATORIA_CONCEPTUM+=( "1603_1_51" )
# PURGATORIA_CONCEPTUM+=( "1603_1_101" )

# PURGATORIA_CONCEPTUM+=( "1603_3_12_6" )
Expand All @@ -49,7 +49,6 @@ DE_FACTO="${DE_FACTO:-'0'}"
# DRYRUM="0"



#######################################
# numerordiatio_caput extract from no1.tm.hxl.csv some quick metadata.
# Mostly focused on patterns of the headings.
Expand Down
1 change: 1 addition & 0 deletions officinam/999999999/1603_17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ ROOTDIR="$(pwd)"
# neo_codex_de_numerordinatio_pdf "1603_45_1" "0" "0"

# ./999999999/0/1603_3_12.py --actionem-sparql --query < 999999/0/1603_45_1.uniq.q.txt
# ./999999999/0/1603_3_12.py --actionem-sparql --query --lingua-divisioni=3 --lingua-paginae=3 < 999999/0/1603_45_1.uniq.q.txt

# ./999999999/0/1603_1.py --codex-de 1603_1_7 > 1603/1/7/1603_1_7.mul-Latn.codex.adoc
# ./999999999/0/1603_1.py --codex-de 1603_1_7 > 1603/1/7/1603_1_7.mul-Latn.codex.adoc ; bundle exec asciidoctor-pdf 1603/1/7/1603_1_7.mul-Latn.codex.adoc --out-file 1603/1/7/1603_1_7.mul-Latn.codex.pdf
Expand Down
6 changes: 5 additions & 1 deletion officinam/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,11 @@ file_translate_csv_de_numerordinatio_q() {
# sort --version-sort --field-separator="Q" < "$objectivum_archivum_temporarium_b" > "$objectivum_archivum_temporarium_b_u"
sort --version-sort --field-separator="Q" <"$objectivum_archivum_temporarium_b" | uniq >"$objectivum_archivum_temporarium_b_u"

"${ROOTDIR}/999999999/0/1603_3_12.py" --actionem-sparql --query <"$objectivum_archivum_temporarium_b_u" |
"${ROOTDIR}/999999999/0/1603_3_12.py" \
--actionem-sparql \
--lingua-divisioni=2 \
--lingua-paginae=1 \
--query <"$objectivum_archivum_temporarium_b_u" |
./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
>"$objectivum_archivum_temporarium_b_u_wiki"
# "$objectivum_archivum_temporarium_b_u"
Expand Down

0 comments on commit f72f864

Please sign in to comment.