Skip to content

Commit

Permalink
1603:1:51 (#9): HXLTM export on "
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Jan 23, 2022
1 parent 769e5fb commit 8dc806d
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 8 deletions.
75 changes: 71 additions & 4 deletions officinam/999999999/0/1603_3_12.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --wikidata-link
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --tsv > 999999/0/test.tsv
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv > 999999/0/test.csv
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm

# TODO: https://sinaahmadi.github.io/posts/10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html

Expand Down Expand Up @@ -92,7 +93,32 @@
# SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
# }


def hxltm_hastag_de_csvhxlated(csv_caput: list) -> list:
    """Convert CSV/TSV column names into HXLTM hashtags.

    Every non-empty column name gets a leading '#', each '__' is turned
    into '+', and any '?' (as found in SPARQL variable names) is removed.
    Empty column names are passed through as empty strings.

    Examples:
        >>> hxltm_hastag_de_csvhxlated(['item__conceptum__codicem'])
        ['#item+conceptum+codicem']
        >>> hxltm_hastag_de_csvhxlated(['item__rem__i_ara__is_arab', ''])
        ['#item+rem+i_ara+is_arab', '']

    Args:
        csv_caput (list): Header cells (strings) of the input table.

    Returns:
        list: A new list with one converted hashtag per input cell, in the
        same order as the input.
    """
    return [
        # Empty cells stay empty so the column count of the header is kept.
        ('#' + item.replace('__', '+').replace('?', '')) if item else ''
        for item in csv_caput
    ]

# https://stackoverflow.com/questions/43258341/how-to-get-wikidata-labels-in-more-than-one-language


class CS1603z3z12:
""" [summary]
Expand Down Expand Up @@ -194,12 +220,11 @@ def est_wikidata_q(self, wikidata_codicem: str):
# }
# }


def query(self):
qid = ['wd:' + x for x in self.qid if isinstance(x, str)]
# select = '?item ' + " ".join(self._query_linguam())

select = ['?item']
select = ['(?item AS ?item__conceptum__codicem)']
filter_otional = []
for pair in self.D1613_1_51_langpair:
select.append('?' + pair[1])
Expand Down Expand Up @@ -325,6 +350,7 @@ def make_args(self, hxl_output=True):
const=True,
nargs='?'
)

neo_codex.add_argument(
'--tsv',
help='Generate TSV output (from piped in query)',
Expand All @@ -334,6 +360,17 @@ def make_args(self, hxl_output=True):
nargs='?'
)

neo_codex.add_argument(
'--hxltm',
help='Generate HXL-tagged output (from piped in query). ' +
'Concepts use #item+conceptum+codicem instead ' +
'of #item+code+v_wiki_q',
metavar='',
dest='hxltm',
const=True,
nargs='?'
)

# neo_codex.add_argument(
# '--actionem-verbum-simplex',
# help='Do not generate the codes. Just calculate the full matrix ' +
Expand Down Expand Up @@ -624,16 +661,46 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
# https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/en#Supported_formats

if self.pyargs.tsv:
separator = "\t"
headers = {'Accept': 'text/tab-separated-values'}
if self.pyargs.csv:
separator = ","
headers = {'Accept': 'text/csv'}
if self.pyargs.hxltm:
# headers = {'Accept': 'text/tab-separated-values'}
headers = {'Accept': 'text/csv'}

payload_query = "".join(full_query)
r = requests.get(sparql_backend, headers=headers, params={
r = requests.post(sparql_backend, headers=headers, data={
'query': payload_query
})

print(r.text.strip())
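# @TODO: --tsv --hxltm is known to be buggy: with --hxltm the request is
# forced to text/csv, so the header gets re-joined with tabs while
# the data rows stay comma-separated (also not sure if the
# Wikidata result already skips values)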

if self.pyargs.hxltm:
result_string = r.text.strip()

# @TODO: this is likely to break on fields that contain newlines;
# however, no test sample exists at the moment.
# Eventually this needs to be checked.
lines = result_string.splitlines()
# caput = hxltm_hastag_de_csvhxlated(next(iter(lines)).split(","))
caput_crudum = lines.pop(0)
# print('caput_crudum', caput_crudum)
caput = hxltm_hastag_de_csvhxlated(caput_crudum.split(','))
print(separator.join(caput))
print("\n".join(lines))

# reader = csv.reader(lines, delimiter="\t")
# caput = hxltm_hastag_de_csvhxlated(next(reader))
# print(separator.join(caput))
# for row in reader:
# print(separator.join(row))
else:
print(r.text.strip())

# TODO: generate explicit error messages and return code
# print(r.content)
return self.EXIT_OK

Expand Down
1 change: 1 addition & 0 deletions officinam/999999999/1603_3_1603_45_1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#
# USAGE: ./999999999/1603_3_1603_45_1.sh
# time ./999999999/1603_3_1603_45_1.sh
# time FORCE_CHANGED=1 ./999999999/1603_3_1603_45_1.sh
#
# DESCRIPTION: ---
#
Expand Down
9 changes: 5 additions & 4 deletions officinam/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ file_update_if_necessary() {
csv)
is_valid=$(csvclean --dry-run "$fontem_archivum")
if [ "$is_valid" != "No errors." ]; then
echo "$fontem_archivum"
echo "$is_valid"
return 1
fi
Expand Down Expand Up @@ -393,17 +394,17 @@ file_translate_csv_de_numerordinatio_q() {
fi

fontem_archivum="${_basim_fontem}/$_path/$_nomen.no1.tm.hxl.csv"
objectivum_archivum="${_basim_objectivum}/$_path/$_nomen.wikiq.tm.csv"
objectivum_archivum="${_basim_objectivum}/$_path/$_nomen.wikiq.tm.hxl.csv"
objectivum_archivum_temporarium="${ROOTDIR}/999999/0/$_nomen.no1.tm.hxl.csv"
objectivum_archivum_temporarium_b="${ROOTDIR}/999999/0/$_nomen.q.txt"
objectivum_archivum_temporarium_b_u="${ROOTDIR}/999999/0/$_nomen.uniq.q.txt"
objectivum_archivum_temporarium_b_u_wiki="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.csv"
objectivum_archivum_temporarium_b_u_wiki="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.hxl.csv"

# if [ -z "$(changed_recently "$fontem_archivum")" ]; then return 0; fi

# echo "${FUNCNAME[0]} sources changed_recently. Reloading..."

if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi
# if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi

echo "${FUNCNAME[0]} stale data on [$objectivum_archivum], refreshing..."

Expand Down Expand Up @@ -445,7 +446,7 @@ file_translate_csv_de_numerordinatio_q() {
sort --version-sort --field-separator="Q" <"$objectivum_archivum_temporarium_b" | uniq >"$objectivum_archivum_temporarium_b_u"

"${ROOTDIR}/999999999/0/1603_3_12.py" --actionem-sparql --query <"$objectivum_archivum_temporarium_b_u" |
./999999999/0/1603_3_12.py --actionem-sparql --csv \
./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
>"$objectivum_archivum_temporarium_b_u_wiki"
# "$objectivum_archivum_temporarium_b_u"

Expand Down

0 comments on commit 8dc806d

Please sign in to comment.