1603_3_12.py (#40): 999999_1679.sh bash script glue for 3 out of 20 pages (not functional yet)
Showing 2 changed files with 149 additions and 36 deletions.
999999999/0/1603_3_12.py
@@ -30,34 +30,13 @@
# pytest
# python3 -m doctest ./999999999/0/1603_3_12.py

# ./999999999/0/1603_3_12.py
# NUMERORDINATIO_BASIM="/external/ndata" ./999999999/0/1603_3_12.py
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --wikidata-link
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --tsv > 999999/0/test.tsv
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv > 999999/0/test.csv
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm

# 1603_25_1 query
# printf "Q3409626\nQ41055\nQ3321315\nQ160695\nQ9645\nQ9597\nQ713102\nQ133279\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query


# SELECT ?pic (STRAFTER(STR(?item), "entity/") AS ?item__conceptum__codicem) ?item__rem__i_lat__is_latn
# WHERE
# {
#   VALUES ?item { wd:Q3409626 wd:Q41055 wd:Q3321315 wd:Q160695 wd:Q9645 wd:Q9597 wd:Q713102 wd:Q133279 }
#   bind(xsd:integer(strafter(str(?item), 'Q')) as ?id_numeric) .
#   OPTIONAL { ?item wdt:P18 ?pic }
#   OPTIONAL { ?item rdfs:label ?item__rem__i_qcc__is_zxxx filter (lang(?item__rem__i_qcc__is_zxxx) = ""). }
#   OPTIONAL { ?item rdfs:label ?item__rem__i_lat__is_latn filter (lang(?item__rem__i_lat__is_latn) = "la"). }
# }
# ORDER BY ASC (?id_numeric)

## Example with proxy
# Example with proxy
# export HTTP_PROXY="socks5://127.0.0.1:9050"
# export HTTPS_PROXY="socks5://127.0.0.1:9050"

# TODO: https://sinaahmadi.github.io/posts/10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html
# TODO: https://sinaahmadi.github.io/posts
#       /10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html

import os
import sys

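The commented-out query above shows the general shape used for Q-item lookups: a VALUES list of wd: items, a numeric sort key derived from the Q-id, and OPTIONAL picture and label bindings. As a minimal, self-contained sketch (not code from 1603_3_12.py; the helper name build_values_query is hypothetical), such a query string could be assembled from a list of Q-ids like this:

# Hypothetical helper, for illustration only: build a VALUES-based query
# in the shape of the commented example above.
def build_values_query(qids):
    values = " ".join("wd:" + qid for qid in qids)
    return """
SELECT ?pic ?item__rem__i_lat__is_latn WHERE {{
  VALUES ?item {{ {values} }}
  BIND(xsd:integer(STRAFTER(STR(?item), 'Q')) AS ?id_numeric) .
  OPTIONAL {{ ?item wdt:P18 ?pic }}
  OPTIONAL {{ ?item rdfs:label ?item__rem__i_lat__is_latn
              FILTER (lang(?item__rem__i_lat__is_latn) = "la"). }}
}}
ORDER BY ASC (?id_numeric)
""".format(values=values)


print(build_values_query(["Q1065", "Q82151"]))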
@@ -112,6 +91,9 @@
printf "P1585\\n" | {0} --actionem-sparql --de=P --query \
--ex-interlinguis
printf "P1585\\n" | {0} --actionem-sparql --de=P --query \
--ex-interlinguis
printf "P1585\\n" | {0} --actionem-sparql --de=P --query \
--lingua-divisioni=50 --lingua-paginae=1
------------------------------------------------------------------------------

@@ -124,7 +106,7 @@

# @see https://meta.wikimedia.org/wiki/User-Agent_policy
# @see https://www.mediawiki.org/wiki/API:Etiquette
USER_AGENT="EticaAI-multilingual-lexicography/2022.3.9 (https://meta.wikimedia.org/wiki/User:EmericusPetro; [email protected]) 1603_3_12.py/0.1"
USER_AGENT = "EticaAI-multilingual-lexicography/2022.3.9 (https://meta.wikimedia.org/wiki/User:EmericusPetro; [email protected]) 1603_3_12.py/0.1"

# print('getcwd: ', os.getcwd())
# print('oi', NUMERORDINATIO_BASIM)

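The USER_AGENT constant above exists to satisfy the two linked Wikimedia policies. A minimal standard-library sketch of sending a query to the public Wikidata SPARQL endpoint with that header set (the endpoint URL and the sparql_get helper are assumptions for this illustration, not code taken from 1603_3_12.py):

# Illustration only: the helper name is an assumption, not part of 1603_3_12.py.
import json
import urllib.parse
import urllib.request

WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
USER_AGENT = "EticaAI-multilingual-lexicography/2022.3.9 (contact details as in the constant above) 1603_3_12.py/0.1"


def sparql_get(query):
    # Build a GET request with the query and ask for JSON results,
    # sending the custom User-Agent required by the Wikimedia policy.
    url = WIKIDATA_SPARQL + "?" + urllib.parse.urlencode(
        {"query": query, "format": "json"})
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request) as response:
        return json.loads(response.read().decode("utf-8"))


# Example:
# sparql_get('SELECT ?item WHERE { ?item wdt:P1585 [] } LIMIT 3')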
@@ -311,8 +293,8 @@ def est_wikidata_p(self, wikidata_codicem: str):

        return self

    def est_wikidata_p_interlinguis(self: str):
        self.ex_interlinguis = True
    def est_wikidata_p_interlinguis(self, statum: bool = True):
        self.ex_interlinguis = statum

        return self

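With this change est_wikidata_p_interlinguis takes an explicit boolean and, like the other est_* setters, returns self, so calls can be chained. A toy, self-contained sketch of that fluent-setter pattern (the class and the body of est_wikidata_p here are illustrative, not the real implementation):

# Toy illustration of the fluent-setter pattern; not the real class.
class ToyCodex:
    def __init__(self):
        self.ex_interlinguis = False
        self.pid = []

    def est_wikidata_p_interlinguis(self, statum: bool = True):
        self.ex_interlinguis = statum
        return self

    def est_wikidata_p(self, wikidata_codicem: str):
        self.pid.append(wikidata_codicem)  # illustrative body
        return self


toy = ToyCodex().est_wikidata_p_interlinguis(True).est_wikidata_p('P1585')
print(toy.ex_interlinguis, toy.pid)  # True ['P1585']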
@@ -342,6 +324,7 @@ def est_wikidata_p_interlinguis(self: str):
# }
# }


    def query_q(self):
        langpair_full = self._query_linguam()
        self.D1613_1_51_langpair = self._query_linguam_limit(langpair_full)

@@ -392,7 +375,7 @@ def query_q(self):
# [TRY IT ↗]()
        return term

## Current test
# Current test
# SELECT DISTINCT ?item ?itemLabel WHERE {
# SERVICE wikibase:label {
# bd:serviceParam wikibase:language "[AUTO_LANGUAGE]".

@@ -403,7 +386,7 @@ def query_q(self):
# ?item p:P1585 ?statement0.
# ?statement0 (ps:P1585) _:anyValueP1585.
# #FILTER(EXISTS { ?statement0 prov:wasDerivedFrom ?reference. })

# #bind(xsd:integer(strafter(str(?item), 'Q')) as ?id_numeric) .
# }
# }

@@ -475,7 +458,37 @@ def query_p(self):
        return term

    def query_p_ex_interlinguis(self):
        return self.query_p()
        qid = ['wd:' + x for x in self.qid if isinstance(x, str)]

        _pid = self.pid[0]

        select = [
            '(?wikidata_p_value AS ?item__conceptum__codicem)',
            '(STRAFTER(STR(?item), "entity/") AS '
            '?item__rem__i_qcc__is_zxxx__ix_wikiq)'
        ]

        # @TODO: allow command line specify additional properties to
        #        optionally fetch together

        term = """
        SELECT {select} WHERE {{
          {{
            SELECT DISTINCT ?item WHERE {{
              ?item p:{wikidata_p} ?statement0.
              ?statement0 (ps:{wikidata_p}) _:anyValue{wikidata_p}.
            }}
          }}
          ?item wdt:{wikidata_p} ?wikidata_p_value .
        }}
        ORDER BY ASC (?wikidata_p_value)
        """.format(
            wikidata_p=_pid,
            qitems=" ".join(qid),
            select=" ".join(select)
        )

        return term

    def exportatum_sparql(self):
        resultatum = []

@@ -485,9 +498,9 @@ def exportatum_sparql(self):
            resultatum.append(self.query_q())
        if len(self.pid) > 0:
            if self.ex_interlinguis:
                resultatum.append(self.query_p())
            else:
                resultatum.append(self.query_p_ex_interlinguis())
            else:
                resultatum.append(self.query_p())
        return resultatum

@@ -512,7 +525,6 @@ def make_args(self, hxl_output=True):
            epilog=__EPILOGUM__
        )


        # https://en.wikipedia.org/wiki/Code_word
        # https://en.wikipedia.org/wiki/Coded_set

@@ -704,10 +716,10 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
        for line in sys.stdin:
            codicem = line.replace('\n', ' ').replace('\r', '')
            # TODO: deal with cases where we have more than one WikiQ

            # print(self.pyargs)
            if self.pyargs.de == 'P':
                if self.pyargs.ex_interlinguis == 'P':
                    cs1603_3_12.est_wikidata_p_interlinguis(codicem)
                if self.pyargs.ex_interlinguis == True:
                    cs1603_3_12.est_wikidata_p_interlinguis(True)
                cs1603_3_12.est_wikidata_p(codicem)
            elif self.pyargs.de == 'Q':
                cs1603_3_12.est_wikidata_q(codicem)

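execute_cli reads one Wikidata code per stdin line, normalizes the line ending, and dispatches on --de plus the --ex-interlinguis flag. A self-contained sketch of the same stdin pattern (handle_p and handle_q are hypothetical placeholders, and routing by code prefix instead of by the --de argparse flag is done only so this sketch runs on its own):

# Self-contained illustration of the stdin loop pattern.
import sys


def handle_p(codicem):
    print('P-code:', codicem)


def handle_q(codicem):
    print('Q-code:', codicem)


for line in sys.stdin:
    codicem = line.strip()  # assumption: strip(); the script uses replace()
    if codicem.startswith('P'):
        handle_p(codicem)
    elif codicem.startswith('Q'):
        handle_q(codicem)

# Example: printf "P1585\nQ1065\n" | python3 this_sketch.py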
999999999/999999_1679.sh

@@ -0,0 +1,101 @@
#!/bin/bash
#===============================================================================
#
# FILE: 999999_1679.sh
#
# USAGE: ./999999999/999999_1679.sh
#        FORCE_REDOWNLOAD=1 ./999999999/999999_1679.sh
#        FORCE_CHANGED=1 ./999999999/999999_1679.sh
#        FORCE_REDOWNLOAD_REM="1603_1_51" ./999999999/999999_1679.sh
#        time ./999999999/999999_1679.sh
# DESCRIPTION: Temporary tests related to Brazilian namespace. Use case from
#              https://github.com/EticaAI/lexicographi-sine-finibus/issues/39
#
# OPTIONS: ---
#
# REQUIREMENTS: - hxltmcli
# BUGS: ---
# NOTES: ---
# AUTHOR: Emerson Rocha <rocha[at]ieee.org>
# COMPANY: EticaAI
# LICENSE: Public Domain dedication
#          SPDX-License-Identifier: Unlicense
# VERSION: v1.0
# CREATED: 2022-05-12 14:18 UTC started. based on 1603_99.sh
# REVISION: ---
#===============================================================================
set -e

# time HTTPS_PROXY="socks5://127.0.0.1:9050" ./999999999/999999_1679.sh

# ./999999999/0/1603_1.py --methodus='codex' --codex-de 1603_45_31 --codex-in-tabulam-json | jq
# ./999999999/0/1603_1.py --methodus='codex' --codex-de 1603_45_31 --codex-in-tabulam-json > 1603/45/31/1603_45_31.mul-Latn.tab.json
# https://commons.wikimedia.org/wiki/Data:Sandbox/EmericusPetro/Example.tab

# @TODO: implement download entire sheet
DATA_1603="https://docs.google.com/spreadsheets/d/1ih3ouvx_n8W5ntNcYBqoyZ2NRMdaA0LRg5F9mGriZm4/export?format=xlsx"

ROOTDIR="$(pwd)"

# shellcheck source=999999999.lib.sh
. "$ROOTDIR"/999999999/999999999.lib.sh


#### Manual action, TEST locally, one per time, START --------------------------
# Download entire XLSX to local temp
# file_download_1603_xlsx "1"
# actiones_completis_locali "1603_1_1"
# actiones_completis_locali "1603_1_7"
# actiones_completis_locali "1603_1_51"

# actiones_completis_locali "1679_1_1"


#### Manual action, TEST locally, one per time, END ----------------------------

## Full drill (remote, specific item)
# actiones_completis_publicis "1603_63_101"
# actiones_completis_publicis "1603_25_1"
# actiones_completis_publicis "1603_99_123"
# actiones_completis_publicis "1603_1_8000"
# actiones_completis_locali "1679_1_1"
# deploy_0_9_markdown


#### tests _____________________________________________________________________

printf "P1585\n" | ./999999999/0/1603_3_12.py \ | ||
--actionem-sparql --de=P --query --ex-interlinguis \ | ||
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \ | ||
> 999999/0/P1585.tm.hxl.csv | ||
|
||
printf "P1585\n" | ./999999999/0/1603_3_12.py \ | ||
--actionem-sparql --de=P --query --lingua-divisioni=20 --lingua-paginae=1 \ | ||
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \ | ||
> 999999/0/P1585.wikiq~1-20.hxl.csv | ||
|
||
printf "P1585\n" | ./999999999/0/1603_3_12.py \ | ||
--actionem-sparql --de=P --query --lingua-divisioni=20 --lingua-paginae=2 \ | ||
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \ | ||
> 999999/0/P1585.wikiq~2-20.hxl.csv | ||
|
||
printf "P1585\n" | ./999999999/0/1603_3_12.py \ | ||
--actionem-sparql --de=P --query --lingua-divisioni=20 --lingua-paginae=3 \ | ||
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \ | ||
> 999999/0/P1585.wikiq~3-20.hxl.csv | ||
|
||
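The three runs above page through the language set with --lingua-divisioni=20 and --lingua-paginae=1..3. Assuming the flags mean "split the full list of languages into N near-equal divisions and emit only page k" (an assumption about the semantics; the exact behaviour lives in 1603_3_12.py), a minimal Python sketch of that split:

# Sketch of the assumed --lingua-divisioni / --lingua-paginae semantics:
# split a list into N near-equal divisions and return division k (1-based).
def lingua_pagina(linguae, divisioni, paginae):
    per_pagina = -(-len(linguae) // divisioni)  # ceiling division
    initium = (paginae - 1) * per_pagina
    return linguae[initium:initium + per_pagina]


linguae = ['lat', 'por', 'eng', 'spa', 'fra', 'deu', 'rus', 'ara']
print(lingua_pagina(linguae, divisioni=4, paginae=1))  # ['lat', 'por']
print(lingua_pagina(linguae, divisioni=4, paginae=3))  # ['fra', 'deu']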
hxlmerge --keys='#item+conceptum+codicem' \
--tags='#item+rem' \
--merge="999999/0/P1585.wikiq~1-20.hxl.csv" \
"999999/0/P1585.wikiq~2-20.hxl.csv" \
>"999999/0/P1585.wikiq~1+2-20.hxl.csv"

# Remove the first line of the merged output
sed -i '1d' "999999/0/P1585.wikiq~1+2-20.hxl.csv"

hxlmerge --keys='#item+conceptum+codicem' \
--tags='#item+rem' \
--merge="999999/0/P1585.wikiq~1+2-20.hxl.csv" \
"999999/0/P1585.wikiq~3-20.hxl.csv" \
>"999999/0/P1585.wikiq~1+2+3-20.hxl.csv"

# Remove the first line of the merged output
sed -i '1d' "999999/0/P1585.wikiq~1+2+3-20.hxl.csv"
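The hxlmerge calls above stitch the paged CSVs back together on the #item+conceptum+codicem key, and each sed '1d' drops the first line of the merged output (presumably an extra header row). As a conceptual illustration only, and not how hxlmerge itself works, a small Python sketch of a key-based merge of two CSV pages:

# Conceptual illustration of merging two CSV pages on a shared key column.
# Not an implementation of hxlmerge, which handles HXL hashtags and more.
import csv


def merge_on_key(path_left, path_right, path_out, key):
    # Index the right-hand page by the key column.
    with open(path_right, newline='', encoding='utf-8') as handle:
        right = {row[key]: row for row in csv.DictReader(handle)}
    with open(path_left, newline='', encoding='utf-8') as handle:
        left_rows = list(csv.DictReader(handle))
    left_cols = list(left_rows[0].keys()) if left_rows else [key]
    right_cols = [column for row in list(right.values())[:1]
                  for column in row.keys() if column not in left_cols]
    with open(path_out, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=left_cols + right_cols)
        writer.writeheader()
        for row in left_rows:
            match = right.get(row[key], {})
            row.update({column: match.get(column, '') for column in right_cols})
            writer.writerow(row)


# Example (file names follow the outputs above):
# merge_on_key('999999/0/P1585.wikiq~1-20.hxl.csv',
#              '999999/0/P1585.wikiq~2-20.hxl.csv',
#              '/tmp/P1585.merged.csv', '#item+conceptum+codicem')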