1603_3_12.py (#40): 999999_1679.sh bash script glue 3 out of 20 pages (not functional yet)

fititnt committed May 12, 2022
1 parent 3e87ff6 commit 3ad10eb
Showing 2 changed files with 149 additions and 36 deletions.
84 changes: 48 additions & 36 deletions officinam/999999999/0/1603_3_12.py
@@ -30,34 +30,13 @@
# pytest
# python3 -m doctest ./999999999/0/1603_3_12.py

# ./999999999/0/1603_3_12.py
# NUMERORDINATIO_BASIM="/external/ndata" ./999999999/0/1603_3_12.py
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --wikidata-link
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --tsv > 999999/0/test.tsv
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv > 999999/0/test.csv
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm

# 1603_25_1 query
# printf "Q3409626\nQ41055\nQ3321315\nQ160695\nQ9645\nQ9597\nQ713102\nQ133279\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query


# SELECT ?pic (STRAFTER(STR(?item), "entity/") AS ?item__conceptum__codicem) ?item__rem__i_lat__is_latn
# WHERE
# {
# VALUES ?item { wd:Q3409626 wd:Q41055 wd:Q3321315 wd:Q160695 wd:Q9645 wd:Q9597 wd:Q713102 wd:Q133279 }
# bind(xsd:integer(strafter(str(?item), 'Q')) as ?id_numeric) .
# OPTIONAL { ?item wdt:P18 ?pic }
# OPTIONAL { ?item rdfs:label ?item__rem__i_qcc__is_zxxx filter (lang(?item__rem__i_qcc__is_zxxx) = ""). }
# OPTIONAL { ?item rdfs:label ?item__rem__i_lat__is_latn filter (lang(?item__rem__i_lat__is_latn) = "la"). }
# }
# ORDER BY ASC (?id_numeric)

## Example with proxy
# Example with proxy
# export HTTP_PROXY="socks5://127.0.0.1:9050"
# export HTTPS_PROXY="socks5://127.0.0.1:9050"

# TODO: https://sinaahmadi.github.io/posts/10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html
# TODO: https://sinaahmadi.github.io/posts
# /10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html

import os
import sys
@@ -112,6 +91,9 @@
printf "P1585\\n" | {0} --actionem-sparql --de=P --query \
--ex-interlinguis
printf "P1585\\n" | {0} --actionem-sparql --de=P --query \
--ex-interlinguis
printf "P1585\\n" | {0} --actionem-sparql --de=P --query \
--lingua-divisioni=50 --lingua-paginae=1
------------------------------------------------------------------------------
@@ -124,7 +106,7 @@

# @see https://meta.wikimedia.org/wiki/User-Agent_policy
# @see https://www.mediawiki.org/wiki/API:Etiquette
USER_AGENT="EticaAI-multilingual-lexicography/2022.3.9 (https://meta.wikimedia.org/wiki/User:EmericusPetro; [email protected]) 1603_3_12.py/0.1"
USER_AGENT = "EticaAI-multilingual-lexicography/2022.3.9 (https://meta.wikimedia.org/wiki/User:EmericusPetro; [email protected]) 1603_3_12.py/0.1"

# print('getcwd: ', os.getcwd())
# print('oi', NUMERORDINATIO_BASIM)
@@ -311,8 +293,8 @@ def est_wikidata_p(self, wikidata_codicem: str):

return self

def est_wikidata_p_interlinguis(self: str):
self.ex_interlinguis = True
def est_wikidata_p_interlinguis(self, statum: bool = True):
self.ex_interlinguis = statum

return self

@@ -342,6 +324,7 @@ def est_wikidata_p_interlinguis(self: str):
# }
# }


def query_q(self):
langpair_full = self._query_linguam()
self.D1613_1_51_langpair = self._query_linguam_limit(langpair_full)
@@ -392,7 +375,7 @@ def query_q(self):
# [TRY IT ↗]()
return term

## Teste atual
# Teste atual
# SELECT DISTINCT ?item ?itemLabel WHERE {
# SERVICE wikibase:label {
# bd:serviceParam wikibase:language "[AUTO_LANGUAGE]".
@@ -403,7 +386,7 @@ def query_q(self):
# ?item p:P1585 ?statement0.
# ?statement0 (ps:P1585) _:anyValueP1585.
# #FILTER(EXISTS { ?statement0 prov:wasDerivedFrom ?reference. })

# #bind(xsd:integer(strafter(str(?item), 'Q')) as ?id_numeric) .
# }
# }
@@ -475,7 +458,37 @@ def query_p(self):
return term

def query_p_ex_interlinguis(self):
return self.query_p()
qid = ['wd:' + x for x in self.qid if isinstance(x, str)]

_pid = self.pid[0]

select = [
'(?wikidata_p_value AS ?item__conceptum__codicem)',
'(STRAFTER(STR(?item), "entity/") AS '
'?item__rem__i_qcc__is_zxxx__ix_wikiq)'
]

# @TODO: allow command line specify additional properties to
# optionally fetch together

term = """
SELECT {select} WHERE {{
{{
SELECT DISTINCT ?item WHERE {{
?item p:{wikidata_p} ?statement0.
?statement0 (ps:{wikidata_p}) _:anyValue{wikidata_p}.
}}
}}
?item wdt:{wikidata_p} ?wikidata_p_value .
}}
ORDER BY ASC (?wikidata_p_value)
""".format(
wikidata_p=_pid,
qitems=" ".join(qid),
select=" ".join(select)
)

return term

def exportatum_sparql(self):
resultatum = []
@@ -485,9 +498,9 @@ def exportatum_sparql(self):
resultatum.append(self.query_q())
if len(self.pid) > 0:
if self.ex_interlinguis:
resultatum.append(self.query_p())
else:
resultatum.append(self.query_p_ex_interlinguis())
else:
resultatum.append(self.query_p())
return resultatum


@@ -512,7 +525,6 @@ def make_args(self, hxl_output=True):
epilog=__EPILOGUM__
)


# https://en.wikipedia.org/wiki/Code_word
# https://en.wikipedia.org/wiki/Coded_set

@@ -704,10 +716,10 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
for line in sys.stdin:
codicem = line.replace('\n', ' ').replace('\r', '')
# TODO: deal with cases where we have more than one WikiQ

# print(self.pyargs)
if self.pyargs.de == 'P':
if self.pyargs.ex_interlinguis == 'P':
cs1603_3_12.est_wikidata_p_interlinguis(codicem)
if self.pyargs.ex_interlinguis == True:
cs1603_3_12.est_wikidata_p_interlinguis(True)
cs1603_3_12.est_wikidata_p(codicem)
elif self.pyargs.de == 'Q':
cs1603_3_12.est_wikidata_q(codicem)
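
The net effect of the Python changes above: --ex-interlinguis now routes to the new query_p_ex_interlinguis builder, whose SELECT pairs each property value (?item__conceptum__codicem) with the Wikidata Q identifier (?item__rem__i_qcc__is_zxxx__ix_wikiq), while plain --de=P --query with --lingua-divisioni / --lingua-paginae keeps building the paginated per-language query. A minimal way to inspect both generated SPARQL texts, assuming the flags behave as documented in the epilog above:

# Interlingual pairing query: P1585 value <-> Wikidata Q id
printf "P1585\n" | ./999999999/0/1603_3_12.py --actionem-sparql --de=P --query --ex-interlinguis

# Paginated per-language query for the same property (page 1 of 20)
printf "P1585\n" | ./999999999/0/1603_3_12.py --actionem-sparql --de=P --query \
    --lingua-divisioni=20 --lingua-paginae=1
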
101 changes: 101 additions & 0 deletions officinam/999999999/999999_1679.sh
@@ -0,0 +1,101 @@
#!/bin/bash
#===============================================================================
#
# FILE: 999999_1679.sh
#
# USAGE: ./999999999/999999_1679.sh
# FORCE_REDOWNLOAD=1 ./999999999/999999_1679.sh
# FORCE_CHANGED=1 ./999999999/999999_1679.sh
# FORCE_REDOWNLOAD_REM="1603_1_51" ./999999999/999999_1679.sh
# time ./999999999/999999_1679.sh
# DESCRIPTION: Temporary tests related to Brazilian namespace. Use case from
# https://github.com/EticaAI/lexicographi-sine-finibus/issues/39
#
# OPTIONS: ---
#
# REQUIREMENTS: - hxltmcli
# BUGS: ---
# NOTES: ---
# AUTHOR: Emerson Rocha <rocha[at]ieee.org>
# COMPANY: EticaAI
# LICENSE: Public Domain dedication
# SPDX-License-Identifier: Unlicense
# VERSION: v1.0
# CREATED: 2022-05-12 14:18 UTC started. based on 1603_99.sh
# REVISION: ---
#===============================================================================
set -e

# time HTTPS_PROXY="socks5://127.0.0.1:9050" ./999999999/999999_1679.sh

# ./999999999/0/1603_1.py --methodus='codex' --codex-de 1603_45_31 --codex-in-tabulam-json | jq
# ./999999999/0/1603_1.py --methodus='codex' --codex-de 1603_45_31 --codex-in-tabulam-json > 1603/45/31/1603_45_31.mul-Latn.tab.json
# https://commons.wikimedia.org/wiki/Data:Sandbox/EmericusPetro/Example.tab

# @TODO: implement download entire sheet
DATA_1603="https://docs.google.com/spreadsheets/d/1ih3ouvx_n8W5ntNcYBqoyZ2NRMdaA0LRg5F9mGriZm4/export?format=xlsx"
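# Hypothetical sketch, NOT part of this commit: the @TODO above could be
# served by a plain export download (assumes the sheet stays publicly
# exportable and that 999999/0/ is an acceptable scratch location):
#   curl --fail --location --output 999999/0/1603.xlsx "$DATA_1603"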

ROOTDIR="$(pwd)"

# shellcheck source=999999999.lib.sh
. "$ROOTDIR"/999999999/999999999.lib.sh


#### Manual action, TEST locally, one per time, START --------------------------
# Download entire XLSX to local temp
# file_download_1603_xlsx "1"
# actiones_completis_locali "1603_1_1"
# actiones_completis_locali "1603_1_7"
# actiones_completis_locali "1603_1_51"

# actiones_completis_locali "1679_1_1"


#### Manual action, TEST locally, one per time, END ----------------------------

## Full drill (remote, specific item)
# actiones_completis_publicis "1603_63_101"
# actiones_completis_publicis "1603_25_1"
# actiones_completis_publicis "1603_99_123"
# actiones_completis_publicis "1603_1_8000"
# actiones_completis_locali "1679_1_1"
# deploy_0_9_markdown


#### tests _____________________________________________________________________

printf "P1585\n" | ./999999999/0/1603_3_12.py \
--actionem-sparql --de=P --query --ex-interlinguis \
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
> 999999/0/P1585.tm.hxl.csv

printf "P1585\n" | ./999999999/0/1603_3_12.py \
--actionem-sparql --de=P --query --lingua-divisioni=20 --lingua-paginae=1 \
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
> 999999/0/P1585.wikiq~1-20.hxl.csv

printf "P1585\n" | ./999999999/0/1603_3_12.py \
--actionem-sparql --de=P --query --lingua-divisioni=20 --lingua-paginae=2 \
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
> 999999/0/P1585.wikiq~2-20.hxl.csv

printf "P1585\n" | ./999999999/0/1603_3_12.py \
--actionem-sparql --de=P --query --lingua-divisioni=20 --lingua-paginae=3 \
| ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
> 999999/0/P1585.wikiq~3-20.hxl.csv

hxlmerge --keys='#item+conceptum+codicem' \
--tags='#item+rem' \
--merge="999999/0/P1585.wikiq~1-20.hxl.csv" \
"999999/0/P1585.wikiq~2-20.hxl.csv" \
>"999999/0/P1585.wikiq~1+2-20.hxl.csv"

sed -i '1d' "999999/0/P1585.wikiq~1+2-20.hxl.csv"

hxlmerge --keys='#item+conceptum+codicem' \
--tags='#item+rem' \
--merge="999999/0/P1585.wikiq~1+2-20.hxl.csv" \
"999999/0/P1585.wikiq~3-20.hxl.csv" \
>"999999/0/P1585.wikiq~1+2+3-20.hxl.csv"

sed -i '1d' "999999/0/P1585.wikiq~1+2-20.hxl.csv"
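
The three hand-unrolled merges above only glue pages 1-3 of the 20 divisions, hence "not functional yet" in the commit title. A possible generalization, not part of this commit, would loop over all pages. This is a sketch only: it assumes hxlmerge and the --lingua-divisioni / --lingua-paginae flags keep behaving exactly as in the calls above, and that the sed '1d' step exists to drop a duplicated first row after each merge.

#!/bin/bash
# Hypothetical helper, NOT in this commit: glue all 20 language pages for one
# Wikidata property into a single HXLTM CSV by merging them pairwise.
set -e

PROPERTY="P1585"
DIVISIONS=20
ACCUMULATOR="999999/0/${PROPERTY}.wikiq~all-${DIVISIONS}.hxl.csv"

# Fetch page 1 first; it becomes the initial accumulator.
printf "%s\n" "$PROPERTY" | ./999999999/0/1603_3_12.py \
  --actionem-sparql --de=P --query --lingua-divisioni="$DIVISIONS" --lingua-paginae=1 \
  | ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
  > "$ACCUMULATOR"

for paginae in $(seq 2 "$DIVISIONS"); do
  page_file="999999/0/${PROPERTY}.wikiq~${paginae}-${DIVISIONS}.hxl.csv"

  # Fetch the next page, same pipeline as the unrolled calls above.
  printf "%s\n" "$PROPERTY" | ./999999999/0/1603_3_12.py \
    --actionem-sparql --de=P --query --lingua-divisioni="$DIVISIONS" --lingua-paginae="$paginae" \
    | ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
    > "$page_file"

  # Merge the new page into the accumulator on the concept key.
  hxlmerge --keys='#item+conceptum+codicem' \
    --tags='#item+rem' \
    --merge="$ACCUMULATOR" \
    "$page_file" \
    > "${ACCUMULATOR}.tmp"

  # Same post-processing as above: drop the duplicated first row.
  sed -i '1d' "${ACCUMULATOR}.tmp"
  mv "${ACCUMULATOR}.tmp" "$ACCUMULATOR"
done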
