1603:1:51 (#9): HXLTM export on "

EticaAI · Jan 23, 2022 · 8dc806d · 8dc806d
1 parent 769e5fb
commit 8dc806d
Show file tree

Hide file tree

Showing 3 changed files with 77 additions and 8 deletions.
diff --git a/officinam/999999999/0/1603_3_12.py b/officinam/999999999/0/1603_3_12.py
@@ -35,6 +35,7 @@
 #    printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --wikidata-link
 #    printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --tsv > 999999/0/test.tsv
 #    printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv > 999999/0/test.csv
+#    printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm
 
 # TODO: https://sinaahmadi.github.io/posts/10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html
 
@@ -92,7 +93,32 @@
 #   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
 # }
 
+
+def hxltm_hastag_de_csvhxlated(csv_caput: list) -> list:
+    """Convert CSV header names into HXL-style hashtags.
+
+    Each non-empty item gets a leading '#', every '__' is replaced by '+',
+    and any '?' character is removed. Empty items stay as empty strings.
+
+    Make this type of conversion:
+    - 'item__conceptum__codicem' => '#item+conceptum+codicem'
+    - 'item__rem__i_ara__is_arab' => '#item+rem+i_ara+is_arab'
+    - '' => ''
+
+    Args:
+        csv_caput (list): Array of input items (header cells as strings)
+
+    Returns:
+        [list]: New list with one converted item per input item, in the
+        same order
+    """
+    resultatum = []
+    for item in csv_caput:
+        if len(item):
+            resultatum.append('#' + item.replace('__', '+').replace('?', ''))
+        else:
+            resultatum.append('')
+    return resultatum
+
 # https://stackoverflow.com/questions/43258341/how-to-get-wikidata-labels-in-more-than-one-language
+
+
 class CS1603z3z12:
     """ [summary]
 
@@ -194,12 +220,11 @@ def est_wikidata_q(self, wikidata_codicem: str):
 #   }
 # }
 
-
     def query(self):
         qid = ['wd:' + x for x in self.qid if isinstance(x, str)]
         # select = '?item ' + " ".join(self._query_linguam())
 
-        select = ['?item']
+        select = ['(?item AS ?item__conceptum__codicem)']
         filter_otional = []
         for pair in self.D1613_1_51_langpair:
             select.append('?' + pair[1])
@@ -325,6 +350,7 @@ def make_args(self, hxl_output=True):
             const=True,
             nargs='?'
         )
+
         neo_codex.add_argument(
             '--tsv',
             help='Generate TSV output (from piped in query)',
@@ -334,6 +360,17 @@ def make_args(self, hxl_output=True):
             nargs='?'
         )
 
+        neo_codex.add_argument(
+            '--hxltm',
+            help='Generate HXL-tagged output (from piped in query). ' +
+            'Concepts use #item+conceptum+codicem instead ' +
+            'of #item+code+v_wiki_q',
+            metavar='',
+            dest='hxltm',
+            const=True,
+            nargs='?'
+        )
+
         # neo_codex.add_argument(
         #     '--actionem-verbum-simplex',
         #     help='Do not generate the codes. Just calculate the full matrix ' +
@@ -624,16 +661,46 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
                 # https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/en#Supported_formats
 
                 if self.pyargs.tsv:
+                    separator = "\t"
                     headers = {'Accept': 'text/tab-separated-values'}
                 if self.pyargs.csv:
+                    separator = ","
+                    headers = {'Accept': 'text/csv'}
+                if self.pyargs.hxltm:
+                    # headers = {'Accept': 'text/tab-separated-values'}
                     headers = {'Accept': 'text/csv'}
 
                 payload_query = "".join(full_query)
-                r = requests.get(sparql_backend, headers=headers, params={
+                r = requests.post(sparql_backend, headers=headers, data={
                     'query': payload_query
                 })
 
-                print(r.text.strip())
+                # @TODO: --tsv --hxltm is known to be buggy (not sure if
+                #        Wikidata results already skip values)
+
+                if self.pyargs.hxltm:
+                    result_string = r.text.strip()
+
+                    # @TODO: this is likely to break on fields with newlines;
+                    #        however, no test sample exists at the moment.
+                    #        Eventually this needs to be checked.
+                    lines = result_string.splitlines()
+                    # caput = hxltm_hastag_de_csvhxlated(next(iter(lines)).split(","))
+                    caput_crudum = lines.pop(0)
+                    # print('caput_crudum', caput_crudum)
+                    caput = hxltm_hastag_de_csvhxlated(caput_crudum.split(','))
+                    print(separator.join(caput))
+                    print("\n".join(lines))
+
+                    # reader = csv.reader(lines, delimiter="\t")
+                    # caput = hxltm_hastag_de_csvhxlated(next(reader))
+                    # print(separator.join(caput))
+                    # for row in reader:
+                    #     print(separator.join(row))
+                else:
+                    print(r.text.strip())
+
+                # TODO: generate explicit error messages and return code
                 # print(r.content)
                 return self.EXIT_OK
 

diff --git a/officinam/999999999/1603_3_1603_45_1.sh b/officinam/999999999/1603_3_1603_45_1.sh
@@ -5,6 +5,7 @@
 #
 #         USAGE:  ./999999999/1603_3_1603_45_1.sh
 #                 time ./999999999/1603_3_1603_45_1.sh
+#                 time FORCE_CHANGED=1 ./999999999/1603_3_1603_45_1.sh
 #
 #   DESCRIPTION:  ---
 #

diff --git a/officinam/999999999/999999999.lib.sh b/officinam/999999999/999999999.lib.sh
@@ -127,6 +127,7 @@ file_update_if_necessary() {
   csv)
     is_valid=$(csvclean --dry-run "$fontem_archivum")
     if [ "$is_valid" != "No errors." ]; then
+      echo "$fontem_archivum"
       echo "$is_valid"
       return 1
     fi
@@ -393,17 +394,17 @@ file_translate_csv_de_numerordinatio_q() {
   fi
 
   fontem_archivum="${_basim_fontem}/$_path/$_nomen.no1.tm.hxl.csv"
-  objectivum_archivum="${_basim_objectivum}/$_path/$_nomen.wikiq.tm.csv"
+  objectivum_archivum="${_basim_objectivum}/$_path/$_nomen.wikiq.tm.hxl.csv"
   objectivum_archivum_temporarium="${ROOTDIR}/999999/0/$_nomen.no1.tm.hxl.csv"
   objectivum_archivum_temporarium_b="${ROOTDIR}/999999/0/$_nomen.q.txt"
   objectivum_archivum_temporarium_b_u="${ROOTDIR}/999999/0/$_nomen.uniq.q.txt"
-  objectivum_archivum_temporarium_b_u_wiki="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.csv"
+  objectivum_archivum_temporarium_b_u_wiki="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.hxl.csv"
 
   # if [ -z "$(changed_recently "$fontem_archivum")" ]; then return 0; fi
 
   # echo "${FUNCNAME[0]} sources changed_recently. Reloading..."
 
-  if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi
+  # if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi
 
   echo "${FUNCNAME[0]} stale data on [$objectivum_archivum], refreshing..."
 
@@ -445,7 +446,7 @@ file_translate_csv_de_numerordinatio_q() {
   sort --version-sort --field-separator="Q" <"$objectivum_archivum_temporarium_b" | uniq >"$objectivum_archivum_temporarium_b_u"
 
   "${ROOTDIR}/999999999/0/1603_3_12.py" --actionem-sparql --query <"$objectivum_archivum_temporarium_b_u" |
-    ./999999999/0/1603_3_12.py --actionem-sparql --csv \
+    ./999999999/0/1603_3_12.py --actionem-sparql --csv --hxltm \
       >"$objectivum_archivum_temporarium_b_u_wiki"
   # "$objectivum_archivum_temporarium_b_u"