Skip to content

Commit

Permalink
1603:3:12 (#3): Q codes extraction (used to get translations) now a b…
Browse files Browse the repository at this point in the history
…it more flexible; still need solve cases where multiple columns have Q codes
  • Loading branch information
fititnt committed Jan 22, 2022
1 parent 21ef0c2 commit 40c9ce0
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 22 deletions.
3 changes: 3 additions & 0 deletions officinam/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ __pycache__
# Used by some temporary files
*.TEMP.*

999999/0/
!999999/0/.gitkep

# These files are generated by software and start at 30 MB and up.
# They do not need to be committed to the repository.
1603/2600/2/0-9a-z__4__b60.tsv
Expand Down
6 changes: 3 additions & 3 deletions officinam/1603/1/1/1603_1_1.no1.tm.hxl.csv
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
1603:1:1:13:1603:49,13_1603_49,10,10,1603:13:1603:49,,/HXL replacement maps to UN m49 from other namespaces/@eng-Latn,,,,
1603:1:1:25,25,50,50,1603:25,Q11190,Medicina,[1025] قانون در طب,https://archive.org/details/AlQaawnoonFiTTwibb/Al-Qaawnoon%20fi-t-Twibb/mode/2up,Mīlitāris scientiae,علوم عسكرية
1603:1:1:42,42,50,50,1603:42,Q192386,Mīlitāris scientiae,[142] (142 -100) 魏伯陽 ,https://archive.org/search.php?query=title%3A%28%E6%AD%A6%E7%B6%93%E7%B8%BD%E8%A6%81%29,Forēnsis scientiae,علم الأدلة الجنائية
1603:1:1:44,44,50,50,1603:44,Q495304,Forēnsis scientiae,,,Normās interimperia,
1603:1:1:44,44,50,50,1603:44,Q495304,Forēnsis scientiae,"[42] Antistius ex Caesar post mortī, circa 42-03-15 BC",,Normās interimperia,
1603:1:1:44:1,44_1,70,70,1603:44:1,Q99312209,//dictiōnāria de post mortis condici//,,,,
1603:1:1:44:142,44_142,70,70,1603:44:142,,//dictiōnāria de vulnera ab arma ignifera//,[142] 魏伯陽 ,,,
1603:1:1:45,45,20,20,1603:45,,Normās interimperia,[1945-10-24] Nationes Unitae,,(Interimperia) Locus cōdicī,
1603:1:1:45,45,20,20,1603:45,,Normās interimperia,[1945-10-24] Fundatio de Nationes Unitae,,(Interimperia) Locus cōdicī,
1603:1:1:45:1,45_1,20,20,1603:45:1,,,,,,
1603:1:1:45:16,45_16,20,20,1603:45:16,Q7200235,(Interimperia) Locus cōdicī,[16] P,https://en.wikipedia.org/wiki/Common_Operational_Datasets#P-codes,,
1603:1:1:45:16:?:0,45_16_?_0,20,20,1603:45:16:?:0,,//imperium territōrium//,,,//imperium territōrium//,
Expand All @@ -34,7 +34,7 @@
1603:1:1:45:16:?:4,45_16_?_4,20,20,1603:45:16:?:4,,//Infraimperium territōria ōrdō 4//,,,//Infraimperium territōria ōrdō 4//,
1603:1:1:45:16:?:5,45_16_?_5,20,20,1603:45:16:?:5,,//Infraimperium territōria ōrdō 5//,,,//Infraimperium territōria ōrdō 5//,
1603:1:1:45:16:?:6,45_16_?_6,20,20,1603:45:16:?:6,,//Infraimperium territōria ōrdō 6//,,,//Infraimperium territōria ōrdō 6//,
1603:1:1:45:16:?:21:?,45_16_?_21_?,20,20,1603:45:16:?:21:?,,(Interimperia) Locus cōdicī; exāctō (A1...A6),21] (0 + 1 + 2 + 3 + 4 + 5 + 6),,(Interimperia) Locus cōdicī; exāctō (A1...A6),
1603:1:1:45:16:?:21:?,45_16_?_21_?,20,20,1603:45:16:?:21:?,,(Interimperia) Locus cōdicī; exāctō (A1...A6),[21] (0 + 1 + 2 + 3 + 4 + 5 + 6),,(Interimperia) Locus cōdicī; exāctō (A1...A6),
1603:1:1:45:16:900,45_16_900,20,20,1603:45:16:900,,[private use] Entire world public P-Codes Adm 0 (>110),,,,
1603:1:1:45:16:901,45_16_901,20,20,1603:45:16:901,,[private use] Entire world public P-Codes A.1 (>2500),,,,
1603:1:1:45:16:902,45_16_902,20,20,1603:45:16:902,,[private use] Entire world public P-Codes A.2 (>33700),,,,
Expand Down
26 changes: 16 additions & 10 deletions officinam/1603/1/51/1603_1_51.no1.tm.hxl.csv
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
#item+conceptum+numerordinatio,#item+conceptum+codicem,#status+conceptum,#status+conceptum+codicem,#item+rem+i_qcc+is_zxxx+ix_uid,#item+rem+i_qcc+is_zxxx+ix_csvsffxm,#item+rem+i_qcc+is_zxxx+ix_hxla,#item+rem+i_latn+is_latn,#item+rem+i_qcc+is_zxxx+ix_wikiq+ix_linguam,#item+rem+i_qcc+is_zxxx+ix_wikiq+ix_scriptum,#item+rem+i_qcc+is_zxxx+ix_wikilngm,#item+rem+i_qcc+is_zxxx+ix_glottocode,#item+rem+i_qcc+is_zxxx+ix_iso639p3a3
1603:1:51:1,1,90,19,ara-Arab,__i_ara__is_arab,+i_ara+is_arab,Lingua Arabica (Abecedarium Arabicum),Q13955,Q8196,ar,arab1395,ara
1603:1:51:2,2,90,19,ben-Beng,__i_ben__is_beng,+i_ben+is_beng,Lingua Bengali (?),Q9610,,bn,beng1280,ben
1603:1:51:1,1,90,19,ara-Arab,__i_ara__is_arab,+i_ara+is_arab,Macrolingua Arabica (Abecedarium Arabicum),Q13955,Q8196,ar,arab1395,ara
1603:1:51:2,2,90,19,ben-Beng,__i_ben__is_beng,+i_ben+is_beng,Lingua Bengali (?),Q9610,Q756802,bn,beng1280,ben
1603:1:51:3,3,90,19,grc-Grek,__i_grc__is_grek,+i_grc+is_grek,Lingua Graeca antiqua (Alphabetum Graecum),Q35497,Q8216,grc,anci1242,grc
1603:1:51:4,4,90,19,lat-Latn,__i_lat__is_latn,+i_lat+is_latn,Lingua Latina (Abecedarium Latinum),Q397,Q8229,la,lati1261,lat
1603:1:51:5,5,90,19,rus-Cyrl,__i_rus__is_cyrl,+i_rus+is_cyrl,Lingua Russica (Abecedarium Cyrillicum),Q7737,Q8209,ru,russ1263,rus
1603:1:51:6,6,60,19,san-Zzzz,__i_san__is_zzzz,+i_san+is_zzzz,Lingua Sanscrita,Q11059,,sa,sans1269,san
1603:1:51:100,100,60,19,por-Latn,__i_por__is_latn,+i_por+is_latn,,,,pt,,
1603:1:51:101,101,60,19,eng-Latn,__i_eng__is_latn,+i_eng+is_latn,,,,en,,
1603:1:51:102,102,60,19,fra-Latn,__i_fra__is_latn,+i_fra+is_latn,,,,fr,,
1603:1:51:103,103,60,19,nld-Latn,__i_nld__is_latn,+i_nld+is_latn,,Q7411,,nl,mode1257,nld
1603:1:51:104,104,60,19,deu-Latn,__i_deu__is_latn,+i_deu+is_latn,Lingua Germanica (Abecedarium Latinum),Q188,,de,stan1295,deu
1603:1:51:105,105,60,19,spa-Latn,__i_spa__is_latn,+i_spa+is_latn,Lingua Hispanica (Abecedarium Latinum),Q1321,,es,stan1288,spa
1603:1:51:106,106,60,19,ita-Latn,__i_ita__is_latn,+i_ita+is_latn,Lingua Italiana (Abecedarium Latinum),Q652,,it,ital1282,ita
1603:1:51:107,107,60,19,gle-Latn,__i_gle__is_latn,+i_gle+is_latn,Lingua Hibernica (Abecedarium Latinum),Q9142,,ga,iris1253,gle
1603:1:51:100,100,60,19,por-Latn,__i_por__is_latn,+i_por+is_latn,Lingua Lusitana (Abecedarium Latinum),Q5146,Q8229,pt,port1283,por
1603:1:51:101,101,60,19,eng-Latn,__i_eng__is_latn,+i_eng+is_latn,Lingua Anglica (Abecedarium Latinum),Q1860,Q8229,en,stan1293,eng
1603:1:51:102,102,60,19,fra-Latn,__i_fra__is_latn,+i_fra+is_latn,Lingua Francogallica (Abecedarium Latinum),Q150,Q8229,fr,stan1290,fra
1603:1:51:103,103,60,19,nld-Latn,__i_nld__is_latn,+i_nld+is_latn,Lingua Batavica (Abecedarium Latinum),Q7411,Q8229,nl,mode1257,nld
1603:1:51:104,104,60,19,deu-Latn,__i_deu__is_latn,+i_deu+is_latn,Lingua Germanica (Abecedarium Latinum),Q188,Q8229,de,stan1295,deu
1603:1:51:105,105,60,19,spa-Latn,__i_spa__is_latn,+i_spa+is_latn,Lingua Hispanica (Abecedarium Latinum),Q1321,Q8229,es,stan1288,spa
1603:1:51:106,106,60,19,ita-Latn,__i_ita__is_latn,+i_ita+is_latn,Lingua Italiana (Abecedarium Latinum),Q652,Q8229,it,ital1282,ita
1603:1:51:107,107,60,19,gle-Latn,__i_gle__is_latn,+i_gle+is_latn,Lingua Hibernica (Abecedarium Latinum),Q9142,Q8229,ga,iris1253,gle
1603:1:51:108,108,60,19,swe-Latn,__i_swe__is_latn,+i_swe+is_latn,Lingua Suecica (Abecedarium Latinum),Q9027,Q8229,sv,swed1254,swe
1603:1:51:109,109,60,19,ceb-Latn,__i_ceb__is_latn,+i_ceb+is_latn,Lingua Caebuana (Abecedarium Latinum),Q33239,Q8229,ceb,cebu1242,ceb
1603:1:51:110,110,60,19,sqi-Latn,__i_sqi__is_latn,+i_sqi+is_latn,Macrolingua Albanica (Abecedarium Latinum),Q8748,Q8229,sq,alba1267,sqi
1603:1:51:111,111,60,19,pol-Latn,__i_pol__is_latn,+i_pol+is_latn,Lingua Polonica (Abecedarium Latinum),Q809,Q8229,pl,poli1260,pol
1603:1:51:112,112,60,19,fin-Latn,__i_fin__is_latn,+i_fin+is_latn,Lingua Finnica (Abecedarium Latinum),Q1412,Q8229,fi,finn1318,fin
1603:1:51:113,113,60,19,ron-Latn,__i_ron__is_latn,+i_ron+is_latn,Lingua Dacoromanica (Abecedarium Latinum),Q7913,Q8229,ro,roma1327,ron
10 changes: 10 additions & 0 deletions officinam/999999999/1603_17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,13 @@ file_convert_numerordinatio_de_hxltm "1603_84_1" "1" "0"

file_download_if_necessary "$DATA_1603_45_1" "1603_45_1" "csv" "tm.hxl.csv" "hxltmcli" "1"
file_convert_numerordinatio_de_hxltm "1603_45_1" "1" "0"


# TODO: maybe move this to somewhere else
# TODO: fix the corner cases where there is more than one concept,
# as is the case with the language table itself
# cat 1603/1/51/1603_1_51.no1.tm.hxl.csv | hxlclean
# file_translate_csv_de_numerordinatio_q "1603_1_51" "0" "0"


# file_translate_csv_de_numerordinatio_q "1603_45_1" "0" "0"
1 change: 0 additions & 1 deletion officinam/999999999/1603_3_1603_45_1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# OPTIONS: ---
#
# REQUIREMENTS: - Bash shell (or better)
# - wikibase-cli (https://github.com/maxlath/wikibase-cli)
# BUGS: ---
# NOTES: ---
# AUTHOR: Emerson Rocha <rocha[at]ieee.org>
Expand Down
35 changes: 27 additions & 8 deletions officinam/999999999/999999999.lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -344,15 +344,13 @@ file_convert_numerordinatio_de_hxltm() {
file_update_if_necessary csv "$objectivum_archivum_temporarium" "$objectivum_archivum"
}


## Has them defined
# cat 999999/1603/1/1/1603_1_1.tm.hxl.csv | hxlselect --query="#status+conceptum<0"
# cat 999999/1603/1/1/1603_1_1.tm.hxl.csv | hxlselect --query='#status+conceptum+codicem~^(1|2|3|4|5|6|7|8|9)$' --reverse
## Does not have them defined
# cat 1603/45/1/1603_45_1.no1.tm.hxl.csv | hxlselect --query="#status+conceptum<0"
# cat 1603/45/1/1603_45_1.no1.tm.hxl.csv | hxlselect --query='#status+conceptum+codicem~^(1|2|3|4|5|6|7|8|9)$' --reverse


# @TODO: create helper to remove empty translations;
# @see https://github.com/wireservice/csvkit/issues/962
# Potential example:
Expand All @@ -361,6 +359,9 @@ file_convert_numerordinatio_de_hxltm() {
#######################################
# Extract Wikipedia QIDs from numerordinatio no1.tm.hxl.csv and generate a
# wikiq.tm.csv
# Extract QCodes from:
# - '+ix_wikiq'
# - '+v_wiki_q'
#
# Globals:
# ROOTDIR
Expand Down Expand Up @@ -398,9 +399,13 @@ file_translate_csv_de_numerordinatio_q() {
objectivum_archivum_temporarium_b_u="${ROOTDIR}/999999/0/$_nomen.uniq.q.txt"
objectivum_archivum_temporarium_b_u_wiki="${ROOTDIR}/999999/0/$_nomen.wikiq.tm.csv"

if [ -z "$(changed_recently "$fontem_archivum")" ]; then return 0; fi
# if [ -z "$(changed_recently "$fontem_archivum")" ]; then return 0; fi

echo "${FUNCNAME[0]} sources changed_recently. Reloading..."
# echo "${FUNCNAME[0]} sources changed_recently. Reloading..."

if [ -z "$(stale_archive "$objectivum_archivum")" ]; then return 0; fi

echo "${FUNCNAME[0]} stale data on [$objectivum_archivum], refreshing..."

# echo "$fontem_archivum"

Expand All @@ -409,16 +414,27 @@ file_translate_csv_de_numerordinatio_q() {
# echo "objectivum_archivum [$objectivum_archivum]"
# echo "objectivum_archivum_temporarium [$objectivum_archivum_temporarium]"
# head -n 2 "$fontem_archivum"
# hxlcut \
# --include="#item+rem+i_qcc+is_zxxx+ix_wikiq,#item+conceptum+numerordinatio" \
# "$fontem_archivum" |
# hxlselect --query="#item+rem+i_qcc+is_zxxx+ix_wikiq>0" \
# >"$objectivum_archivum_temporarium"

# hxlcut \
# --include="#item+rem+i_qcc+is_zxxx+ix_wikiq" \
# "$fontem_archivum" |
# hxlselect --query="#item+rem+i_qcc+is_zxxx+ix_wikiq>0" \
# >"$objectivum_archivum_temporarium_b"
hxlcut \
--include="#item+rem+i_qcc+is_zxxx+ix_wikiq,#item+conceptum+numerordinatio" \
--include='#*+ix_wikiq,#*+v_wiki_q,#item+conceptum+numerordinatio' \
"$fontem_archivum" |
hxlselect --query="#item+rem+i_qcc+is_zxxx+ix_wikiq>0" \
hxlselect --query='#*+ix_wikiq>0' --query='#*+v_wiki_q>0' \
>"$objectivum_archivum_temporarium"

hxlcut \
--include="#item+rem+i_qcc+is_zxxx+ix_wikiq" \
--include='#*+ix_wikiq,#*+v_wiki_q' \
"$fontem_archivum" |
hxlselect --query="#item+rem+i_qcc+is_zxxx+ix_wikiq>0" \
hxlselect --query='#*+ix_wikiq>0' --query='#*+v_wiki_q>0' \
>"$objectivum_archivum_temporarium_b"

sed -i '1,2d' "${objectivum_archivum_temporarium_b}"
Expand All @@ -433,6 +449,9 @@ file_translate_csv_de_numerordinatio_q() {
>"$objectivum_archivum_temporarium_b_u_wiki"
# "$objectivum_archivum_temporarium_b_u"

# TODO: implement a check to see if there is more than one Q column, then use
# as baseline

rm "$objectivum_archivum_temporarium"
rm "$objectivum_archivum_temporarium_b"
rm "$objectivum_archivum_temporarium_b_u"
Expand Down

0 comments on commit 40c9ce0

Please sign in to comment.