Skip to content

Commit

Permalink
#3 - add the remaining old English synonym links as ILI references
Browse files Browse the repository at this point in the history
  • Loading branch information
simongray committed May 30, 2023
1 parent 9fa5123 commit d2b64ba
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ bootstrap/other/sentiment/sense_polarities.tsv
# The open English WordNet
bootstrap/other/english/english-wordnet-2022.ttl
bootstrap/other/english/ili.ttl
bootstrap/other/english/ili-map-pwn20.tab
bootstrap/other/english/yaml
bootstrap/other/english/senseidx.edn

Expand Down
31 changes: 25 additions & 6 deletions src/main/dk/cst/dannet/bootstrap.clj
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
[dk.cst.dannet.query :as q]
[dk.cst.dannet.query.operation :as op]))

;; Forward declarations: these vars are referenced by code that appears before
;; their definitions in this namespace (e.g. `wn20-id->ili` calls
;; `read-triples`), so they must be declared up front.
(declare read-triples)
(declare cor-k-pos)

;; TODO: missing labels
;; http://localhost:3456/dannet/data/synset-48454
;; http://localhost:3456/dannet/data/synset-49086
Expand Down Expand Up @@ -557,17 +560,30 @@
(def senseidx->english-synset
  "Delay wrapping the EDN data read from
  'bootstrap/other/english/senseidx.edn'. Nothing is read from disk until the
  delay is dereferenced for the first time; the parsed value is then cached.

  Used as a lookup table keyed by the object IDs found in 'relations.csv'
  (presumably mapping to English WordNet synsets; verify against the file)."
  (delay
    (-> "bootstrap/other/english/senseidx.edn"
        slurp
        edn/read-string)))

(def wn20-id->ili
  "Delay wrapping a map from \"ENG20-\"-prefixed WordNet IDs to keywords in the
  `ili` namespace, built from the tab-separated file
  'bootstrap/other/english/ili-map-pwn20.tab'.

  Only rows whose third column (the confidence) equals the string \"1\" are
  kept. The file is not read until the delay is first dereferenced."
  (delay
    (let [rows       (read-triples [identity
                                    "bootstrap/other/english/ili-map-pwn20.tab"
                                    :encoding "UTF-8"
                                    :separator \tab])
          confident? (fn [[_ _ confidence]]
                       (= confidence "1"))
          row->entry (fn [[ili-id wn-id]]
                       [(str "ENG20-" wn-id) (keyword "ili" ili-id)])]
      ;; Same filter-then-map pipeline, expressed as a transducer.
      (into {}
            (comp (filter confident?)
                  (map row->entry))
            rows))))

(h/defn ->english-link-triples
  "Convert a `row` from 'relations.csv' to a set containing a single triple
  that links a DanNet synset to its English counterpart.

  Only rows whose relation is \"eq_has_synonym\" are converted:

    * when the object ID is found in `senseidx->english-synset`, the result is
      a :wn/eq_synonym triple pointing at the value found there;
    * otherwise, when the object ID is found in `wn20-id->ili`, the result is
      a :wn/ili triple pointing at the matching ILI keyword.

  Returns nil for every other relation and for object IDs found in neither
  lookup table."
  [[subj-id _ rel obj-id _ _ :as row]]
  ;; Ignores eq_has_hyponym and eq_has_hyperonym, no equivalent in GWA schema.
  ;; This loses us 123 of the original 5000 links to the Princeton WordNet.
  ;; TODO: implement dns relations for this as we apparently use those in the new data too...
  (when (= "eq_has_synonym" rel)
    (if-let [obj (get @senseidx->english-synset obj-id)]
      #{[(synset-uri subj-id) :wn/eq_synonym obj]}
      ;; Fallback for IDs missing from the sense index, e.g. "ENG20-07945291-n":
      ;; the keys of `wn20-id->ili` carry the same "ENG20-" prefix.
      (when-let [ili-obj (get @wn20-id->ili obj-id)]
        #{[(synset-uri subj-id) :wn/ili ili-obj]}))))

;; TODO: can we create new forms/words/synsets rather than overload writtenRep?
(defn explode-written-reps
Expand Down Expand Up @@ -600,9 +616,6 @@
[s & after]
(apply str "\"" s "\"" after))

(declare read-triples)
(declare cor-k-pos)

(def sense-properties
(let [row->kv (fn [[dannetsemid lemma hom pos dn_lemma id gloss]]
(let [[_ pos'] (re-matches #"([^\.]+)\.?" pos)
Expand Down Expand Up @@ -1103,6 +1116,7 @@
:separator \tab
:preprocess (comp mark-duplicate-senses rest)]

;; TODO: publish as a separate dataset?
;; Links to the Open English WordNet
:oewn-links [->english-link-triples "bootstrap/dannet/DanNet-2.5.1_csv/relations.csv"]}

Expand Down Expand Up @@ -1258,6 +1272,11 @@
(->> (read-triples (get-in imports [prefix/dn-uri :relations]))
(take 10))

;; Example English WordNet or ILI links
(->> (read-triples (get-in imports [prefix/dn-uri :oewn-links]))
(remove nil?)
(take 10))

;; Example links to the Open English WordNet
(->> (read-triples (get-in imports [prefix/dn-uri :en-links]))
(take 10))
Expand Down
11 changes: 9 additions & 2 deletions src/main/dk/cst/dannet/web/section.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
[[nil [:rdf/type
:owl/sameAs
:skos/definition
:wn/definition
:rdfs/comment
:wn/partOfSpeech
:lexinfo/partOfSpeech
:lexinfo/senseExample
:dns/sentiment
Expand All @@ -29,10 +31,15 @@
:ontolex/sense
:ontolex/isSenseOf
:ontolex/lexicalizedSense
:ontolex/isLexicalizedSenseOf]]
:ontolex/isLexicalizedSenseOf
:wn/ili
:wn/eq_synonym]]
[#{(->LangStr "Semantic relations" "en")
(->LangStr "Betydningsrelationer" "da")}
(some-fn (prefix/with-prefix 'wn :except #{:wn/partOfSpeech})
(some-fn (prefix/with-prefix 'wn :except #{:wn/partOfSpeech
:wn/definition
:wn/ili
:wn/eq_synonym})
(comp #{:dns/usedFor
:dns/usedForObject
:dns/nearAntonym
Expand Down

0 comments on commit d2b64ba

Please sign in to comment.