# scripts.py
"""
This file is intended to be a repository of sample scripts/queries to run over FrameNet.
@author: Sean Trott
The scripts will be coded as functions, so you can import them into "main" once you run ./build.sh,
as in:
from scripts import retrieve_pt
"""
from src.lexical_units import *
from src.valence_data import *
from src.hypothesize_constructions import *
from src.ecg_utilities import ECGUtilities as utils


def retrieve_pt(frame, pt="DNI"):
    """ Requires the lexical units in frame to have already been constructed by FrameNetBuilder,
    so that valence patterns are accessible.
    Returns all valence units with the specified phrase type. """
    returned = []
    for lu in frame.lexicalUnits:
        for valence in lu.individual_valences:
            if valence.pt == pt:
                returned.append(valence)
    return returned
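
# Example usage (a sketch, kept as a comment so nothing runs at import time):
# assumes `fn` (FrameNet) and `fnb` (FrameNetBuilder) objects as set up in "main".
#   fnb.build_lus_for_frame("Motion", fn)
#   motion = fn.get_frame("Motion")
#   dni_valences = retrieve_pt(motion, pt="DNI")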


def find_cooccurring_fes(frame, elements):
    """ Returns a list of FE group realization objects featuring AT LEAST the FEs specified in ELEMENTS.
    ELEMENTS should be a list. """
    return [realization for realization in frame.group_realizations
            if set(elements).issubset(realization.elements)]
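
# Example (a sketch; the FE names are illustrative and depend on the frame):
#   cause_motion = fn.get_frame("Cause_motion")
#   realizations = find_cooccurring_fes(cause_motion, ["Agent", "Theme"])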


def retrieve_fe(frame, fe):
    """ Requires the lexical units in frame to have already been constructed by FrameNetBuilder,
    so that valence patterns are accessible.
    Returns all valence units matching FE. """
    return [valence for valence in frame.individual_valences if valence.fe == fe]


def lus_for_frames(frame_set, fn):
    """ Very simple function that returns a list of lexical unit objects for each frame in FRAME_SET.
    Input frames in FRAME_SET should be strings, not actual frame objects.

    >> lus_for_frames(['Motion', 'Cause_motion'], fn)
    [[move.v, go.v, ...], [cast.v, catapult.v, ....]]
    """
    return [fn.get_frame(frame).lexicalUnits for frame in frame_set]


def get_valence_patterns(frame):
    """ Collects the valence patterns from all FE group realizations in FRAME. """
    patterns = []
    for re in frame.group_realizations:
        patterns += re.valencePatterns
    return patterns


def invert_preps(valences):
    """ Maps each PP phrase type (e.g. "PP[in]") onto the list of FEs it realizes in VALENCES. """
    returned = dict()
    for pattern in valences:
        if pattern.pt.split("[")[0].lower() == "pp":
            if pattern.pt not in returned:
                returned[pattern.pt] = []
            if pattern.fe not in returned[pattern.pt]:
                returned[pattern.pt].append(pattern.fe)
    return returned
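
# Example (a sketch; the exact phrase types and FEs depend on the frame's annotations):
#   motion = fn.get_frame("Motion")
#   invert_preps(motion.individual_valences)
#   -> {"PP[into]": ["Goal"], "PP[from]": ["Source"], ...}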


def build_cxns_for_frame(frame_name, fn, fnb, role_name, pos, filter_value=False):
    """
    Takes in:
    - frame_name, e.g. "Motion"
    - FrameNet object (fn)
    - FrameNetBuilder object (fnb)
    - role_name: role to modify in types/tokens
    - pos: lexical unit POS to create tokens for (e.g., "V")
    - filter_value boolean: determines whether to filter valence patterns (currently unused)
    TODO: add PP constructions?
    Returns a dict containing:
    - tokens and types
    - valence patterns (non-collapsed) and collapsed valences
    - VP constructions (non-collapsed and collapsed)
    - PP constructions, general preposition types, and prepositions
    """
    pos_to_type = dict(V="LexicalVerbType",
                       N="NounType")

    fnb.build_lus_for_frame(frame_name, fn)
    frame = fn.get_frame(frame_name)

    tokens = utils.generate_tokens(frame, fn, role_name, pos)
    types = utils.generate_types(frame, fn, role_name, pos_to_type[pos])

    valence_patterns = get_valence_patterns(frame)
    collapsed_valences = collapse_valences_to_cxns(frame)
    cxns_all = utils.generate_cxns_from_patterns(valence_patterns, collapsed=False)
    cxns_collapsed = utils.generate_cxns_from_patterns(collapsed_valences)

    # PP-related roles and prepositions.
    roles = [v.fe for v in frame.individual_valences if v.pt.split("[")[0].lower() == "pp"]
    # Map each PP phrase type onto the FEs it realizes (kept separate from the ECG
    # types generated above so the two are not conflated).
    prep_to_fes = invert_preps(frame.individual_valences)
    pp = utils.generate_pps_from_roles(roles)
    prep_types = utils.generate_general_preps_from_roles(roles)
    prepositions = utils.generate_preps_from_types(prep_to_fes, fn)

    returned = dict(tokens=tokens,
                    types=types,
                    valence_patterns=valence_patterns,
                    collapsed_valences=collapsed_valences,
                    cxns_all=cxns_all,
                    cxns_collapsed=cxns_collapsed,
                    pp=pp,
                    prep_types=prep_types,
                    prepositions=prepositions)
    return returned
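
# Example (a sketch; "Theme" and "V" are illustrative arguments):
#   results = build_cxns_for_frame("Motion", fn, fnb, "Theme", "V")
#   results["cxns_collapsed"]   # collapsed VP constructions
#   results["prepositions"]     # preposition constructions built from the frame's PPs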


def find_pattern_frequency(frame, target):
    """ Takes in a frame (with lus already built) and a target Valence object. Returns the
    total frequency of that valence in the frame, i.e. the total number of annotations
    across all of the frame's lus. """
    all_valences = all_individual_valences(frame)
    return sum(i.total for i in all_valences if i == target)
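
# Example (a sketch): count how often one observed valence is annotated in a frame:
#   motion = fn.get_frame("Motion")
#   target = all_individual_valences(motion)[0]
#   find_pattern_frequency(motion, target)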


def pattern_across_frames(frames, target):
    """ Takes in multiple frames (a list) and finds the frequency of TARGET across them. """
    returned = dict()
    for frame in frames:
        returned[frame.name] = find_pattern_frequency(frame, target)
    return returned
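
# Example (a sketch; assumes the lexical units for both frames have been built):
#   motion = fn.get_frame("Motion")
#   cause_motion = fn.get_frame("Cause_motion")
#   target = all_individual_valences(motion)[0]
#   pattern_across_frames([motion, cause_motion], target)
#   -> {"Motion": <count>, "Cause_motion": <count>}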