-
Notifications
You must be signed in to change notification settings - Fork 0
/
define_ontology_to_csv.py
90 lines (72 loc) · 3.44 KB
/
define_ontology_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import ast
import csv
import ollama
from argparse import ArgumentParser
from brain2kg.text2kg.utils.llm_utils import parse_relation_definition
def define_ontology_relations_to_csv(
    csv_schema_file_path: str,
    few_shot_examples_file_path: str,
    ontology_triplets_file_path: str,
    model: str = 'llama3.1',
    detail_log: bool = True,
) -> None:
    """Ask an Ollama model to describe every ontology relation and save the result as CSV.

    The triplets file is parsed with ``ast.literal_eval``, the relation (index 1
    of each triplet) is collected, and a single prompt containing the few-shot
    examples, the triplets and the relations is sent to ``ollama.chat``. The
    completion is handed to ``parse_relation_definition`` and the resulting
    pairs are written to ``csv_schema_file_path``, one ``relation,definition``
    row per relation.

    Args:
        csv_schema_file_path: Destination CSV file; overwritten on success.
        few_shot_examples_file_path: Text file whose content is inserted
            verbatim into the prompt as examples.
        ontology_triplets_file_path: Text file containing a Python literal
            (a sequence of ``[subject, relation, object]`` triplets).
        model: Name of the Ollama model to query.
        detail_log: When true, print the filled prompt and the raw completion.

    Returns:
        None. If the completion cannot be parsed, an error message is printed
        and the destination CSV is left untouched.
    """
    with open(ontology_triplets_file_path, encoding='utf-8') as triplets_file:
        ontology_triplets = ast.literal_eval(triplets_file.read())

    # The relation is the middle element of each [Subject, Relation, Object].
    relations = {triplet[1] for triplet in ontology_triplets}

    with open(few_shot_examples_file_path, encoding='utf-8') as examples_file:
        few_shot_examples = examples_file.read()

    PROMPT = """
You will be given a list of relational triples in the format of [Subject, Relation, Object] extracted from a predefined neuroscience-domain ontology. For each relation present in the triples, your task is to write a description to express the meaning of the relation. In your answer, please strictly ONLY INCLUDE the relation and description pairs and DO NOT include any other comments, explanations or apologies.
Here are some examples (pay attention to answer structure and format):
{few_shot_examples}
Now please extract relation descriptions given the following triples. Note that the description needs to be general and can be used to describe relations between other entities as well. Pay attention to the order of subject and object entities. ENSURE that the relations in your answer exactly match (exact letters, numbers, characters) the provided Relations.
Triples: {triples}
Relations: {relations}
"""
    # NOTE: `relations` is rendered with set repr, so its order in the prompt
    # can differ between runs.
    filled_prompt = PROMPT.format_map(
        {
            'few_shot_examples': few_shot_examples,
            'triples': ontology_triplets,
            'relations': relations,
        }
    )

    messages = [{'role': 'user', 'content': filled_prompt}]
    completion = ollama.chat(
        model=model,
        messages=messages,
    )['message']['content']

    if detail_log:
        print('---')
        print('PROMPT:')
        print(filled_prompt)
        print('---')
        print('COMPLETION:')
        print(completion)
        print('---')

    # NOTE(review): presumably returns a dict of relation -> definition;
    # any falsy result is treated as a parse failure.
    relation_definition_dict = parse_relation_definition(completion, relations)
    if not relation_definition_dict:
        print('Error parsing response.')
        # Stop here so an existing schema file is not truncated and a failed
        # run is not reported as complete.
        return

    # newline='' is required by the csv module to avoid blank rows on Windows;
    # utf-8 keeps non-ASCII model output writable regardless of locale.
    with open(csv_schema_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows(relation_definition_dict.items())
    print('Complete!')
if __name__ == '__main__':
    # Command-line entry point: collect the file paths and model name, then
    # generate the relation-definition CSV.
    parser = ArgumentParser()
    # The three paths are mandatory: without them the function would receive
    # None and fail inside open() with an unhelpful TypeError.
    parser.add_argument(
        "--csv_schema_file_path",
        required=True,
        help="Path to CSV schema",
    )
    parser.add_argument(
        "--few_shot_examples_file_path",
        required=True,
        help='Path to few-shot examples for defining relations',
    )
    parser.add_argument(
        "--ontology_triplets_file_path",
        required=True,
        help="Path to Triplets list of lists, in string form",
    )
    parser.add_argument("--model", default="llama3.1", help='Ollama LLM to use')
    args = parser.parse_args()

    define_ontology_relations_to_csv(
        csv_schema_file_path=args.csv_schema_file_path,
        few_shot_examples_file_path=args.few_shot_examples_file_path,
        ontology_triplets_file_path=args.ontology_triplets_file_path,
        model=args.model,
    )