Skip to content

Commit

Permalink
Get the transcript from the document instead of the view.
Browse files: browse the repository at this point in the history.
Just trying to accomplish something today.
  • Loading branch information
BillFarber committed Oct 16, 2024
1 parent 791b570 commit 4707e54
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
13 changes: 8 additions & 5 deletions rag-langchain-python/vector_query_retriever.py
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ def _build_eval_script(self, query, query_embedding):
null, {{'scoreMethod': 'score-bm25'}}
)
.limit(100)
.bind(op.as('transcript', op.xpath('doc', '/transcript')))
.joinInner(
op.fromView('example','events', '', op.fragmentIdCol('vectorsDocId')),
op.on(
@@ -45,6 +46,7 @@ def _build_eval_script(self, query, query_embedding):
op.vec.vector(op.col('embedding')),
op.vec.vector(vec.vector({}))
)))
.select(['uri', 'transcript', 'similarity'])
.orderBy(op.desc(op.col('similarity')))
.limit(10)
.result()
@@ -56,9 +58,10 @@ def _build_eval_script(self, query, query_embedding):
def _get_relevant_documents(self, query: str) -> List[Document]:
query_embedding = self.embedding_generator.embed_query(query)
eval_script = self._build_eval_script(query, query_embedding)
results = self.client.eval(javascript=eval_script)
optic_rows = self.client.eval(javascript=eval_script)
print(optic_rows[1].keys())

print(f"Count of MarkLogic documents sent to the LLM: {len(results)}")
for result in results:
print(f"URI: {result['uri']}")
return map(lambda doc: Document(page_content=doc["text"]), results)
print(f"Count of MarkLogic chunks sent to the LLM: {len(optic_rows)}")
for optic_row in optic_rows:
print(f"URI: {optic_row['uri']}")
return map(lambda optic_row: Document(page_content=optic_row["transcript"]), optic_rows)
5 changes: 0 additions & 5 deletions setup/src/main/ml-schemas-12/tde/events.json
Original file line number Diff line number Diff line change
@@ -20,11 +20,6 @@
"val": "vec:vector(embedding)",
"dimension": "1536",
"invalidValues": "reject"
},
{
"name": "text",
"scalarType": "string",
"val": "transcript"
}
]
}
Expand Down

0 comments on commit 4707e54

Please sign in to comment.