diff --git a/rag-langchain-python/vector_query_retriever.py b/rag-langchain-python/vector_query_retriever.py index 548a05f..02eb7dd 100644 --- a/rag-langchain-python/vector_query_retriever.py +++ b/rag-langchain-python/vector_query_retriever.py @@ -34,6 +34,7 @@ def _build_eval_script(self, query, query_embedding): null, {{'scoreMethod': 'score-bm25'}} ) .limit(100) + .bind(op.as('transcript', op.xpath('doc', '/transcript'))) .joinInner( op.fromView('example','events', '', op.fragmentIdCol('vectorsDocId')), op.on( @@ -45,6 +46,7 @@ def _build_eval_script(self, query, query_embedding): op.vec.vector(op.col('embedding')), op.vec.vector(vec.vector({})) ))) + .select(['uri', 'transcript', 'similarity']) .orderBy(op.desc(op.col('similarity'))) .limit(10) .result() @@ -56,9 +58,10 @@ def _build_eval_script(self, query, query_embedding): def _get_relevant_documents(self, query: str) -> List[Document]: query_embedding = self.embedding_generator.embed_query(query) eval_script = self._build_eval_script(query, query_embedding) - results = self.client.eval(javascript=eval_script) + optic_rows = self.client.eval(javascript=eval_script) + print(optic_rows[1].keys()) - print(f"Count of MarkLogic documents sent to the LLM: {len(results)}") - for result in results: - print(f"URI: {result['uri']}") - return map(lambda doc: Document(page_content=doc["text"]), results) + print(f"Count of MarkLogic chunks sent to the LLM: {len(optic_rows)}") + for optic_row in optic_rows: + print(f"URI: {optic_row['uri']}") + return map(lambda optic_row: Document(page_content=optic_row["transcript"]), optic_rows) diff --git a/setup/src/main/ml-schemas-12/tde/events.json b/setup/src/main/ml-schemas-12/tde/events.json index 164dd35..1eb8df5 100644 --- a/setup/src/main/ml-schemas-12/tde/events.json +++ b/setup/src/main/ml-schemas-12/tde/events.json @@ -20,11 +20,6 @@ "val": "vec:vector(embedding)", "dimension": "1536", "invalidValues": "reject" - }, - { - "name": "text", - "scalarType": "string", - "val": "transcript" } ] }