added notebook to search audio

tattle-made · Oct 22, 2024 · fc0b6b6 · fc0b6b6
1 parent 690d48a
commit fc0b6b6
Show file tree

Hide file tree

Showing 5 changed files with 330 additions and 1 deletion.
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -57,6 +57,7 @@ services:
     ports:
       - 7000:7000
       - 5678:5678
+      - 8888:8888
     command: tail -f /dev/null
     depends_on:
       store:

diff --git a/src/Dockerfile b/src/Dockerfile
@@ -39,6 +39,7 @@ COPY --chown=python:python requirements.txt /usr/app/requirements.txt
 RUN pip install --no-cache-dir --require-hashes --no-deps -r requirements.txt
 COPY --chown=python:python . /usr/app
 EXPOSE 7000
+EXPOSE 8888
 
 # RUN apt-get update \
 #     && apt-get -y upgrade \

diff --git a/src/core/store/es_vec_mappings.py b/src/core/store/es_vec_mappings.py
@@ -101,7 +101,7 @@
                         },
                         "audio_vec": {
                             "type":"dense_vector",
-                            "dims": 2048
+                            "dims": 512
                         },
                         "date_added": {
                             "type": "date"

diff --git a/src/notebooks/audio-config.yml b/src/notebooks/audio-config.yml
@@ -0,0 +1,17 @@
+store:
+  entities:
+    - label: "Data Store"
+      type: "es_vec"
+      parameters:
+        host_name: "es"
+        image_index_name: "image"
+        text_index_name: "text"
+        video_index_name: "video"
+        audio_index_name: "audio"
+
+operators : 
+  label : "Operators"
+  parameters :
+    - name : "Audio Vector Representation"
+      type : "audio_vec_embedding_clap"
+      parameters: { index_name : "audio" }
diff --git a/src/notebooks/search_similar_audios.ipynb b/src/notebooks/search_similar_audios.ipynb
@@ -0,0 +1,310 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "1e091bae-5d04-42a3-ac01-0982ff289fca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1c04f4af-7a6a-47e4-9dd9-218749c39ecf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sys.path.append(os.path.abspath('../../app'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "5493ef2f-9f6f-46c6-aff3-25c01569409b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from core.feluda import Feluda,ComponentType\n",
+    "from core.models.media import MediaType\n",
+    "from core.models.media_factory import AudioFactory\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "01d81101-99d5-49a6-96b5-3c4d94d0886e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/app/venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "audio CLAP Model successfully initialized and loaded onto cpu\n"
+     ]
+    }
+   ],
+   "source": [
+    "feluda = Feluda(\"audio-config.yml\")\n",
+    "feluda.setup()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "bf99edf8-00af-434a-8ad2-01f2c52cac70",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if feluda.config.store:\n",
+    "    feluda.start_component(ComponentType.STORE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "69589541-38d3-41fb-ae05-2898ae507b1e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "audio_urls = [\n",
+    "    \"https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/en_speech.wav\",\n",
+    "    \"https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/hi_speech.wav\",\n",
+    "    \"https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/ta_speech.wav\"\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "1591b43b-3580-4d5a-b1de-649e10d95f7b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def generate_document(post_id: str, representation: any):\n",
+    "    # This is specific to clip operator\n",
+    "    base_doc = {\n",
+    "        \"e_kosh_id\": \"\",\n",
+    "        \"dataset\": post_id,\n",
+    "        \"metadata\": None,\n",
+    "        \"audio_vec\" : representation,\n",
+    "        \"date_added\":datetime.utcnow(),\n",
+    "    }\n",
+    "\n",
+    "    return base_doc\n",
+    "\n",
+    "def store_audio(audio_url):\n",
+    "    filename = audio_url.split('/')[-1]\n",
+    "    \n",
+    "    audio = AudioFactory.make_from_url(audio_url)\n",
+    "    operator = feluda.operators.get()[feluda.config.operators.parameters[0].type]\n",
+    "    embedding = operator.run(audio)\n",
+    "\n",
+    "    if feluda.store:\n",
+    "        doc = generate_document(filename,embedding)\n",
+    "        media_type = MediaType.AUDIO\n",
+    "        result = feluda.store[feluda.config.store.entities[0].type].store(media_type,doc)\n",
+    "        return(\"result:\",result)\n",
+    "    else:\n",
+    "        raise Exception(\"Store is not Configured\")\n",
+    "\n",
+    "def search_audio(audio_url):\n",
+    "    audio = AudioFactory.make_from_url(audio_url)\n",
+    "    operator = feluda.operators.get()[feluda.config.operators.parameters[0].type]\n",
+    "    embedding = operator.run(audio)\n",
+    "    # average_vector = next(embedding)\n",
+    "\n",
+    "    if feluda.store:\n",
+    "        result = feluda.store[feluda.config.store.entities[0].type].find(\"audio\",embedding)\n",
+    "        return result\n",
+    "    else:\n",
+    "        raise Exception(\"Store is not Configured\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "ca261f64-0693-4830-88ed-5d7a70693d3b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading audio from URL\n",
+      "100% [............................................................................] 131406 / 131406\n",
+      "Audio downloaded\n",
+      "Downloading audio from URL\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100% [............................................................................] 119758 / 119758\n",
+      "Audio downloaded\n",
+      "Downloading audio from URL\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100% [............................................................................] 118158 / 118158\n",
+      "Audio downloaded\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Storing audio files\n",
+    "\n",
+    "for url in audio_urls:\n",
+    "    store_audio(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "f9a9c1f0-e343-48b2-b558-5aa5c4bbcce1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading audio from URL\n",
+      "100% [............................................................................] 119758 / 119758\n",
+      "Audio downloaded\n",
+      "calculation: 1 / (1 + l2norm(params.query_vector, 'audio_vec'))\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[{'doc_id': 'IpvDtJIBWjgd5dmgGFTR',\n",
+       "  'dist': 1.0,\n",
+       "  'dataset': 'hi_speech.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None},\n",
+       " {'doc_id': 'I5vDtJIBWjgd5dmgG1RZ',\n",
+       "  'dist': 0.5291093,\n",
+       "  'dataset': 'ta_speech.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None},\n",
+       " {'doc_id': 'HZu-tJIBWjgd5dmgY1S7',\n",
+       "  'dist': 0.44020036,\n",
+       "  'dataset': 'audio.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None},\n",
+       " {'doc_id': 'HpvAtJIBWjgd5dmgt1Rb',\n",
+       "  'dist': 0.41548398,\n",
+       "  'dataset': 'en_speech.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None},\n",
+       " {'doc_id': 'H5vCtJIBWjgd5dmgJVTw',\n",
+       "  'dist': 0.41548398,\n",
+       "  'dataset': 'en_speech.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None},\n",
+       " {'doc_id': 'IJvCtJIBWjgd5dmguFR5',\n",
+       "  'dist': 0.41548398,\n",
+       "  'dataset': 'en_speech.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None},\n",
+       " {'doc_id': 'IZvDtJIBWjgd5dmgFlR2',\n",
+       "  'dist': 0.41548398,\n",
+       "  'dataset': 'en_speech.wav',\n",
+       "  'e_kosh_id': '',\n",
+       "  'text': None,\n",
+       "  'metadata': None}]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Searching for hindi speech\n",
+    "\n",
+    "search_audio(audio_urls[1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8fa416ed-7c52-4ef8-858a-635348ab4c2f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}