Skip to content

Commit

Permalink
added notebook to search audio
Browse files Browse the repository at this point in the history
  • Loading branch information
plon-Susk7 committed Oct 22, 2024
1 parent 690d48a commit fc0b6b6
Show file tree
Hide file tree
Showing 5 changed files with 330 additions and 1 deletion.
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ services:
ports:
- 7000:7000
- 5678:5678
- 8888:8888
command: tail -f /dev/null
depends_on:
store:
Expand Down
1 change: 1 addition & 0 deletions src/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ COPY --chown=python:python requirements.txt /usr/app/requirements.txt
RUN pip install --no-cache-dir --require-hashes --no-deps -r requirements.txt
COPY --chown=python:python . /usr/app
EXPOSE 7000
EXPOSE 8888

# RUN apt-get update \
# && apt-get -y upgrade \
Expand Down
2 changes: 1 addition & 1 deletion src/core/store/es_vec_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
},
"audio_vec": {
"type":"dense_vector",
"dims": 2048
"dims": 512
},
"date_added": {
"type": "date"
Expand Down
17 changes: 17 additions & 0 deletions src/notebooks/audio-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
store:
entities:
- label: "Data Store"
type: "es_vec"
parameters:
host_name: "es"
image_index_name: "image"
text_index_name: "text"
video_index_name: "video"
audio_index_name: "audio"

operators :
label : "Operators"
parameters :
- name : "Audio Vector Representation"
type : "audio_vec_embedding_clap"
parameters: { index_name : "audio" }
310 changes: 310 additions & 0 deletions src/notebooks/search_similar_audios.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "1e091bae-5d04-42a3-ac01-0982ff289fca",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1c04f4af-7a6a-47e4-9dd9-218749c39ecf",
"metadata": {},
"outputs": [],
"source": [
"sys.path.append(os.path.abspath('../../app'))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5493ef2f-9f6f-46c6-aff3-25c01569409b",
"metadata": {},
"outputs": [],
"source": [
"from core.feluda import Feluda,ComponentType\n",
"from core.models.media import MediaType\n",
"from core.models.media_factory import AudioFactory\n",
"from datetime import datetime"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "01d81101-99d5-49a6-96b5-3c4d94d0886e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/app/venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"audio CLAP Model successfully initialized and loaded onto cpu\n"
]
}
],
"source": [
"feluda = Feluda(\"audio-config.yml\")\n",
"feluda.setup()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "bf99edf8-00af-434a-8ad2-01f2c52cac70",
"metadata": {},
"outputs": [],
"source": [
"if feluda.config.store:\n",
" feluda.start_component(ComponentType.STORE)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "69589541-38d3-41fb-ae05-2898ae507b1e",
"metadata": {},
"outputs": [],
"source": [
"audio_urls = [\n",
" \"https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/en_speech.wav\",\n",
" \"https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/hi_speech.wav\",\n",
" \"https://raw.githubusercontent.com/tattle-made/feluda/main/src/core/operators/sample_data/ta_speech.wav\"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "1591b43b-3580-4d5a-b1de-649e10d95f7b",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def generate_document(post_id: str, representation: any):\n",
" # This is specific to clip operator\n",
" base_doc = {\n",
" \"e_kosh_id\": \"\",\n",
" \"dataset\": post_id,\n",
" \"metadata\": None,\n",
" \"audio_vec\" : representation,\n",
" \"date_added\":datetime.utcnow(),\n",
" }\n",
"\n",
" return base_doc\n",
"\n",
"def store_audio(audio_url):\n",
" filename = audio_url.split('/')[-1]\n",
" \n",
" audio = AudioFactory.make_from_url(audio_url)\n",
" operator = feluda.operators.get()[feluda.config.operators.parameters[0].type]\n",
" embedding = operator.run(audio)\n",
"\n",
" if feluda.store:\n",
" doc = generate_document(filename,embedding)\n",
" media_type = MediaType.AUDIO\n",
" result = feluda.store[feluda.config.store.entities[0].type].store(media_type,doc)\n",
" return(\"result:\",result)\n",
" else:\n",
" raise Exception(\"Store is not Configured\")\n",
"\n",
"def search_audio(audio_url):\n",
" audio = AudioFactory.make_from_url(audio_url)\n",
" operator = feluda.operators.get()[feluda.config.operators.parameters[0].type]\n",
" embedding = operator.run(audio)\n",
" # average_vector = next(embedding)\n",
"\n",
" if feluda.store:\n",
" result = feluda.store[feluda.config.store.entities[0].type].find(\"audio\",embedding)\n",
" return result\n",
" else:\n",
" raise Exception(\"Store is not Configured\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ca261f64-0693-4830-88ed-5d7a70693d3b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading audio from URL\n",
"100% [............................................................................] 131406 / 131406\n",
"Audio downloaded\n",
"Downloading audio from URL\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100% [............................................................................] 119758 / 119758\n",
"Audio downloaded\n",
"Downloading audio from URL\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100% [............................................................................] 118158 / 118158\n",
"Audio downloaded\n"
]
}
],
"source": [
"# Storing audio files\n",
"\n",
"for url in audio_urls:\n",
" store_audio(url)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "f9a9c1f0-e343-48b2-b558-5aa5c4bbcce1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading audio from URL\n",
"100% [............................................................................] 119758 / 119758\n",
"Audio downloaded\n",
"calculation: 1 / (1 + l2norm(params.query_vector, 'audio_vec'))\n"
]
},
{
"data": {
"text/plain": [
"[{'doc_id': 'IpvDtJIBWjgd5dmgGFTR',\n",
" 'dist': 1.0,\n",
" 'dataset': 'hi_speech.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None},\n",
" {'doc_id': 'I5vDtJIBWjgd5dmgG1RZ',\n",
" 'dist': 0.5291093,\n",
" 'dataset': 'ta_speech.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None},\n",
" {'doc_id': 'HZu-tJIBWjgd5dmgY1S7',\n",
" 'dist': 0.44020036,\n",
" 'dataset': 'audio.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None},\n",
" {'doc_id': 'HpvAtJIBWjgd5dmgt1Rb',\n",
" 'dist': 0.41548398,\n",
" 'dataset': 'en_speech.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None},\n",
" {'doc_id': 'H5vCtJIBWjgd5dmgJVTw',\n",
" 'dist': 0.41548398,\n",
" 'dataset': 'en_speech.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None},\n",
" {'doc_id': 'IJvCtJIBWjgd5dmguFR5',\n",
" 'dist': 0.41548398,\n",
" 'dataset': 'en_speech.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None},\n",
" {'doc_id': 'IZvDtJIBWjgd5dmgFlR2',\n",
" 'dist': 0.41548398,\n",
" 'dataset': 'en_speech.wav',\n",
" 'e_kosh_id': '',\n",
" 'text': None,\n",
" 'metadata': None}]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Searching for hindi speech\n",
"\n",
"search_audio(audio_urls[1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8fa416ed-7c52-4ef8-858a-635348ab4c2f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit fc0b6b6

Please sign in to comment.