-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #80 from meedan/CV2-4136-yake-model
CV2-4136-yake-mode
- Loading branch information
Showing
26 changed files
with
370 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from typing import Dict | ||
import io | ||
import urllib.request | ||
|
||
from lib.model.model import Model | ||
|
||
from lib import schemas | ||
|
||
import yake | ||
|
||
class Model(Model):
    """Keyword / key-phrase extraction model backed by the YAKE library."""

    def run_yake(self, text: str,
                 language: str,
                 max_ngram_size: int,
                 deduplication_threshold: float,
                 deduplication_algo: str,
                 window_size: int,
                 num_of_keywords: int) -> dict:
        """Run keyword/phrase extraction using the YAKE library (https://github.com/LIAAD/yake).

        Every argument except ``text`` is forwarded to ``yake.KeywordExtractor``.

        :param text: text handed to ``extract_keywords``
        :param language: forwarded as ``lan``
        :param max_ngram_size: forwarded as ``n``
        :param deduplication_threshold: forwarded as ``dedupLim``
        :param deduplication_algo: forwarded as ``dedupFunc``
        :param window_size: forwarded as ``windowsSize``
        :param num_of_keywords: forwarded as ``top``
        :returns: dict with a single ``"keywords"`` key holding whatever
            ``extract_keywords`` returned for ``text``
        """
        extractor = yake.KeywordExtractor(
            lan=language,
            n=max_ngram_size,
            dedupLim=deduplication_threshold,
            dedupFunc=deduplication_algo,
            windowsSize=window_size,
            top=num_of_keywords,
            features=None,
        )
        return {"keywords": extractor.extract_keywords(text)}

    def get_params(self, message: schemas.Message) -> dict:
        """Build the keyword arguments for ``run_yake`` from a message.

        ``text`` comes from ``message.body.text``; every other value is read
        from ``message.body.parameters`` and falls back to the default listed
        below when the key is absent.

        :param message: incoming message whose body carries the text and options
        :returns: dict whose keys match the parameters of ``run_yake``
        :raises AssertionError: if ``message.body.text`` is None
        """
        # ``parameters`` is declared Optional on the schema, so an explicit
        # null must fall back to the defaults instead of failing on ``.get``.
        parameters = message.body.parameters or {}
        params = {
            "text": message.body.text,
            "language": parameters.get("language", "en"),
            "max_ngram_size": parameters.get("max_ngram_size", 3),
            "deduplication_threshold": parameters.get("deduplication_threshold", 0.25),
            "deduplication_algo": parameters.get("deduplication_algo", 'seqm'),
            "window_size": parameters.get("window_size", 0),
            "num_of_keywords": parameters.get("num_of_keywords", 10),
        }
        # Explicit raise so the check survives ``python -O`` (which strips
        # ``assert``); AssertionError is kept so callers see the same
        # exception type as before.
        if params["text"] is None:
            raise AssertionError("message.body.text is required for keyword extraction")
        return params

    def process(self, message: schemas.Message) -> schemas.YakeKeywordsResponse:
        """
        Generic function for returning the actual response.

        Extracts the parameters from ``message`` and runs YAKE on them.
        NOTE(review): the value returned here is the plain dict produced by
        ``run_yake`` (``{"keywords": ...}``), not a ``YakeKeywordsResponse``
        instance - confirm that callers coerce it as the annotation suggests.
        """
        keywords = self.run_yake(**self.get_params(message))
        return keywords
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,42 @@ | ||
from typing import Any, List, Optional, Union | ||
from pydantic import BaseModel, root_validator | ||
from pydantic import BaseModel, ValidationError | ||
from typing import Any, Dict, List, Optional, Union | ||
|
||
# Output hash values can be of different types. | ||
class GenericItem(BaseModel): | ||
id: str | ||
callback_url: Optional[str] = None | ||
url: Optional[str] = None | ||
text: Optional[str] = None | ||
raw: Optional[dict] = {} | ||
|
||
class MediaItem(GenericItem): | ||
class MediaResponse(BaseModel): | ||
hash_value: Optional[Any] = None | ||
|
||
class VideoItem(MediaItem): | ||
class VideoResponse(MediaResponse): | ||
folder: Optional[str] = None | ||
filepath: Optional[str] = None | ||
|
||
class YakeKeywordsResponse(BaseModel):
    """Result payload used for messages handled by the YAKE keywords model."""
    # Optional list of lists whose items are strings or floats; defaults to None.
    # NOTE(review): presumably each inner list is a [keyword, score] pair as
    # returned by yake's extract_keywords - confirm.
    keywords: Optional[List[List[Union[str, float]]]] = None
|
||
class GenericItem(BaseModel):
    """Body of a Message: the item to process plus its optional parameters and result."""
    # Identifier of the item; accepted as a string, int, or float.
    id: Union[str, int, float]
    callback_url: Optional[str] = None
    url: Optional[str] = None
    text: Optional[str] = None
    # NOTE(review): free-form dict, presumably the untouched original payload - confirm.
    raw: Optional[Dict] = {}
    # Free-form per-request options; defaults to an empty dict but may be None.
    parameters: Optional[Dict] = {}
    # Model-specific result; one of the response models named in the Union, or None.
    result: Optional[Union[MediaResponse, VideoResponse, YakeKeywordsResponse]] = None
|
||
class Message(BaseModel): | ||
body: Union[MediaItem, VideoItem] | ||
body: GenericItem | ||
model_name: str | ||
@root_validator(pre=True) | ||
def set_body(cls, values): | ||
body = values.get("body") | ||
model_name = values.get("model_name") | ||
if model_name == "video__Model": | ||
values["body"] = VideoItem(**values["body"]).dict() | ||
if model_name in ["audio__Model", "image__Model", "fptg__Model", "indian_sbert__Model", "mean_tokens__Model", "fasttext__Model"]: | ||
values["body"] = MediaItem(**values["body"]).dict() | ||
return values | ||
|
||
def parse_message(message_data: Dict) -> Message:
    """Build a ``Message`` from a raw dict, choosing the result model by model name.

    The ``result`` entry of the body (if any) is parsed separately into
    ``YakeKeywordsResponse`` when ``model_name`` contains ``'yake_keywords'``,
    ``VideoResponse`` when it contains ``'video'``, and ``MediaResponse``
    otherwise. The rest of the body is parsed into a ``GenericItem`` and the
    result instance is attached to it.

    :param message_data: dict with at least the keys ``'body'`` and ``'model_name'``
    :returns: the assembled ``Message``
    :raises KeyError: if ``'body'`` or ``'model_name'`` is missing
    """
    # Shallow-copy the body so removing 'result' below does not mutate the
    # caller's dict.
    body_data = dict(message_data['body'])
    model_name = message_data['model_name']
    # A missing 'result' and an explicit null are both treated as "no result
    # yet"; unpacking None with ** would otherwise raise TypeError.
    result_data = body_data.pop('result', None) or {}
    if 'yake_keywords' in model_name:
        result_class = YakeKeywordsResponse
    elif 'video' in model_name:
        result_class = VideoResponse
    else:
        result_class = MediaResponse
    # Build the result first, then the body, matching the original order so
    # validation errors surface in the same sequence.
    result_instance = result_class(**result_data)
    body_instance = GenericItem(**body_data)
    body_instance.result = result_instance
    return Message(body=body_instance, model_name=model_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.