Skip to content

Commit

Permalink
Don't translate English phrases
Browse files Browse the repository at this point in the history
  • Loading branch information
OperKH committed Feb 27, 2024
1 parent 0973aea commit 36259df
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 14 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "ai_bot",
"version": "1.2.5",
"version": "1.2.6",
"private": true,
"main": "src/app.ts",
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions src/bot/commands/mediaTracker.command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export class MediaTrackerCommand extends Command {
similarity: number;
};
const chatPhotoMessageRepository = this.dataSource.getRepository(ChatPhotoMessage);
const t1 = performance.now();
const messages = await chatPhotoMessageRepository
.createQueryBuilder('msg')
.select('msg.messageId', 'messageId')
Expand All @@ -66,6 +67,8 @@ export class MediaTrackerCommand extends Command {
matchImageThreshold: this.configService.get('MATCH_IMAGE_THRESHOLD'),
})
.getRawMany<Messages>();
const t2 = performance.now();
console.log(`DB query time: ${Math.round(t2 - t1)} ms`);
// When similar
if (messages.length > 0) {
await ctx.reply('🕵️‍♀️ Здається, я це вже десь бачив...', {
Expand Down Expand Up @@ -110,6 +113,7 @@ export class MediaTrackerCommand extends Command {
similarity: number;
};
const chatPhotoMessageRepository = this.dataSource.getRepository(ChatPhotoMessage);
const t1 = performance.now();
const messages = await chatPhotoMessageRepository
.createQueryBuilder('msg')
.select('msg.messageId', 'messageId')
Expand All @@ -124,6 +128,8 @@ export class MediaTrackerCommand extends Command {
matchImageThreshold: this.configService.get('MATCH_TEXT_THRESHOLD'),
})
.getRawMany<Messages>();
const t2 = performance.now();
console.log(`DB query time: ${Math.round(t2 - t1)} ms`);
// When similar
if (messages.length > 0) {
await ctx.reply('🔎 Ось, що мені вдалось знайти:', {
Expand Down
26 changes: 15 additions & 11 deletions src/services/ai.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,26 @@ export class AIService {
return new RawImage(new Uint8ClampedArray(data), info.width, info.height, info.channels);
}

async getTextClipEmbedding(text: string): Promise<number[]> {
const tokenizer = await this.getClipTokenizer();
const text_model = await this.getClipTextModel();
/**
 * Returns an English version of `text`, skipping the translator when possible.
 *
 * Text consisting solely of ASCII letters, digits, whitespace and the
 * punctuation listed in the pattern below (plus `№`) is treated as already
 * English and returned unchanged. Any other text is sent to `googleTranslate`;
 * the call duration, the original text and the translation are logged.
 *
 * @param text - Text to translate.
 * @returns `text` itself when no translation is needed, otherwise the
 *   translated text reported by `googleTranslate`.
 */
async getEnglishTranslation(text: string): Promise<string> {
  // An empty string has nothing to translate. Without this guard the `+`
  // quantifier below rejects it and a pointless translator call is made.
  if (text.length === 0) return text;
  const isEnglish = /^[a-zA-Z\s\d!"#№$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]+$/.test(text);
  if (isEnglish) return text;
  const t1 = performance.now();
  const { text: engText } = await googleTranslate(text);
  const t2 = performance.now();
  console.log(`googleTranslate(${Math.round(t2 - t1)} ms)`, '|', text, '|', engText);
  return engText;
}

/**
 * Computes the CLIP text embedding for `text`.
 *
 * The text is first passed through `getEnglishTranslation`, then tokenized
 * with padding and truncation enabled and fed to the CLIP text model. Only
 * the tokenization + model time is measured and logged here.
 *
 * @param text - Input text.
 * @returns The first row of the model's `text_embeds` output as a plain
 *   number array.
 */
async getTextClipEmbedding(text: string): Promise<number[]> {
  const tokenizer = await this.getClipTokenizer();
  const textModel = await this.getClipTextModel();
  const engText = await this.getEnglishTranslation(text);
  // Start timing after translation so the log reflects embedding cost only.
  const startedAt = performance.now();
  const textInputs = tokenizer(engText, { padding: true, truncation: true });
  const { text_embeds } = await textModel(textInputs);
  // A single text was tokenized; presumably `tolist()` yields one row per
  // input, so the first row is this text's embedding.
  const textEmbedding = text_embeds.tolist()[0] as number[];
  const finishedAt = performance.now();
  console.log(`textEmbedding(${Math.round(finishedAt - startedAt)} ms)`);
  return textEmbedding;
}

Expand Down Expand Up @@ -174,17 +182,13 @@ export class AIService {

/**
 * Tells whether `text` is considered toxic.
 *
 * The text is converted to English via `getEnglishTranslation` and analysed
 * with `toxicAnalysis`; it is toxic when any returned score exceeds 0.7.
 *
 * @param text - Text to check.
 * @returns `true` if at least one analysis score is above the threshold.
 */
async isTextToxic(text: string): Promise<boolean> {
  const toxicThreshold = 0.7;
  const engText = await this.getEnglishTranslation(text);
  const toxicResult = await this.toxicAnalysis(engText);
  return toxicResult.some(({ score }) => score > toxicThreshold);
}

/**
 * Returns the score of the first entry produced by `toxicAnalysis` for `text`.
 *
 * The text is converted to English via `getEnglishTranslation` before
 * analysis; translation timing and logging happen inside that helper.
 *
 * @param text - Text to score.
 * @returns The `score` of the first analysis result.
 */
async getMaxToxicScore(text: string): Promise<number> {
  const engText = await this.getEnglishTranslation(text);
  // NOTE(review): assumes `toxicAnalysis` returns at least one result and
  // that the first one carries the highest score — confirm against its
  // implementation. An empty result would make this destructuring throw.
  const [{ score }] = await this.toxicAnalysis(engText);
  return score;
}
Expand Down

0 comments on commit 36259df

Please sign in to comment.