Skip to content

Commit

Permalink
Add search api
Browse files Browse the repository at this point in the history
  • Loading branch information
zensh committed Jul 20, 2023
1 parent a7dbbf2 commit d1073ff
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 3 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "webscraper",
"version": "0.4.0",
"version": "0.4.1",
"description": "",
"private": true,
"main": "dist/main.js",
Expand Down
20 changes: 20 additions & 0 deletions src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ export function healthzAPI(ctx: Context): void {
}
}

/**
 * Koa handler that looks up the latest stored document for the `url`
 * query parameter and responds with its row as `{ result: row }`.
 *
 * Responds with HTTP 400 (via `ctx.throw`) when `url` fails `isValidUrl`.
 * When the found row has a non-empty title, the handler additionally tries
 * to fill the `src`, `meta` and `content` fields; a failure of that step is
 * ignored and the row is returned as-is.
 *
 * @param ctx - Koa request context; `ctx.app.context.db` must hold the
 *   Cassandra client.
 */
export async function searchAPI(ctx: Context): Promise<void> {
  const { url } = ctx.request.query
  if (!isValidUrl(url)) {
    ctx.throw(400, format('Invalid scraping URL: %s', url))
  }

  const client = ctx.app.context.db as cassandra.Client
  const found = await DocumentModel.findLatest(client, url as string)

  const { title } = found.row
  const untitled = title == null || title == ''
  if (!untitled) {
    try {
      await found.fill(client, ['src', 'meta', 'content'])
    } catch (_) {
      // Best effort: if the fill fails, respond with the row unchanged.
    }
  }

  ctx.body = { result: found.row }
}

export async function scrapingAPI(ctx: Context): Promise<void> {
const db = ctx.app.context.db as cassandra.Client
const { url } = ctx.request.query
Expand Down
3 changes: 2 additions & 1 deletion src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { encode } from 'cbor-x'

import { LogLevel, createLog, writeLog } from './log.js'
import { connect } from './db/scylladb.js'
import { versionAPI, healthzAPI, scrapingAPI, documentAPI } from './api.js'
import { versionAPI, healthzAPI, scrapingAPI, searchAPI, documentAPI } from './api.js'

const GZIP_MIN_LENGTH = 128

Expand All @@ -19,6 +19,7 @@ export async function initApp(app: Koa): Promise<void> {
router.get('/', versionAPI)
router.get('/healthz', healthzAPI)
router.get('/v1/scraping', scrapingAPI)
router.get('/v1/search', searchAPI)
router.get('/v1/document', documentAPI)

app.use(router.routes())
Expand Down
2 changes: 1 addition & 1 deletion src/db/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ export class DocumentModel {
}

/**
 * Whether this document still counts as fresh.
 *
 * @returns `true` when the row has a non-empty title and the timestamp of
 *   its id is newer than three days ago; `false` otherwise.
 */
get isFresh(): boolean {
  // `Date.now()` is in milliseconds; dividing by 1000 puts the cutoff in
  // seconds, which is what `id.timestamp()` is compared against here.
  const cutoffSeconds = Date.now() / 1000 - 3 * 24 * 3600
  return this.row.title !== '' && this.row.id.timestamp() > cutoffSeconds
}

toJSON(): Document {
Expand Down

0 comments on commit d1073ff

Please sign in to comment.