diff --git a/dist/app.js b/dist/app.js index 82be4d6..7c6f54c 100644 --- a/dist/app.js +++ b/dist/app.js @@ -4,7 +4,7 @@ import { encode } from 'cborg'; import { LogLevel, createLog, writeLog } from './log.js'; import { connect } from './db/scylladb.js'; import { healthzAPI, scrapingAPI, searchAPI, documentAPI, convertingAPI, } from './api.js'; -import { renderIndex, renderPublication, renderGroup } from './ssr.js'; +import { renderIndex, renderPublication, renderGroup, renderCollection, } from './ssr.js'; const GZIP_MIN_LENGTH = 128; export async function initApp(app) { // attach stateful components to the application context @@ -18,8 +18,13 @@ export async function initApp(app) { router.get('/v1/search', searchAPI); router.get('/v1/document', documentAPI); router.post('/v1/converting', convertingAPI); - router.get('/pub/:id', renderPublication); - router.get('/group/:id', renderGroup); + router.get('/pub/:cid', renderPublication); + router.get('/group/:gid', renderGroup); + router.get('/group/:gid/collection', renderCollection); + router.all('/:other+', (ctx) => { + ctx.redirect('/'); + ctx.status = 307; + }); app.use(router.routes()); app.use(router.allowedMethods()); } diff --git a/dist/ssr.js b/dist/ssr.js index 4d1e7c7..7c082be 100644 --- a/dist/ssr.js +++ b/dist/ssr.js @@ -9,8 +9,9 @@ import { toHTML } from './tiptap.js'; import { lang639_3, isRTL } from './lang.js'; const ZeroID = Xid.default().toString(); const indexTpl = readFileSync('./html/index.html', 'utf-8'); -const publicationTpl = readFileSync('./html/publication.html', 'utf-8'); const groupTpl = readFileSync('./html/group.html', 'utf-8'); +const publicationTpl = readFileSync('./html/publication.html', 'utf-8'); +const collectionTpl = readFileSync('./html/collection.html', 'utf-8'); const siteBase = config.get('siteBase'); const writingBase = config.get('writingBase'); const userBase = config.get('userBase'); @@ -44,9 +45,9 @@ export async function renderIndex(ctx) { const headers = ctxHeaders(ctx); const $ = cheerio.load(indexTpl); const lang = headers['x-language'] ?? 'eng'; - const info = metaInfos[lang] || metaInfos.eng; - $('title').text(info.title); - $('meta[name="description"]').prop('content', info.desc); + const siteInfo = metaInfos[lang] || metaInfos.eng; + $('title').text(siteInfo.title); + $('meta[name="description"]').prop('content', siteInfo.desc); try { await Promise.all([ (async () => { @@ -72,40 +73,49 @@ export async function renderIndex(ctx) { } export async function renderPublication(ctx) { const headers = ctxHeaders(ctx); - const cid = ctx.params.id; + const cid = ctx.params.cid; const { gid, language } = ctx.query; const $ = cheerio.load(publicationTpl); const lang = headers['x-language'] ?? 'eng'; - const info = metaInfos[lang]; - if (info) { - $('title').text(info.title); - $('meta[name="description"]').prop('content', info.desc); + const siteInfo = metaInfos[lang]; + if (siteInfo) { + $('title').text(siteInfo.title); + $('meta[name="description"]').prop('content', siteInfo.desc); } try { - const docs = await listPublished(headers, Xid.fromValue(cid)); - renderPublicationItems($, docs); const doc = await getPublication(headers, cid, (gid ?? ''), (language ?? lang)); + const docs = await listPublished(headers, Xid.fromValue(cid)); + renderPublicationItems($, docs.filter((item) => item.language !== doc.language)); const docUrl = `${siteBase}/pub/${Xid.fromValue(doc.cid).toString()}`; const groupUrl = `${siteBase}/group/${Xid.fromValue(doc.gid).toString()}`; $('html').prop('lang', doc.language); if (isRTL(doc.language)) { $('html').prop('dir', 'rtl'); } - $('meta[property="og:title"]').prop('content', doc.title); $('meta[property="og:url"]').prop('content', docUrl); + $('meta[property="og:title"]').prop('content', doc.title); + if (doc.summary) { + $('meta[property="og:description"]').prop('content', doc.summary); + } $('#title').text(doc.title); - const authors = $('#authors'); - authors.prop('href', groupUrl); - authors.text(groupUrl); - if (doc.authors != null && doc.authors.length > 0) { - authors.text(doc.authors.join(', ')); + if (doc.summary) { + $('#summary').text(doc.summary); + } + if (doc.authors) { + doc.authors.forEach((author) => $(`${author}`).appendTo(`#authors`)); } + if (doc.keywords) { + doc.keywords.forEach((keyword) => $(`${keyword}`).appendTo(`#keywords`)); + } + const groupInfo = $('#group'); + groupInfo.prop('href', groupUrl); + groupInfo.text(`Group: ${groupUrl}`); const updated_at = new Date(doc.updated_at).toUTCString(); - $('#updated_time').text(updated_at); - $('#version').text(doc.version.toString()); + $('#updated_time').text(`Updated: ${updated_at}`); + $('#version').text(`Version: ${doc.version}`); const content = decode(doc.content); let contentHtml = toHTML(content) + - `\n

${docUrl}

`; + `\n

Permalink: ${docUrl}

`; if (doc.rfp?.creation) { contentHtml += `\n

Request For Payment, Price: ${doc.rfp.creation.price} WEN

`; } @@ -123,32 +133,97 @@ export async function renderPublication(ctx) { ctx.type = 'text/html'; ctx.body = $.html(); } +export async function renderCollection(ctx) { + const headers = ctxHeaders(ctx); + // const gid = ctx.params.gid as string + const { cid: _cid } = ctx.query; + const $ = cheerio.load(collectionTpl); + const lang = headers['x-language'] ?? 'eng'; + const siteInfo = metaInfos[lang]; + if (siteInfo) { + $('title').text(siteInfo.title); + $('meta[name="description"]').prop('content', siteInfo.desc); + } + try { + const doc = await getCollection(headers, _cid); + const [language, info] = getCollectionInfo(doc, lang) ?? []; + if (!info || !language) { + throw createError(404, 'collection not found'); + } + const gid = Xid.fromValue(doc.gid); + const cid = Xid.fromValue(doc.id); + const groupUrl = `${siteBase}/group/${gid.toString()}`; + const docUrl = `${groupUrl}?cid=${cid.toString()}`; + $('html').prop('lang', language); + if (isRTL(language)) { + $('html').prop('dir', 'rtl'); + } + $('meta[property="og:url"]').prop('content', docUrl); + $('meta[property="og:title"]').prop('content', info.title); + if (info.summary) { + $('meta[property="og:description"]').prop('content', info.summary); + } + $('#title').text(info.title); + if (info.summary) { + $('#summary').text(info.summary); + } + if (info.authors) { + info.authors.forEach((author) => $(`${author}`).appendTo(`#authors`)); + } + if (info.keywords) { + info.keywords.forEach((keyword) => $(`${keyword}`).appendTo(`#keywords`)); + } + const groupInfo = $('#group'); + groupInfo.prop('href', groupUrl); + groupInfo.text(`Group: ${groupUrl}`); + const updated_at = new Date(doc.updated_at).toUTCString(); + $('#updated_time').text(`Updated: ${updated_at}`); + ctx.set('last-modified', updated_at); + try { + const docs = await listCollectionChildren(headers, cid); + renderCollectionChildrenItems($, docs); + } + catch (err) { + ignoreError(); + } + } + catch (err) { + ctx.status = 404; + const url = ctx.get('x-request-url'); + if (url !== '') { + $('#content').text(url + ' not found'); + } + } + ctx.vary('Accept-Language'); + ctx.type = 'text/html'; + ctx.body = $.html(); +} export async function renderGroup(ctx) { const headers = ctxHeaders(ctx); - const _gid = ctx.params.id; + const gid = ctx.params.gid; const $ = cheerio.load(groupTpl); const lang = headers['x-language'] ?? 'eng'; - const info = metaInfos[lang]; - if (info) { - $('title').text(info.title); - $('meta[name="description"]').prop('content', info.desc); + const siteInfo = metaInfos[lang]; + if (siteInfo) { + $('title').text(siteInfo.title); + $('meta[name="description"]').prop('content', siteInfo.desc); } try { - const group = await getGroup(headers, _gid); - const gid = Xid.fromValue(group.id); + const group = await getGroup(headers, gid); + const xGid = Xid.fromValue(group.id); const groupUrl = `${siteBase}/group/${gid.toString()}`; + $('meta[property="og:url"]').prop('content', groupUrl); $('meta[property="og:title"]').prop('content', group.name); $('meta[property="og:description"]').prop('content', group.slogan); - $('meta[property="og:url"]').prop('content', groupUrl); $('#group_name').text(group.name); $('#group_slogan').text(group.slogan); await Promise.all([ (async () => { - const docs = await listCollections(headers, gid); + const docs = await listCollections(headers, xGid); renderCollectionItems($, docs, lang); })().catch(ignoreError), (async () => { - const docs = await listPublications(headers, gid); + const docs = await listPublications(headers, xGid); renderPublicationItems($, docs); })().catch(ignoreError), ]); @@ -199,6 +274,25 @@ function renderCollectionItems($, docs, lang) { }) .filter((item) => !!item)); } +function renderCollectionChildrenItems($, docs) { + renderList($, 'children', docs + .map((doc) => { + if (doc.kind == 2) + return null; // collection + const cid = Xid.fromValue(doc.cid).toString(); + const gid = Xid.fromValue(doc.gid).toString(); + return { + id: `${gid}-${cid}`, + url: `${siteBase}/pub/${cid}?gid=${gid}`, + title: doc.title, + language: doc.language, + summary: doc.summary ?? '', + keywords: doc.keywords, + authors: doc.authors, + }; + }) + .filter((item) => !!item)); +} function renderList($, ulId, items) { const ul = $('#' + ulId); for (const item of items) { @@ -339,6 +433,22 @@ async function listLatestPublications(headers) { const obj = decode(Buffer.from(data)); return obj.result; } +async function getCollection(headers, cid) { + const api = new URL('/v1/collection', writingBase); + api.searchParams.append('gid', '000000000000000anon0'); + api.searchParams.append('id', cid); + api.searchParams.append('fields', 'info,updated_at'); + headers.accept = 'application/cbor'; + const res = await fetch(api, { + headers, + }); + if (res.status !== 200) { + throw createError(res.status, await res.text()); + } + const data = await res.arrayBuffer(); + const obj = decode(Buffer.from(data)); + return obj.result; +} async function listCollections(headers, gid) { const api = new URL('/v1/collection/list', writingBase); headers.accept = 'application/cbor'; @@ -378,6 +488,26 @@ async function listLatestCollections(headers) { const obj = decode(Buffer.from(data)); return obj.result; } +async function listCollectionChildren(headers, id) { + const api = new URL('/v1/collection/list_children', writingBase); + headers.accept = 'application/cbor'; + headers['content-type'] = 'application/cbor'; + const res = await fetch(api, { + method: 'POST', + headers, + body: Buffer.from(encode({ + gid: Xid.default().toBytes(), + id: id.toBytes(), + page_size: 100, + })), + }); + if (res.status !== 200) { + throw createError(res.status, await res.text()); + } + const data = await res.arrayBuffer(); + const obj = decode(Buffer.from(data)); + return obj.result; +} function isXid(id) { try { Xid.parse(id); diff --git a/html/collection.html b/html/collection.html new file mode 100644 index 0000000..0a94ae9 --- /dev/null +++ b/html/collection.html @@ -0,0 +1,34 @@ + + + + + + + + Yiwen — AI-based Translingual Knowledge Content Platform + + + + + + + + + +
+

+
+
+
+
+ + +
+
+ +
+
+ + + \ No newline at end of file diff --git a/html/group.html b/html/group.html index e3036b0..94ef092 100644 --- a/html/group.html +++ b/html/group.html @@ -11,6 +11,8 @@ + + diff --git a/html/publication.html b/html/publication.html index ad9d1ec..10eb2cf 100644 --- a/html/publication.html +++ b/html/publication.html @@ -11,16 +11,21 @@ + +
-
+
    -

    +

    +
    +
    +
    - +
    diff --git a/package.json b/package.json index bf1d025..2f6acee 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "webscraper", - "version": "1.2.0", + "version": "1.2.1", "description": "", "private": true, "main": "dist/main.js", diff --git a/src/app.ts b/src/app.ts index a3fc520..d110489 100644 --- a/src/app.ts +++ b/src/app.ts @@ -13,7 +13,12 @@ import { convertingAPI, } from './api.js' -import { renderIndex, renderPublication, renderGroup } from './ssr.js' +import { + renderIndex, + renderPublication, + renderGroup, + renderCollection, +} from './ssr.js' const GZIP_MIN_LENGTH = 128 @@ -30,8 +35,13 @@ export async function initApp(app: Koa): Promise { router.get('/v1/search', searchAPI) router.get('/v1/document', documentAPI) router.post('/v1/converting', convertingAPI) - router.get('/pub/:id', renderPublication) - router.get('/group/:id', renderGroup) + router.get('/pub/:cid', renderPublication) + router.get('/group/:gid', renderGroup) + router.get('/group/:gid/collection', renderCollection) + router.all('/:other+', (ctx) => { + ctx.redirect('/') + ctx.status = 307 + }) app.use(router.routes()) app.use(router.allowedMethods()) diff --git a/src/ssr.ts b/src/ssr.ts index 78d658a..115292b 100644 --- a/src/ssr.ts +++ b/src/ssr.ts @@ -11,8 +11,10 @@ import { lang639_3, isRTL } from './lang.js' const ZeroID = Xid.default().toString() const indexTpl = readFileSync('./html/index.html', 'utf-8') -const publicationTpl = readFileSync('./html/publication.html', 'utf-8') const groupTpl = readFileSync('./html/group.html', 'utf-8') +const publicationTpl = readFileSync('./html/publication.html', 'utf-8') +const collectionTpl = readFileSync('./html/collection.html', 'utf-8') + const siteBase = config.get('siteBase') const writingBase = config.get('writingBase') const userBase = config.get('userBase') @@ -51,9 +53,9 @@ export async function renderIndex(ctx: Context) { const $ = cheerio.load(indexTpl) const lang = headers['x-language'] ?? 'eng' - const info = metaInfos[lang] || metaInfos.eng - $('title').text(info.title) - $('meta[name="description"]').prop('content', info.desc) + const siteInfo = metaInfos[lang] || metaInfos.eng + $('title').text(siteInfo.title) + $('meta[name="description"]').prop('content', siteInfo.desc) try { await Promise.all([ @@ -82,20 +84,17 @@ export async function renderIndex(ctx: Context) { export async function renderPublication(ctx: Context): Promise { const headers = ctxHeaders(ctx) - const cid = ctx.params.id as string + const cid = ctx.params.cid as string const { gid, language } = ctx.query const $ = cheerio.load(publicationTpl) const lang = headers['x-language'] ?? 'eng' - const info = metaInfos[lang] - if (info) { - $('title').text(info.title) - $('meta[name="description"]').prop('content', info.desc) + const siteInfo = metaInfos[lang] + if (siteInfo) { + $('title').text(siteInfo.title) + $('meta[name="description"]').prop('content', siteInfo.desc) } try { - const docs = await listPublished(headers, Xid.fromValue(cid)) - renderPublicationItems($, docs) - const doc = await getPublication( headers, cid, @@ -103,6 +102,12 @@ export async function renderPublication(ctx: Context): Promise { (language ?? lang) as string ) + const docs = await listPublished(headers, Xid.fromValue(cid)) + renderPublicationItems( + $, + docs.filter((item) => item.language !== doc.language) + ) + const docUrl = `${siteBase}/pub/${Xid.fromValue(doc.cid).toString()}` const groupUrl = `${siteBase}/group/${Xid.fromValue(doc.gid).toString()}` $('html').prop('lang', doc.language) @@ -110,25 +115,39 @@ export async function renderPublication(ctx: Context): Promise { $('html').prop('dir', 'rtl') } - $('meta[property="og:title"]').prop('content', doc.title) $('meta[property="og:url"]').prop('content', docUrl) + $('meta[property="og:title"]').prop('content', doc.title) + if (doc.summary) { + $('meta[property="og:description"]').prop('content', doc.summary) + } $('#title').text(doc.title) - const authors = $('#authors') - authors.prop('href', groupUrl) - authors.text(groupUrl) - if (doc.authors != null && doc.authors.length > 0) { - authors.text(doc.authors.join(', ')) + if (doc.summary) { + $('#summary').text(doc.summary) + } + if (doc.authors) { + doc.authors.forEach((author) => + $(`${author}`).appendTo(`#authors`) + ) + } + if (doc.keywords) { + doc.keywords.forEach((keyword) => + $(`${keyword}`).appendTo(`#keywords`) + ) } + const groupInfo = $('#group') + groupInfo.prop('href', groupUrl) + groupInfo.text(`Group: ${groupUrl}`) + const updated_at = new Date(doc.updated_at).toUTCString() - $('#updated_time').text(updated_at) - $('#version').text(doc.version.toString()) + $('#updated_time').text(`Updated: ${updated_at}`) + $('#version').text(`Version: ${doc.version}`) const content = decode(doc.content) as Node let contentHtml = toHTML(content) + - `\n

    ${docUrl}

    ` + `\n

    Permalink: ${docUrl}

    ` if (doc.rfp?.creation) { contentHtml += `\n

    Request For Payment, Price: ${doc.rfp.creation.price} WEN

    ` } @@ -148,36 +167,116 @@ export async function renderPublication(ctx: Context): Promise { ctx.body = $.html() } +export async function renderCollection(ctx: Context): Promise { + const headers = ctxHeaders(ctx) + + // const gid = ctx.params.gid as string + const { cid: _cid } = ctx.query + const $ = cheerio.load(collectionTpl) + const lang = headers['x-language'] ?? 'eng' + const siteInfo = metaInfos[lang] + if (siteInfo) { + $('title').text(siteInfo.title) + $('meta[name="description"]').prop('content', siteInfo.desc) + } + + try { + const doc = await getCollection(headers, _cid as string) + const [language, info] = getCollectionInfo(doc, lang) ?? [] + if (!info || !language) { + throw createError(404, 'collection not found') + } + + const gid = Xid.fromValue(doc.gid) + const cid = Xid.fromValue(doc.id) + + const groupUrl = `${siteBase}/group/${gid.toString()}` + const docUrl = `${groupUrl}?cid=${cid.toString()}` + $('html').prop('lang', language) + if (isRTL(language)) { + $('html').prop('dir', 'rtl') + } + + $('meta[property="og:url"]').prop('content', docUrl) + $('meta[property="og:title"]').prop('content', info.title) + if (info.summary) { + $('meta[property="og:description"]').prop('content', info.summary) + } + + $('#title').text(info.title) + if (info.summary) { + $('#summary').text(info.summary) + } + + if (info.authors) { + info.authors.forEach((author) => + $(`${author}`).appendTo(`#authors`) + ) + } + if (info.keywords) { + info.keywords.forEach((keyword) => + $(`${keyword}`).appendTo(`#keywords`) + ) + } + + const groupInfo = $('#group') + groupInfo.prop('href', groupUrl) + groupInfo.text(`Group: ${groupUrl}`) + + const updated_at = new Date(doc.updated_at).toUTCString() + $('#updated_time').text(`Updated: ${updated_at}`) + ctx.set('last-modified', updated_at) + + try { + const docs = await listCollectionChildren(headers, cid) + renderCollectionChildrenItems($, docs) + } catch (err: any) { + ignoreError() + } + } catch (err: any) { + ctx.status = 404 + const url = ctx.get('x-request-url') + if (url !== '') { + $('#content').text(url + ' not found') + } + } + + ctx.vary('Accept-Language') + ctx.type = 'text/html' + ctx.body = $.html() +} + export async function renderGroup(ctx: Context) { const headers = ctxHeaders(ctx) - const _gid = ctx.params.id as string + const gid = ctx.params.gid as string const $ = cheerio.load(groupTpl) const lang = headers['x-language'] ?? 'eng' - const info = metaInfos[lang] - if (info) { - $('title').text(info.title) - $('meta[name="description"]').prop('content', info.desc) + const siteInfo = metaInfos[lang] + if (siteInfo) { + $('title').text(siteInfo.title) + $('meta[name="description"]').prop('content', siteInfo.desc) } try { - const group = await getGroup(headers, _gid) - const gid = Xid.fromValue(group.id) + const group = await getGroup(headers, gid) + const xGid = Xid.fromValue(group.id) const groupUrl = `${siteBase}/group/${gid.toString()}` + + $('meta[property="og:url"]').prop('content', groupUrl) $('meta[property="og:title"]').prop('content', group.name) $('meta[property="og:description"]').prop('content', group.slogan) - $('meta[property="og:url"]').prop('content', groupUrl) $('#group_name').text(group.name) $('#group_slogan').text(group.slogan) await Promise.all([ (async () => { - const docs = await listCollections(headers, gid) + const docs = await listCollections(headers, xGid) renderCollectionItems($, docs, lang) })().catch(ignoreError), (async () => { - const docs = await listPublications(headers, gid) + const docs = await listPublications(headers, xGid) renderPublicationItems($, docs) })().catch(ignoreError), ]) @@ -246,6 +345,33 @@ function renderCollectionItems( ) } +function renderCollectionChildrenItems( + $: cheerio.CheerioAPI, + docs: CollectionChildrenOutput[] +): void { + renderList( + $, + 'children', + docs + .map((doc) => { + if (doc.kind == 2) return null // collection + + const cid = Xid.fromValue(doc.cid).toString() + const gid = Xid.fromValue(doc.gid).toString() + return { + id: `${gid}-${cid}`, + url: `${siteBase}/pub/${cid}?gid=${gid}`, + title: doc.title, + language: doc.language, + summary: doc.summary ?? '', + keywords: doc.keywords, + authors: doc.authors, + } + }) + .filter((item) => !!item) as ListItem[] + ) +} + interface ListItem { id: string url: string @@ -389,6 +515,25 @@ interface CollectionInfo { authors?: string[] } +interface CollectionChildrenOutput { + parent: Uint8Array + gid: Uint8Array + cid: Uint8Array + kind: number + ord: number + language: string + version: number + status: number + rating?: number + price?: number + updated_at: number + title: string + cover?: string + keywords?: string[] + authors?: string[] + summary?: string +} + async function getPublication( headers: Record, cid: string, @@ -515,6 +660,29 @@ async function listLatestPublications( return obj.result } +async function getCollection( + headers: Record, + cid: string +): Promise { + const api = new URL('/v1/collection', writingBase) + api.searchParams.append('gid', '000000000000000anon0') + api.searchParams.append('id', cid) + api.searchParams.append('fields', 'info,updated_at') + + headers.accept = 'application/cbor' + const res = await fetch(api, { + headers, + }) + + if (res.status !== 200) { + throw createError(res.status, await res.text()) + } + + const data = await res.arrayBuffer() + const obj = decode(Buffer.from(data)) + return obj.result +} + async function listCollections( headers: Record, gid: Xid @@ -569,6 +737,34 @@ async function listLatestCollections( return obj.result } +async function listCollectionChildren( + headers: Record, + id: Xid +): Promise { + const api = new URL('/v1/collection/list_children', writingBase) + headers.accept = 'application/cbor' + headers['content-type'] = 'application/cbor' + const res = await fetch(api, { + method: 'POST', + headers, + body: Buffer.from( + encode({ + gid: Xid.default().toBytes(), + id: id.toBytes(), + page_size: 100, + }) + ), + }) + + if (res.status !== 200) { + throw createError(res.status, await res.text()) + } + + const data = await res.arrayBuffer() + const obj = decode(Buffer.from(data)) + return obj.result +} + function isXid(id: string): boolean { try { Xid.parse(id)