Skip to content

Commit

Permalink
v8.0.5
Browse files Browse the repository at this point in the history
- Fix error while parsing ldjson
- Update dependencies

Related issues: #378, #374, #373
  • Loading branch information
ndaidong committed Jan 22, 2024
1 parent 2ec2573 commit 901d1cf
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 21 deletions.
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "8.0.4",
"version": "8.0.5",
"name": "@extractus/article-extractor",
"description": "To extract main article from given URL",
"homepage": "https://github.com/extractus/article-extractor",
Expand Down Expand Up @@ -30,18 +30,18 @@
"reset": "node reset"
},
"dependencies": {
"@mozilla/readability": "^0.4.4",
"@mozilla/readability": "^0.5.0",
"bellajs": "^11.1.2",
"cross-fetch": "^4.0.0",
"linkedom": "^0.16.4",
"linkedom": "^0.16.6",
"sanitize-html": "2.11.0"
},
"devDependencies": {
"@types/sanitize-html": "^2.9.5",
"eslint": "^8.55.0",
"eslint": "^8.56.0",
"https-proxy-agent": "^7.0.2",
"jest": "^29.7.0",
"nock": "^13.4.0"
"nock": "^13.5.0"
},
"keywords": [
"article",
Expand Down
39 changes: 23 additions & 16 deletions src/utils/extractLdSchema.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ const attributeLists = {
type: '@type',
}

/**
 * Decodes a JSON string without ever throwing.
 *
 * @param {string} text - Raw JSON text to decode.
 * @returns {*} Whatever JSON.parse yields for `text`, or null when parsing throws.
 */
const parseJson = (text) => {
  let parsed = null
  try {
    parsed = JSON.parse(text)
  } catch {
    // Parsing failed: keep the null fallback so callers can test for it.
  }
  return parsed
}

/**
* Parses JSON-LD data from a document and populates an entry object.
* Only fills in entry fields that are currently undefined or an empty string.
Expand All @@ -46,26 +54,25 @@ const attributeLists = {
// NOTE(review): this span is taken from a rendered diff, so lines from both sides of the
// change appear together (for example, `ldJson` is declared twice below). It is not
// runnable exactly as shown.
//
// Purpose: read the first <script type="application/ld+json"> in `document`, and copy
// selected JSON-LD attributes (as mapped by `attributeLists`) into `entry` for keys that
// `entry` does not already have a value for. Returns the same `entry` object.
export default (document, entry) => {
// Text of the first matching script element; undefined when no such element is found.
const ldSchema = document.querySelector('script[type="application/ld+json"]')?.textContent

if (!ldSchema) {
return entry
}
// parseJson yields null for malformed JSON, so an invalid payload does not throw here.
const ldJson = ldSchema ? parseJson(ldSchema) : null

// Direct JSON.parse: throws on malformed input.
const ldJson = JSON.parse(ldSchema)
Object.entries(attributeLists).forEach(([key, attr]) => {
if ((typeof entry[key] === 'undefined' || entry[key] === '') && ldJson[attr]) {
if (key === 'type' && typeof ldJson[attr] === 'string') {
return entry[key] = typeSchemas.includes(ldJson[attr].toLowerCase()) ? ldJson[attr].toLowerCase() : ''
}
// Nothing is populated when there was no JSON-LD text or it failed to parse.
if (ldJson) {
// `key` is the entry field name, `attr` the JSON-LD property it is read from.
Object.entries(attributeLists).forEach(([key, attr]) => {
// Only fill fields that are undefined or '' on entry and truthy in the JSON-LD data.
if ((typeof entry[key] === 'undefined' || entry[key] === '') && ldJson[attr]) {
// The type is kept (lowercased) only when it is listed in typeSchemas; otherwise ''.
if (key === 'type' && typeof ldJson[attr] === 'string') {
return entry[key] = typeSchemas.includes(ldJson[attr].toLowerCase()) ? ldJson[attr].toLowerCase() : ''
}

if (typeof ldJson[attr] === 'string') {
return entry[key] = ldJson[attr].toLowerCase()
}
// Plain string values are stored lowercased.
if (typeof ldJson[attr] === 'string') {
return entry[key] = ldJson[attr].toLowerCase()
}

if (Array.isArray(ldJson[attr]) && typeof ldJson[attr][0] === 'string') {
return entry[key] = ldJson[attr][0].toLowerCase()
// For array values, only the first element is used, and only when it is a string.
if (Array.isArray(ldJson[attr]) && typeof ldJson[attr][0] === 'string') {
return entry[key] = ldJson[attr][0].toLowerCase()
}
}
}
})
})
}

// The entry object passed in is mutated in place and returned.
return entry
}

0 comments on commit 901d1cf

Please sign in to comment.