Skip to content

Commit

Permalink
Merge pull request #2655 from lpsinger/filter-duplicate-bibcodes
Browse files Browse the repository at this point in the history
Filter out ADS entries for duplicate bibcodes
  • Loading branch information
dakota002 authored Oct 30, 2024
2 parents 7321140 + 3a1644e commit ab67535
Showing 1 changed file with 30 additions and 19 deletions.
49 changes: 30 additions & 19 deletions app/scheduled/ads/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ async function* getAdsEntries() {
const url = new URL('https://api.adsabs.harvard.edu/v1/search/query')
url.searchParams.set('q', 'bibstem:GCN')
url.searchParams.set('fl', 'bibcode,volume')
url.searchParams.set('sort', 'bibcode asc')
url.searchParams.set('rows', '2000')
let start = 0
let length
Expand All @@ -42,28 +43,38 @@ async function* getAdsEntries() {

export async function handler() {
const db = await tables()
const seenCircularIds = new Set<number>()
for await (const entries of getAdsEntries()) {
await Promise.all(
entries.map(async ({ bibcode, circularId }) => {
try {
await db.circulars.update({
ConditionExpression: 'attribute_exists(circularId)',
ExpressionAttributeValues: {
':bibcode': bibcode,
},
Key: { circularId },
UpdateExpression: 'set bibcode = :bibcode',
})
} catch (e) {
if (e instanceof ConditionalCheckFailedException) {
console.error(
`Attempted to update Circular ${circularId}, which does not exist in DynamoDB`
)
} else {
throw e
entries
.filter(({ circularId }) => {
// Filter out duplicate entries for the same circular ID;
// prefer the chronologically earlier entries which are also entries
// with lexically least value of bibcode
const result = !seenCircularIds.has(circularId)
seenCircularIds.add(circularId)
return result
})
.map(async ({ bibcode, circularId }) => {
try {
await db.circulars.update({
ConditionExpression: 'attribute_exists(circularId)',
ExpressionAttributeValues: {
':bibcode': bibcode,
},
Key: { circularId },
UpdateExpression: 'set bibcode = :bibcode',
})
} catch (e) {
if (e instanceof ConditionalCheckFailedException) {
console.error(
`Attempted to update Circular ${circularId}, which does not exist in DynamoDB`
)
} else {
throw e
}
}
}
})
})
)
}
}

0 comments on commit ab67535

Please sign in to comment.