Skip to content

Commit

Permalink
fix(#568): TWIC scrape permission error
Browse files Browse the repository at this point in the history
  • Loading branch information
jackstenglein committed Jul 23, 2024
1 parent 88636d4 commit fd59b67
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 2 deletions.
6 changes: 5 additions & 1 deletion backend/pgnService/serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ functions:
- dynamodb:PutItem
- dynamodb:UpdateItem
- dynamodb:DeleteItem
Resource: !GetAtt ExplorerTable.Arn
Resource: !If
- IsProd
- - !GetAtt ExplorerTable.Arn
- arn:aws:dynamodb:${aws:region}:${aws:accountId}:table/prod-masters-explorer
- !GetAtt ExplorerTable.Arn

notifyFollowers:
handler: explorer/notifyFollowers.handler
Expand Down
2 changes: 1 addition & 1 deletion scripts/delete_games.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import boto3

db = boto3.resource('dynamodb')
table = db.Table('prod-games')
table = db.Table('dev-games')


def main():
Expand Down
74 changes: 74 additions & 0 deletions scripts/delete_twic_duplicate_1549.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import boto3
from boto3.dynamodb.conditions import Key
from boto3.dynamodb.types import TypeDeserializer
import traceback
import json

# Module-level DynamoDB handles shared by the functions below.
# `table` is the games table that is queried and deleted from.
db = boto3.resource('dynamodb')
table = db.Table('dev-games')
# Converts DynamoDB-typed JSON (as found in the export file) to plain Python values.
td = TypeDeserializer()


def handle_items(items, known_ids, batch, twic_archive='1549', cohort='masters'):
    """Queue deletes for duplicate TWIC games and return how many were queued.

    An item counts as a duplicate when its ``id`` is not in ``known_ids`` and
    its ``headers['TwicArchive']`` equals ``twic_archive``.

    Args:
        items: Iterable of game items (dicts with an ``id`` key and,
            normally, a ``headers`` dict).
        known_ids: Container of ids that must be kept.
        batch: Object exposing ``delete_item(Key=...)``, such as the batch
            writer created in ``main``.
        twic_archive: Value of the ``TwicArchive`` header to match.
        cohort: Value written to the ``cohort`` field of each delete key.

    Returns:
        The number of delete requests handed to ``batch``.
    """
    deleted = 0
    for item in items:
        game_id = item['id']
        if game_id in known_ids:
            continue

        # An item with no headers cannot be attributed to the archive, so it
        # is kept instead of aborting the whole run with a KeyError.
        headers = item.get('headers') or {}
        if headers.get('TwicArchive') != twic_archive:
            continue

        batch.delete_item(Key={
            'cohort': cohort,
            'id': game_id,
        })
        deleted += 1
    return deleted



def _load_known_ids(path):
    """Return the set of game ids found in the line-delimited export at ``path``.

    Each non-blank line is parsed as JSON and its ``Item`` value is
    deserialized with the module-level ``td`` before reading ``id``.
    """
    known_ids = set()
    with open(path) as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not valid JSON.
                continue
            game = td.deserialize({'M': json.loads(line)['Item']})
            known_ids.add(game['id'])
    return known_ids


def _query_masters_pages():
    """Yield each page of items from the masters-cohort query on ``table``.

    Follows ``LastEvaluatedKey`` until the response no longer contains one.
    """
    query_args = {
        'KeyConditionExpression': 'cohort = :masters and id > :id',
        'ExpressionAttributeValues': {
            ':masters': 'masters',
            ':id': '2024.06.25',
        },
    }
    while True:
        res = table.query(**query_args)
        yield res.get('Items', [])

        last_key = res.get('LastEvaluatedKey', None)
        if last_key is None:
            return
        query_args['ExclusiveStartKey'] = last_key


def main():
    """Delete masters games from TWIC archive 1549 that are not in the export.

    Loads the ids to keep from ``twic_games_1549.json``, pages through the
    query results, and passes every page to ``handle_items`` inside one
    batch writer. Any exception is printed with its traceback, and the
    number of queued deletes is always reported at the end.
    """
    # Bound before the try block so the summary below still works when the
    # failure happens early (for example, the export file is missing).
    deleted = 0

    try:
        known_ids = _load_known_ids('twic_games_1549.json')

        with table.batch_writer() as batch:
            for items in _query_masters_pages():
                deleted += handle_items(items, known_ids, batch)

    except Exception as e:
        print(e)
        traceback.print_exc()

    print('Deleted: ', deleted)


# Run the cleanup only when this file is executed as a script.
if __name__ == '__main__':
    main()

0 comments on commit fd59b67

Please sign in to comment.