Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete and purge individual submissions #1150

Merged
merged 23 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions lib/bin/purge-forms.js → lib/bin/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,28 @@
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.
//
// This script checks for (soft-)deleted forms and purges any that were deleted
// over 30 days ago.
// This script checks for (soft-)deleted forms and submissions and purges
// any that were deleted over 30 days ago.
//
// It also accepts command line arguments that can force the purging of
// forms and submissions that were deleted less than 30 days ago.
//
// It can also be used to purge a specific form or submission
// (that has already been marked deleted).

const { run } = require('../task/task');
const { purgeForms } = require('../task/purge');
const { purgeTask } = require('../task/purge');

const { program } = require('commander');
program.option('-f, --force', 'Force any soft-deleted form to be purged right away.');
program.option('-m, --mode <value>', 'Mode of purging. Can be "forms", "submissions", or "all". Default is "all".', 'all');
program.option('-i, --formId <integer>', 'Purge a specific form based on its id.', parseInt);
program.option('-p, --projectId <integer>', 'Restrict purging to a specific project.', parseInt);
program.option('-x, --xmlFormId <value>', 'Restrict purging to specific form based on xmlFormId (must be used with project id).');
program.option('-x, --xmlFormId <value>', 'Restrict purging to specific form based on xmlFormId (must be used with projectId).');
program.option('-s, --instanceId <value>', 'Restrict purging to a specific submission based on instanceId (use with projectId and xmlFormId).');

program.parse();

const options = program.opts();

run(purgeForms(options.force, options.formId, options.projectId, options.xmlFormId)
.then((count) => `Forms purged: ${count}`));
run(purgeTask(options));
23 changes: 23 additions & 0 deletions lib/model/migrations/20240914-01-add-submission-delete-verb.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const up = (db) => db.raw(`
UPDATE roles
SET verbs = verbs || '["submission.delete", "submission.restore"]'::jsonb
WHERE system in ('admin', 'manager')
`);

const down = (db) => db.raw(`
UPDATE roles
SET verbs = (verbs - 'submission.delete' - 'submission.restore')
WHERE system in ('admin', 'manager')
`);

module.exports = { up, down };

27 changes: 27 additions & 0 deletions lib/model/migrations/20240914-02-remove-orphaned-client-audits.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2024 ODK Central Developers
// See the NOTICE file at the top-level directory of this distribution and at
// https://github.com/getodk/central-backend/blob/master/NOTICE.
// This file is part of ODK Central. It is subject to the license terms in
// the LICENSE file found in the top-level directory of this distribution and at
// https://www.apache.org/licenses/LICENSE-2.0. No part of ODK Central,
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

// From earlier form-purging, there could have been leftover client audits
// that were not properly purged because of how they joined to submissions.
// This migration deletes those client audit rows, allowing the blobs to finally
// be de-referenced and also eventually purged (by the puring cron job).

const up = (db) => db.raw(`
DELETE FROM client_audits
WHERE "blobId" NOT IN (
SELECT "blobId" FROM submission_attachments
WHERE "blobId" IS NOT NULL
AND "isClientAudit" IS true
);
`);

const down = () => {};

module.exports = { up, down };

4 changes: 2 additions & 2 deletions lib/model/query/audits.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const actionCondition = (action) => {
// The backup action was logged by a backup script that has been removed.
// Even though the script has been removed, the audit log entries it logged
// have not, so we should continue to exclude those.
return sql`action not in ('entity.create', 'entity.bulk.create', 'entity.error', 'entity.update.version', 'entity.update.resolve', 'entity.delete', 'submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess', 'backup', 'analytics')`;
return sql`action not in ('entity.create', 'entity.bulk.create', 'entity.error', 'entity.update.version', 'entity.update.resolve', 'entity.delete', 'submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess', 'submission.delete', 'submission.restore', 'backup', 'analytics')`;
else if (action === 'user')
return sql`action in ('user.create', 'user.update', 'user.delete', 'user.assignment.create', 'user.assignment.delete', 'user.session.create')`;
else if (action === 'field_key')
Expand All @@ -48,7 +48,7 @@ const actionCondition = (action) => {
else if (action === 'form')
return sql`action in ('form.create', 'form.update', 'form.delete', 'form.restore', 'form.purge', 'form.attachment.update', 'form.submission.export', 'form.update.draft.set', 'form.update.draft.delete', 'form.update.publish')`;
else if (action === 'submission')
return sql`action in ('submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess')`;
return sql`action in ('submission.create', 'submission.update', 'submission.update.version', 'submission.attachment.update', 'submission.reprocess', 'submission.delete', 'submission.restore', 'submission.purge')`;
else if (action === 'dataset')
return sql`action in ('dataset.create', 'dataset.update')`;
else if (action === 'entity')
Expand Down
7 changes: 6 additions & 1 deletion lib/model/query/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,12 @@ const _get = (includeSource) => {
`}
${!includeSource ? sql`` : sql`
LEFT JOIN entity_def_sources ON entity_defs."sourceId" = entity_def_sources."id"
LEFT JOIN submission_defs ON submission_defs.id = entity_def_sources."submissionDefId"
LEFT JOIN (
SELECT submission_defs.* FROM submission_defs
JOIN submissions ON submission_defs."submissionId" = submissions.id
JOIN forms ON submissions."formId" = forms.id
WHERE submissions."deletedAt" IS NULL AND forms."deletedAt" IS NULL
) as submission_defs on submission_defs.id = entity_def_sources."submissionDefId"
LEFT JOIN (
SELECT submissions.*, submission_defs."userAgent" FROM submissions
JOIN submission_defs ON submissions.id = submission_defs."submissionId" AND root
Expand Down
15 changes: 11 additions & 4 deletions lib/model/query/forms.js
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ const _trashedFilter = (force, id, projectId, xmlFormId) => {
// 3. Update actees table for the specific form to leave some useful information behind
// 4. Delete the forms and their resources from the database
// 5. Purge unattached blobs
const purge = (force = false, id = null, projectId = null, xmlFormId = null) => ({ oneFirst, Blobs }) => {
const purge = (force = false, id = null, projectId = null, xmlFormId = null) => ({ oneFirst }) => {
if (xmlFormId != null && projectId == null)
throw Problem.internal.unknown({ error: 'Must also specify projectId when using xmlFormId' });
return oneFirst(sql`
Expand All @@ -415,6 +415,15 @@ with redacted_audits as (
from forms
where audits."acteeId" = forms."acteeId"
and ${_trashedFilter(force, id, projectId, xmlFormId)}
), deleted_client_audits as (
delete from client_audits
using submission_attachments, submission_defs, submissions, forms
where client_audits."blobId" = submission_attachments."blobId"
and submission_attachments."submissionDefId" = submission_defs.id
and submission_attachments."isClientAudit" = true
and submission_defs."submissionId" = submissions.id
and submissions."formId" = forms.id
and ${_trashedFilter(force, id, projectId, xmlFormId)}
ktuite marked this conversation as resolved.
Show resolved Hide resolved
), purge_audits as (
insert into audits ("action", "acteeId", "loggedAt", "processed")
select 'form.purge', "acteeId", clock_timestamp(), clock_timestamp()
Expand All @@ -437,9 +446,7 @@ with redacted_audits as (
where ${_trashedFilter(force, id, projectId, xmlFormId)}
returning 1
)
select count(*) from deleted_forms`)
.then((count) => Blobs.purgeUnattached()
.then(() => Promise.resolve(count)));
select count(*) from deleted_forms`);
};

////////////////////////////////////////////////////////////////////////////////
Expand Down
88 changes: 86 additions & 2 deletions lib/model/query/submissions.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
// including this file, may be copied, modified, propagated, or distributed
// except according to the terms contained in the LICENSE file.

const config = require('config');
const { map } = require('ramda');
const { sql } = require('slonik');
const { Frame, table } = require('../frame');
const { Actor, Form, Submission } = require('../frames');
const { odataFilter, odataOrderBy } = require('../../data/odata-filter');
const { odataToColumnMap, odataSubTableToColumnMap } = require('../../data/submission');
const { unjoiner, extender, equals, page, updater, QueryOptions, insertMany } = require('../../util/db');
const { unjoiner, extender, equals, page, updater, QueryOptions, insertMany, markDeleted, markUndeleted } = require('../../util/db');
const { blankStringToNull, construct } = require('../../util/util');
const Problem = require('../../util/problem');
const { streamEncBlobs } = require('../../util/blob');
Expand Down Expand Up @@ -200,6 +201,17 @@ const getById = (submissionId) => ({ maybeOne }) =>
maybeOne(sql`select * from submissions where id=${submissionId} and "deletedAt" is null`)
.then(map(construct(Submission)));

const getDeleted = (projectId, formId, instanceId) => ({ maybeOne }) =>
maybeOne(sql` select submissions.*
from submissions
join forms on submissions."formId" = forms.id
where forms."projectId" = ${projectId}
and submissions."formId" = ${formId}
and submissions."instanceId" = ${instanceId}
and submissions."deletedAt" IS NOT NULL
`)
.then(map(construct(Submission)));

const joinOnSkiptoken = (skiptoken, formId, draft) => {
if (skiptoken == null) return sql``;
// In the case of a subtable, we fetch all submissions without pagination: we
Expand Down Expand Up @@ -386,14 +398,86 @@ const getForExport = (formId, instanceId, draft, options = QueryOptions.none) =>
maybeOne(_export(formId, draft, [], options.withCondition({ 'submissions.instanceId': instanceId })))
.then(map(_exportUnjoiner));

////////////////////////////////////////////////////////////////////////////////
// DELETE SUBMISSION

const del = (submission) => ({ run }) =>
run(markDeleted(submission));
del.audit = (submission, form) => (log) => log('submission.delete', { acteeId: form.acteeId }, { instanceId: submission.instanceId });

const restore = (submission) => ({ run }) =>
run(markUndeleted(submission));
restore.audit = (submission, form) => (log) => log('submission.restore', { acteeId: form.acteeId }, { instanceId: submission.instanceId });

////////////////////////////////////////////////////////////////////////////////
// PURGING SOFT-DELETED SUBMISSIONS

const DAY_RANGE = config.has('default.taskSchedule.purge')
? config.get('default.taskSchedule.purge')
: 30; // Default is 30 days


// Submission purging and the trash filter can target a specific submission
// or all deleted submissions, but can't be used to filter-purge by project or form.
const _trashedFilter = (force, projectId, xmlFormId, instanceId) => {
const idFilter = ((instanceId != null) && (projectId != null) && (xmlFormId != null)
? sql`and submissions."instanceId" = ${instanceId}
and forms."projectId" = ${projectId}
and forms."xmlFormId" = ${xmlFormId}`
: sql``);
return (force
? sql`submissions."deletedAt" is not null ${idFilter}`
: sql`submissions."deletedAt" < current_date - cast(${DAY_RANGE} as int) ${idFilter}`);
};

const purge = (force = false, projectId = null, xmlFormId = null, instanceId = null) => ({ oneFirst }) => {
if ((instanceId != null || projectId != null || xmlFormId != null) && !(instanceId != null && projectId != null && xmlFormId != null)) {
throw Problem.internal.unknown({ error: 'Must specify either all or none of projectId, xmlFormId, and instanceId' });
}
return oneFirst(sql`
with redacted_audits as (
update audits set notes = ''
from submissions
join forms on submissions."formId" = forms.id
where (audits.details->>'submissionId')::int = submissions.id
and ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
), deleted_client_audits as (
delete from client_audits
using submission_attachments, submission_defs, submissions, forms
where client_audits."blobId" = submission_attachments."blobId"
and submission_attachments."submissionDefId" = submission_defs.id
and submission_attachments."isClientAudit" = true
and submission_defs."submissionId" = submissions.id
and submissions."formId" = forms.id
and ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
), purge_audits as (
insert into audits ("action", "loggedAt", "processed", "details")
select 'submission.purge', clock_timestamp(), clock_timestamp(), jsonb_build_object('submissions_deleted', "count")
ktuite marked this conversation as resolved.
Show resolved Hide resolved
from (
select count(*) as count
from submissions
join forms on forms.id = submissions."formId"
where ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
) as del_sub_count
where del_sub_count.count > 0
), deleted_submissions as (
delete from submissions
using forms
where submissions."formId" = forms.id
and ${_trashedFilter(force, projectId, xmlFormId, instanceId)}
returning 1
)
select count(*) from deleted_submissions`);
};

module.exports = {
createNew, createVersion,
update, clearDraftSubmissions, clearDraftSubmissionsForProject,
update, del, restore, purge, clearDraftSubmissions, clearDraftSubmissionsForProject,
setSelectMultipleValues, getSelectMultipleValuesForExport,
getByIdsWithDef, getSubAndDefById,
getByIds, getAllForFormByIds, getById, countByFormId, verifyVersion,
getDefById, getCurrentDefByIds, getAnyDefByFormAndInstanceId, getDefsByFormAndLogicalId, getDefBySubmissionAndInstanceId, getRootForInstanceId,
getDeleted,
streamForExport, getForExport
};

4 changes: 2 additions & 2 deletions lib/resources/entities.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,13 @@ module.exports = (service, endpoint) => {

}));

service.delete('/projects/:projectId/datasets/:name/entities/:uuid', endpoint(async ({ Datasets, Entities }, { auth, params, queryOptions }) => {
service.delete('/projects/:projectId/datasets/:name/entities/:uuid', endpoint(async ({ Datasets, Entities }, { auth, params }) => {

const dataset = await Datasets.get(params.projectId, params.name, true).then(getOrNotFound);

await auth.canOrReject('entity.delete', dataset);

const entity = await Entities.getById(dataset.id, params.uuid, queryOptions).then(getOrNotFound);
const entity = await Entities.getById(dataset.id, params.uuid).then(getOrNotFound);

await Entities.del(entity, dataset);

Expand Down
16 changes: 16 additions & 0 deletions lib/resources/submissions.js
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,22 @@ module.exports = (service, endpoint) => {
.then(getOrNotFound)
.then((submission) => Submissions.update(form, submission, Submission.fromApi(body))))));

service.delete('/projects/:projectId/forms/:formId/submissions/:instanceId', endpoint(async ({ Forms, Submissions }, { params, auth }) => {
const form = await Forms.getByProjectAndXmlFormId(params.projectId, params.formId, false).then(getOrNotFound);
await auth.canOrReject('submission.delete', form);
const submission = await Submissions.getByIdsWithDef(params.projectId, params.formId, params.instanceId, false).then(getOrNotFound);
await Submissions.del(submission, form);
return success();
}));

service.post('/projects/:projectId/forms/:formId/submissions/:instanceId/restore', endpoint(async ({ Forms, Submissions }, { params, auth }) => {
const form = await Forms.getByProjectAndXmlFormId(params.projectId, params.formId, false).then(getOrNotFound);
await auth.canOrReject('submission.restore', form);
const submission = await Submissions.getDeleted(params.projectId, form.id, params.instanceId).then(getOrNotFound);
await Submissions.restore(submission, form);
return success();
}));


const dataOutputs = (base, draft, getForm) => {

Expand Down
30 changes: 27 additions & 3 deletions lib/task/purge.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,31 @@

const { task } = require('./task');

const purgeForms = task.withContainer(({ Forms }) => (force = false, formId = null, projectId = null, xmlFormId = null) =>
Forms.purge(force, formId, projectId, xmlFormId));
const purgeTask = task.withContainer((container) => async (options = {}) => {
// Form/submission purging will happen within its own transaction
const message = await container.db.transaction(async trxn => {
const { Forms, Submissions } = container.with({ db: trxn });
try {
if (options.mode === 'submissions' || options.instanceId) {
const count = await Submissions.purge(options.force, options.projectId, options.xmlFormId, options.instanceId);
return `Submissions purged: ${count}`;
} else if (options.mode === 'forms' || (options.formId || options.xmlFormId)) {
const count = await Forms.purge(options.force, options.formId, options.projectId, options.xmlFormId);
return `Forms purged: ${count}`;
} else {
const formCount = await Forms.purge(options.force, options.formId, options.projectId, options.xmlFormId);
const submissionCount = await Submissions.purge(options.force, options.projectId, options.xmlFormId, options.instanceId);
return `Forms purged: ${formCount}, Submissions purged: ${submissionCount}`;
}
} catch (error) {
return error?.problemDetails?.error;
}
});

module.exports = { purgeForms };
// Purging unattached blobs is outside of the above transaction because it
// may interact with an external blob store.
await container.Blobs.purgeUnattached();
return message;
});

module.exports = { purgeTask };
Loading