From 53b122cdd235ccdc6e23d67873b778b56b98f8f0 Mon Sep 17 00:00:00 2001 From: Mathieu Agopian Date: Wed, 11 Sep 2024 11:31:07 +0200 Subject: [PATCH] Merge branch 'master' into elasticsearch --- CHANGELOG.md | 19 +++ docs/adapting-settings.md | 24 ++++ docs/harvesting.md | 4 +- js/views/dataset.vue | 31 +++- js/views/reuse.vue | 26 ++-- publiccode.yml | 26 ++++ requirements/develop.pip | 11 +- requirements/doc.pip | 4 +- requirements/install.in | 2 +- requirements/install.pip | 2 +- requirements/report.pip | 4 +- requirements/test.pip | 7 +- tasks/__init__.py | 2 +- udata/__init__.py | 2 +- udata/api/commands.py | 12 +- udata/api/oauth2.py | 48 ++++--- udata/api_fields.py | 42 +++++- udata/commands/fixtures.py | 102 ++++++++------ udata/commands/tests/test_fixtures.py | 35 +++-- udata/core/activity/api.py | 15 +- udata/core/dataservices/api.py | 43 +++++- udata/core/dataservices/models.py | 41 +++++- udata/core/dataset/events.py | 5 +- udata/core/organization/api.py | 8 +- udata/core/organization/api_fields.py | 11 +- udata/core/organization/csv.py | 1 + udata/core/organization/rdf.py | 5 +- udata/core/owned.py | 16 +++ udata/core/reuse/api.py | 5 +- udata/core/reuse/models.py | 3 +- udata/core/site/api.py | 27 +--- udata/core/user/api.py | 23 ++- udata/cors.py | 21 +-- udata/settings.py | 1 + udata/tests/api/test_activities_api.py | 69 +++++++++ udata/tests/api/test_auth_api.py | 133 +++++++++++++++++- udata/tests/api/test_dataservices_api.py | 82 ++++++++++- udata/tests/api/test_reuses_api.py | 115 +++++++++++++++ udata/tests/api/test_tags_api.py | 4 +- udata/tests/api/test_user_api.py | 11 ++ udata/tests/dataset/test_dataset_events.py | 28 ++++ udata/tests/dataset/test_dataset_rdf.py | 8 +- udata/tests/frontend/test_csv.py | 2 +- .../organization/test_organization_rdf.py | 37 ++++- udata/tests/test_cors.py | 38 ++++- udata/tests/test_owned.py | 101 ++++++++++++- udata/utils.py | 17 +++ 47 files changed, 1101 insertions(+), 172 deletions(-) create mode 100644 
publiccode.yml create mode 100644 udata/tests/api/test_activities_api.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 21df9ea966..0c02568058 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,29 @@ ## Current (in progress) +- Allow OAuth clients without secrets [#3138](https://github.com/opendatateam/udata/pull/3138) +- Add a `archived` button for datasets and reuses on frontend admin [#3104](https://github.com/opendatateam/udata/pull/3104) +- **breaking change** Return all the reuses available to a user on the /reuses endpoint, including the private and deleted ones they own [#3140](https://github.com/opendatateam/udata/pull/3140). +- Fix undelete reuse and dataservices [#3141](https://github.com/opendatateam/udata/pull/3141) +- Add a minimal publiccode.yml [#3144](https://github.com/opendatateam/udata/pull/3144) +- Fix the boolean filters in the API for the "new system" endpoints [#3139](https://github.com/opendatateam/udata/pull/3139) +- Update authlib dependency from 0.14.3 to 1.3.1 [#3135](https://github.com/opendatateam/udata/pull/3135) +- Add CORS on resource redirect [#3145](https://github.com/opendatateam/udata/pull/3145) + +## 9.1.4 (2024-08-26) + - Fix many linting issues reported by ruff [#3118](https://github.com/opendatateam/udata/pull/3118) - Import the dataservice's organization from the fixtures [#3121](https://github.com/opendatateam/udata/pull/3121) - Convert reuse to new API system [#3066](https://github.com/opendatateam/udata/pull/3066) - Fix circular import error [#3128](https://github.com/opendatateam/udata/pull/3128) - Add an option to specify the port when using `inv serve` [#3123](https://github.com/opendatateam/udata/pull/3123) +- Add a new `related_to` filter parameter to the activities API endpoint [#3127](https://github.com/opendatateam/udata/pull/3127) +- Properly import the `Discussion.closed_by` from the fixtures [#3125](https://github.com/opendatateam/udata/pull/3125) +- Send an API token to Hydra when publishing resource 
events [#3130](https://github.com/opendatateam/udata/pull/3130) +- Add `last_login_at` to org members API [#3133](https://github.com/opendatateam/udata/pull/3133) +- Always add Vary even for non CORS requests [#3132](https://github.com/opendatateam/udata/pull/3132) +- Add acronym in organization csv catalog [#3134](https://github.com/opendatateam/udata/pull/3134) +- Limit the number of user suggestions [#3131](https://github.com/opendatateam/udata/pull/3131) ## 9.1.3 (2024-08-01) @@ -16,6 +34,7 @@ - Update to the version v2.0.0 of udata-fixtures (with the dataservices) - Add type hints [#3111](https://github.com/opendatateam/udata/pull/3111) - Make sure requests v2.32.3 is used everywhere consistently [#3116](https://github.com/opendatateam/udata/pull/3116) +- Expose a dataservice in its organization's catalog, and expose a dataservice's catalog [#3122](https://github.com/opendatateam/udata/pull/3122) ## 9.1.2 (2024-07-29) diff --git a/docs/adapting-settings.md b/docs/adapting-settings.md index 2aedcfbd82..b345d413b4 100644 --- a/docs/adapting-settings.md +++ b/docs/adapting-settings.md @@ -641,3 +641,27 @@ FS_ROOT = '/srv/http/www.data.dev/fs' [flask-mongoengine-doc]: https://flask-mongoengine.readthedocs.org/ [authlib-doc]: https://docs.authlib.org/en/latest/flask/2/authorization-server.html#server [udata-search-service]: https://github.com/opendatateam/udata-search-service + +## Resources modifications publishing + +udata may notify external services (ex: [hydra](https://github.com/datagouv/hydra)) about resources modification on the platform. +install, or any other service. + + +### PUBLISH_ON_RESOURCE_EVENTS + +**default**: `False` + +Publish resource events to an external service. + +### RESOURCES_ANALYSER_URI + +**default**: `http://localhost:8000` + +URI of the external service receiving the resource events. + +### RESOURCES_ANALYSER_API_KEY + +**default**: `api_key_to_change` + +API key sent in the headers of the endpoint requests as a Bearer token. 
diff --git a/docs/harvesting.md b/docs/harvesting.md index e19e66f8b9..aa3ad7cb2d 100644 --- a/docs/harvesting.md +++ b/docs/harvesting.md @@ -26,7 +26,7 @@ def inner_harvest(): def inner_process_dataset(item: HarvestItem, args1, args2, args3): dataset = self.get_dataset(item.remote_id) - update_dataset(dataset, args1, args2) + update_dataset(dataset, args1, args2) return dataset ``` @@ -207,7 +207,7 @@ class RandomBackend(BaseBackend): dataset.title = faker.sentence() dataset.description = faker.text() - dataset.tags = list(set(faker.words(nb=faker.pyint()))) + dataset.tags = list(set(faker.tags(nb=faker.pyint()))) # Resources for i in range(faker.pyint()): diff --git a/js/views/dataset.vue b/js/views/dataset.vue index 11db04214b..0947ee9fb2 100644 --- a/js/views/dataset.vue +++ b/js/views/dataset.vue @@ -111,6 +111,19 @@ export default { icon: 'send', method: this.transfer_request }); + if (!this.dataset.archived) { + actions.push({ + label: this._('Archive'), + icon: 'archive', + method: this.archive + }) + } else { + actions.push({ + label: this._('Unarchive'), + icon: 'undo', + method: this.unarchive + }); + } if(!this.dataset.deleted) { actions.push({ label: this._('Delete'), @@ -169,6 +182,22 @@ export default { edit() { this.$go({name: 'dataset-edit', params: {oid: this.dataset.id}}); }, + archive() { + this.dataset.archived = new Date().toISOString(); + API.datasets.update_dataset({dataset: this.dataset.id, payload: this.dataset}, + (response) => { + this.dataset.on_fetched(response); + } + ); + }, + unarchive() { + this.dataset.archived = null; + API.datasets.update_dataset({dataset: this.dataset.id, payload: this.dataset}, + (response) => { + this.dataset.on_fetched(response); + } + ); + }, confirm_delete() { this.$root.$modal( require('components/dataset/delete-modal.vue'), @@ -201,7 +230,7 @@ export default { class: _class, label }); - } else if (existing) { + } else if (!value && existing) { this.badges.splice(this.badges.indexOf(existing), 1); } } 
diff --git a/js/views/reuse.vue b/js/views/reuse.vue index ffa0eeeb0c..c1032e9a2b 100644 --- a/js/views/reuse.vue +++ b/js/views/reuse.vue @@ -240,6 +240,18 @@ export default { this.$root.$modal(require('components/badges/modal.vue'), { subject: this.reuse }) + }, + addOrRemoveBadge(id, value, _class, label) { + const existing = this.badges.find(b => b.id === id); + if (value && !existing) { + this.badges.push({ + id, + class: _class, + label + }); + } else if (!value && existing) { + this.badges.splice(this.badges.indexOf(existing), 1); + } } }, route: { @@ -258,16 +270,10 @@ export default { } }, 'reuse.deleted': function (deleted) { - if (deleted) { - this.badges = [ - { - class: 'danger', - label: this._('Deleted') - } - ] - } else { - this.badges = [] - } + this.addOrRemoveBadge('deleted', deleted, 'danger', this._('Deleted')); + }, + 'reuse.archived': function(archived) { + this.addOrRemoveBadge('archived', archived, 'warning', this._('Archived')); } } } diff --git a/publiccode.yml b/publiccode.yml new file mode 100644 index 0000000000..ac45c3e829 --- /dev/null +++ b/publiccode.yml @@ -0,0 +1,26 @@ +publiccodeYmlVersion: "0.2" +name: udata +url: https://github.com/opendatateam/udata +landingURL: http://udata.readthedocs.org +creationDate: 2014-04-25 +latestRelease: + date: "" + version: "" +logo: https://github.com/opendatateam.png +usedBy: [] +fundedBy: + - name: Direction interministérielle du numérique + url: https://lannuaire.service-public.fr/gouvernement/d1a97841-b4bf-46df-a089-1003e4e266b4 +softwareType: "" +description: + en: + shortDescription: Customizable and skinnable social platform dedicated to open data. 
+ documentation: "" +legal: + license: agpl-3.0 + authorsFile: "" +maintenance: + type: community + contacts: + - name: "" + email: "" diff --git a/requirements/develop.pip b/requirements/develop.pip index 57975a2771..d44ec27f91 100644 --- a/requirements/develop.pip +++ b/requirements/develop.pip @@ -27,7 +27,7 @@ attrs==23.2.0 # -c requirements/test.pip # jsonschema # referencing -authlib==0.14.3 +authlib==1.3.1 # via # -c requirements/install.pip # -c requirements/test.pip @@ -155,7 +155,6 @@ cryptography==2.8 # -c requirements/test.pip # -r requirements/install.in # authlib - # secretstorage decorator==5.1.1 # via ipython distlib==0.3.8 @@ -170,11 +169,13 @@ docutils==0.20.1 # via readme-renderer elasticsearch==7.15.0 # via + # -c requirements/install.pip # -c requirements/test.pip # -r requirements/install.in # elasticsearch-dsl elasticsearch-dsl==7.4.0 # via + # -c requirements/install.pip # -c requirements/test.pip # -r requirements/install.in email-validator==2.2.0 @@ -350,10 +351,6 @@ jaraco-classes==3.3.1 # via keyring jedi==0.19.1 # via ipython -jeepney==0.8.0 - # via - # keyring - # secretstorage jinja2==3.1.2 # via # -c requirements/install.pip @@ -606,8 +603,6 @@ s3transfer==0.6.2 # -c requirements/install.pip # -c requirements/test.pip # boto3 -secretstorage==3.3.3 - # via keyring sentry-sdk[flask]==2.9.0 # via # -c requirements/install.pip diff --git a/requirements/doc.pip b/requirements/doc.pip index 5437a5cbac..78dfc12548 100644 --- a/requirements/doc.pip +++ b/requirements/doc.pip @@ -25,7 +25,7 @@ attrs==23.2.0 # -c requirements/test.pip # jsonschema # referencing -authlib==0.14.3 +authlib==1.3.1 # via # -c requirements/install.pip # -c requirements/test.pip @@ -154,11 +154,13 @@ dnspython==2.6.1 # pymongo elasticsearch==7.15.0 # via + # -c requirements/install.pip # -c requirements/test.pip # -r requirements/install.in # elasticsearch-dsl elasticsearch-dsl==7.4.0 # via + # -c requirements/install.pip # -c requirements/test.pip # -r 
requirements/install.in email-validator==2.2.0 diff --git a/requirements/install.in b/requirements/install.in index ac936ccbf6..847fd84f87 100644 --- a/requirements/install.in +++ b/requirements/install.in @@ -1,4 +1,4 @@ -authlib==0.14.3 +authlib==1.3.1 awesome-slugify==1.6.5 Babel==2.12.1 bcrypt==3.1.7 diff --git a/requirements/install.pip b/requirements/install.pip index 1f40728dc7..b978bdcbf6 100644 --- a/requirements/install.pip +++ b/requirements/install.pip @@ -14,7 +14,7 @@ attrs==23.2.0 # via # jsonschema # referencing -authlib==0.14.3 +authlib==1.3.1 # via -r requirements/install.in awesome-slugify==1.6.5 # via -r requirements/install.in diff --git a/requirements/report.pip b/requirements/report.pip index d11d821472..2a36816975 100644 --- a/requirements/report.pip +++ b/requirements/report.pip @@ -25,7 +25,7 @@ attrs==23.2.0 # -c requirements/test.pip # jsonschema # referencing -authlib==0.14.3 +authlib==1.3.1 # via # -c requirements/install.pip # -c requirements/test.pip @@ -157,11 +157,13 @@ dnspython==2.6.1 # pymongo elasticsearch==7.15.0 # via + # -c requirements/install.pip # -c requirements/test.pip # -r requirements/install.in # elasticsearch-dsl elasticsearch-dsl==7.4.0 # via + # -c requirements/install.pip # -c requirements/test.pip # -r requirements/install.in email-validator==2.2.0 diff --git a/requirements/test.pip b/requirements/test.pip index d5c1d9fae2..fc3eb29dd8 100644 --- a/requirements/test.pip +++ b/requirements/test.pip @@ -21,7 +21,7 @@ attrs==23.2.0 # -c requirements/install.pip # jsonschema # referencing -authlib==0.14.3 +authlib==1.3.1 # via # -c requirements/install.pip # -r requirements/install.in @@ -128,10 +128,13 @@ dnspython==2.6.1 # pymongo elasticsearch==7.15.0 # via + # -c requirements/install.pip # -r requirements/install.in # elasticsearch-dsl elasticsearch-dsl==7.4.0 - # via -r requirements/install.in + # via + # -c requirements/install.pip + # -r requirements/install.in email-validator==2.2.0 # via # -c 
requirements/install.pip diff --git a/tasks/__init__.py b/tasks/__init__.py index eae04b2292..eba91fb21f 100644 --- a/tasks/__init__.py +++ b/tasks/__init__.py @@ -197,7 +197,7 @@ def qa(ctx): def serve(ctx, host="localhost", port="7000"): """Run a development server""" with ctx.cd(ROOT): - ctx.run(f"python manage.py serve -d -r -h {host} -p {port}") + ctx.run(f"python manage.py serve -d -r -h {host} -p {port}", pty=True) @task diff --git a/udata/__init__.py b/udata/__init__.py index eb02b5a16e..4aa14fe760 100644 --- a/udata/__init__.py +++ b/udata/__init__.py @@ -4,5 +4,5 @@ udata """ -__version__ = "9.1.4.dev" +__version__ = "9.1.5.dev" __description__ = "Open data portal" diff --git a/udata/api/commands.py b/udata/api/commands.py index 7028e93aa6..b5aa3d5579 100644 --- a/udata/api/commands.py +++ b/udata/api/commands.py @@ -4,6 +4,7 @@ import click from flask import current_app, json from flask_restx import schemas +from werkzeug.security import gen_salt from udata.api import api from udata.api.oauth2 import OAuth2Client @@ -77,12 +78,15 @@ def validate(): @click.option( "-r", "--response-types", multiple=True, default=["code"], help="Client's response types" ) -def create_oauth_client(client_name, user_email, uri, grant_types, scope, response_types): +@click.option("-p", "--public", is_flag=True, help="Public client (SPA)") +def create_oauth_client(client_name, user_email, uri, grant_types, scope, response_types, public): """Creates an OAuth2Client instance in DB""" user = User.objects(email=user_email).first() if user is None: exit_with_error("No matching user to email") + client_secret = gen_salt(50) if not public else None + client = OAuth2Client.objects.create( name=client_name, owner=user, @@ -90,11 +94,15 @@ def create_oauth_client(client_name, user_email, uri, grant_types, scope, respon scope=scope, response_types=response_types, redirect_uris=uri, + secret=client_secret, ) click.echo(f"New OAuth client: {client.name}") click.echo(f"Client's ID 
{client.id}") - click.echo(f"Client's secret {client.secret}") + if public: + click.echo("Client is public and has no secret.") + else: + click.echo(f"Client's secret {client.secret}") click.echo(f"Client's grant_types {client.grant_types}") click.echo(f"Client's response_types {client.response_types}") click.echo(f"Client's URI {client.redirect_uris}") diff --git a/udata/api/oauth2.py b/udata/api/oauth2.py index b5a6fc108e..e739ef6a88 100644 --- a/udata/api/oauth2.py +++ b/udata/api/oauth2.py @@ -15,6 +15,7 @@ """ import fnmatch +import time from datetime import datetime, timedelta from authlib.integrations.flask_oauth2 import AuthorizationServer, ResourceProtector @@ -31,7 +32,6 @@ from flask import current_app, render_template, request from flask_security.utils import verify_password from werkzeug.exceptions import Unauthorized -from werkzeug.security import gen_salt from udata.app import csrf from udata.auth import current_user, login_required, login_user @@ -59,7 +59,7 @@ class OAuth2Client(ClientMixin, db.Datetimed, db.Document): - secret = db.StringField(default=lambda: gen_salt(50)) + secret = db.StringField(default=None) name = db.StringField(required=True) description = db.StringField() @@ -111,7 +111,7 @@ def check_redirect_uri(self, redirect_uri): def check_client_secret(self, client_secret): return self.secret == client_secret - def check_token_endpoint_auth_method(self, method): + def check_endpoint_auth_method(self, method, _endpoint): if not self.has_client_secret(): return method == "none" return method in ("client_secret_post", "client_secret_basic") @@ -149,6 +149,9 @@ class OAuth2Token(db.Document): def __str__(self): return "".format(self) + def check_client(self, client): + return self.client == client + def get_scope(self): return self.scope @@ -161,6 +164,13 @@ def get_expires_at(self): def get_client_id(self): return str(self.client.id) + def is_expired(self): + now = time.time() + return self.get_expires_at() < now + + def 
is_revoked(self): + return self.revoked + def is_refresh_token_valid(self): if self.revoked: return False @@ -198,7 +208,7 @@ def get_scope(self): class AuthorizationCodeGrant(grants.AuthorizationCodeGrant): - TOKEN_ENDPOINT_AUTH_METHODS = ["client_secret_basic", "client_secret_post"] + TOKEN_ENDPOINT_AUTH_METHODS = ["none", "client_secret_basic", "client_secret_post"] def save_authorization_code(self, code, request): code_challenge = request.data.get("code_challenge") @@ -238,6 +248,8 @@ def authenticate_user(self, username, password): class RefreshTokenGrant(grants.RefreshTokenGrant): + INCLUDE_NEW_REFRESH_TOKEN = True + def authenticate_refresh_token(self, refresh_token): item = OAuth2Token.objects(refresh_token=refresh_token).first() if item and item.is_refresh_token_valid(): @@ -252,17 +264,19 @@ def revoke_old_credential(self, credential): class RevokeToken(RevocationEndpoint): - def query_token(self, token, token_type_hint, client): - qs = OAuth2Token.objects(client=client) + CLIENT_AUTH_METHODS = ["none", "client_secret_basic"] + + def query_token(self, token_string, token_type_hint): + qs = OAuth2Token.objects() if token_type_hint == "access_token": - return qs.filter(access_token=token).first() + return qs.filter(access_token=token_string).first() elif token_type_hint == "refresh_token": - return qs.filter(refresh_token=token).first() + return qs.filter(refresh_token=token_string).first() else: - qs = qs(db.Q(access_token=token) | db.Q(refresh_token=token)) + qs = qs(db.Q(access_token=token_string) | db.Q(refresh_token=token_string)) return qs.first() - def revoke_token(self, token): + def revoke_token(self, token, _request): token.revoked = True token.save() @@ -295,7 +309,7 @@ def revoke_token(): def authorize(*args, **kwargs): if request.method == "GET": try: - grant = oauth.validate_consent_request(end_user=current_user) + grant = oauth.get_consent_grant(end_user=current_user) except OAuth2Error as error: return error.error # Bypass authorization 
screen for internal clients @@ -324,13 +338,15 @@ def query_client(client_id): def save_token(token, request): scope = token.pop("scope", "") + client = request.client + user = request.user or client.owner if request.grant_type == "refresh_token": - credential = request.credential - credential.update(scope=scope, **token) + old_token = OAuth2Token.objects( + refresh_token=request.refresh_token.refresh_token, client=client, user=user, scope=scope + ).first() + old_token.update(**token) else: - client = request.client - user = request.user or client.owner - OAuth2Token.objects.create(client=client, user=user.id, scope=scope, **token) + OAuth2Token.objects.create(client=client, user=user, scope=scope, **token) def check_credentials(): diff --git a/udata/api_fields.py b/udata/api_fields.py index 92eebc8f90..46803fe6d9 100644 --- a/udata/api_fields.py +++ b/udata/api_fields.py @@ -2,6 +2,7 @@ import mongoengine import mongoengine.fields as mongo_fields from bson import ObjectId +from flask_restx.inputs import boolean from flask_storage.mongo import ImageField as FlaskStorageImageField import udata.api.fields as custom_restx_fields @@ -92,12 +93,14 @@ def constructor_read(**kwargs): def constructor_write(**kwargs): return restx_fields.List(field_write, **kwargs) + elif isinstance( field, (mongo_fields.GenericReferenceField, mongoengine.fields.GenericLazyReferenceField) ): def constructor(**kwargs): return restx_fields.Nested(lazy_reference, **kwargs) + elif isinstance(field, mongo_fields.ReferenceField): # For reference we accept while writing a String representing the ID of the referenced model. 
# For reading, if the user supplied a `nested_fields` (RestX model), we use it to convert @@ -120,6 +123,7 @@ def constructor_read(**kwargs): def constructor(**kwargs): return restx_fields.Nested(nested_fields, **kwargs) + elif hasattr(field.document_type_obj, "__read_fields__"): def constructor_read(**kwargs): @@ -127,6 +131,7 @@ def constructor_read(**kwargs): def constructor_write(**kwargs): return restx_fields.Nested(field.document_type_obj.__write_fields__, **kwargs) + else: raise ValueError( f"EmbeddedDocumentField `{key}` requires a `nested_fields` param to serialize/deserialize or a `@generate_fields()` definition." @@ -187,7 +192,10 @@ def wrapper(cls): sortable_key = info.get("sortable", False) if sortable_key: sortables.append( - {"key": sortable_key if isinstance(sortable_key, str) else key, "value": key} + { + "key": sortable_key if isinstance(sortable_key, str) else key, + "value": key, + } ) filterable = info.get("filterable", None) @@ -208,7 +216,7 @@ def wrapper(cls): if "type" not in filterable: filterable["type"] = str if isinstance(field, mongo_fields.BooleanField): - filterable["type"] = bool + filterable["type"] = boolean # We may add more information later here: # - type of mongo query to execute (right now only simple =) @@ -305,7 +313,11 @@ def make_lambda(method): parser.add_argument("q", type=str, location="args") for filterable in filterables: - parser.add_argument(filterable["key"], type=filterable["type"], location="args") + parser.add_argument( + filterable["key"], + type=filterable["type"], + location="args", + ) cls.__index_parser__ = parser @@ -353,6 +365,30 @@ def apply_sort_filters_and_pagination(base_query): phrase_query = " ".join([f'"{elem}"' for elem in args["q"].split(" ")]) base_query = base_query.search_text(phrase_query) + for filterable in filterables: + if args.get(filterable["key"]) is not None: + for constraint in filterable["constraints"]: + if constraint == "objectid" and not ObjectId.is_valid( + 
args[filterable["key"]] + ): + api.abort(400, f'`{filterable["key"]}` must be an identifier') + + base_query = base_query.filter( + **{ + filterable["column"]: args[filterable["key"]], + } + ) + + if sort_by: + if negate: + sort_by = "-" + sort_by + + base_query = base_query.order_by(sort_by) + + if searchable and args.get("q"): + phrase_query = " ".join([f'"{elem}"' for elem in args["q"].split(" ")]) + base_query = base_query.search_text(phrase_query) + for filterable in filterables: if args.get(filterable["key"]): for constraint in filterable["constraints"]: diff --git a/udata/commands/fixtures.py b/udata/commands/fixtures.py index 860ebb7a3a..0a1d874a08 100644 --- a/udata/commands/fixtures.py +++ b/udata/commands/fixtures.py @@ -26,6 +26,7 @@ from udata.core.organization.models import Member, Organization from udata.core.reuse.factories import ReuseFactory from udata.core.user.factories import UserFactory +from udata.core.user.models import User log = logging.getLogger(__name__) @@ -55,22 +56,21 @@ "quality", ], "resource": ["latest", "preview_url", "last_modified"], - "organization": ["members", "page", "uri", "logo_thumbnail"], - "reuse": ["datasets", "image_thumbnail", "page", "uri", "organization", "owner"], + "organization": ["class", "page", "uri", "logo_thumbnail"], + "reuse": ["datasets", "image_thumbnail", "page", "uri", "owner"], "community": [ "dataset", - "organization", "owner", "latest", "last_modified", "preview_url", ], - "discussion": ["subject", "user", "url", "class"], - "message": ["posted_by"], + "discussion": ["subject", "url", "class"], + "user": ["uri", "page", "class", "avatar_thumbnail", "email"], + "posted_by": ["uri", "page", "class", "avatar_thumbnail", "email"], "dataservice": [ "datasets", "license", - "organization", "owner", "self_api_url", "self_web_url", @@ -80,6 +80,8 @@ def remove_unwanted_keys(obj: dict, filter_type: str) -> dict: """Remove UNWANTED_KEYS from a dict.""" + if filter_type not in UNWANTED_KEYS: + return obj for 
unwanted_key in UNWANTED_KEYS[filter_type]: if unwanted_key in obj: del obj[unwanted_key] @@ -150,6 +152,29 @@ def generate_fixtures_file(data_source: str, results_filename: str) -> None: print(f"Fixtures saved to file {results_filename}") +def get_or_create(data, key, model, factory): + """Try getting the object. If it doesn't exist yet, create it with the provided factory.""" + if key not in data or data[key] is None: + return + data[key] = remove_unwanted_keys(data[key], key) + obj = model.objects(id=data[key]["id"]).first() + if not obj: + obj = factory(**data[key]) + return obj + + +def get_or_create_organization(data): + return get_or_create(data, "organization", Organization, OrganizationFactory) + + +def get_or_create_owner(data): + return get_or_create(data, "owner", User, UserFactory) + + +def get_or_create_user(data): + return get_or_create(data, "user", User, UserFactory) + + @cli.command() @click.argument("source", default=DEFAULT_FIXTURE_FILE) def import_fixtures(source): @@ -165,52 +190,45 @@ def import_fixtures(source): user = UserFactory() dataset = fixture["dataset"] dataset = remove_unwanted_keys(dataset, "dataset") - if not fixture["organization"]: - dataset = DatasetFactory(**dataset, owner=user) - else: - org = Organization.objects(id=fixture["organization"]["id"]).first() - if not org: - organization = fixture["organization"] - organization = remove_unwanted_keys(organization, "organization") - org = OrganizationFactory(**organization, members=[Member(user=user)]) + if fixture["organization"]: + organization = fixture["organization"] + organization["members"] = [ + Member(user=get_or_create_user(member), role=member["role"]) + for member in organization["members"] + ] + fixture["organization"] = organization + org = get_or_create_organization(fixture) dataset = DatasetFactory(**dataset, organization=org) + else: + dataset = DatasetFactory(**dataset, owner=user) for resource in fixture["resources"]: resource = remove_unwanted_keys(resource, 
"resource") res = ResourceFactory(**resource) dataset.add_resource(res) for reuse in fixture["reuses"]: reuse = remove_unwanted_keys(reuse, "reuse") - ReuseFactory(**reuse, datasets=[dataset], owner=user) + reuse["owner"] = get_or_create_owner(reuse) + reuse["organization"] = get_or_create_organization(reuse) + ReuseFactory(**reuse, datasets=[dataset]) for community in fixture["community_resources"]: community = remove_unwanted_keys(community, "community") - CommunityResourceFactory(**community, dataset=dataset, owner=user) + community["owner"] = get_or_create_owner(community) + community["organization"] = get_or_create_organization(community) + CommunityResourceFactory(**community, dataset=dataset) for discussion in fixture["discussions"]: discussion = remove_unwanted_keys(discussion, "discussion") - messages = discussion.pop("discussion") - for message in messages: - message = remove_unwanted_keys(message, "message") - DiscussionFactory( - **discussion, - subject=dataset, - user=user, - discussion=[ - MessageDiscussionFactory(**message, posted_by=user) for message in messages - ], - ) + discussion["closed_by"] = get_or_create(discussion, "closed_by", User, UserFactory) + for message in discussion["discussion"]: + message["posted_by"] = get_or_create(message, "posted_by", User, UserFactory) + discussion["discussion"] = [ + MessageDiscussionFactory(**message) for message in discussion["discussion"] + ] + discussion["user"] = get_or_create_user(discussion) + DiscussionFactory(**discussion, subject=dataset) for dataservice in fixture["dataservices"]: dataservice = remove_unwanted_keys(dataservice, "dataservice") - if not dataservice["contact_point"]: - DataserviceFactory(**dataservice, datasets=[dataset], organization=org) - else: - contact_point = ContactPoint.objects( - id=dataservice["contact_point"]["id"] - ).first() - if not contact_point: - contact_point = ContactPointFactory(**dataservice["contact_point"]) - dataservice.pop("contact_point") - 
DataserviceFactory( - **dataservice, - datasets=[dataset], - organization=org, - contact_point=contact_point, - ) + dataservice["contact_point"] = get_or_create( + dataservice, "contact_point", ContactPoint, ContactPointFactory + ) + dataservice["organization"] = get_or_create_organization(dataservice) + DataserviceFactory(**dataservice, datasets=[dataset]) diff --git a/udata/commands/tests/test_fixtures.py b/udata/commands/tests/test_fixtures.py index d61ba7740b..15c06b9989 100644 --- a/udata/commands/tests/test_fixtures.py +++ b/udata/commands/tests/test_fixtures.py @@ -27,21 +27,28 @@ def test_generate_fixtures_file_then_import(self, app, cli, api, monkeypatch): """Test generating fixtures from the current env, then importing them back.""" assert models.Dataset.objects.count() == 0 # Start with a clean slate. user = UserFactory() - org = OrganizationFactory(**{}, members=[Member(user=user)]) + admin = UserFactory() + org = OrganizationFactory( + members=[Member(user=user, role="editor"), Member(user=admin, role="admin")] + ) # Set the same slug we're 'exporting' from the FIXTURE_DATASET_SLUG config, see the # @pytest.mark.options above. 
- dataset = DatasetFactory(**{}, slug="some-test-dataset-slug", organization=org) - res = ResourceFactory(**{}) + dataset = DatasetFactory(slug="some-test-dataset-slug", organization=org) + res = ResourceFactory() dataset.add_resource(res) - ReuseFactory(**{}, datasets=[dataset], owner=user) - CommunityResourceFactory(**{}, dataset=dataset, owner=user) + ReuseFactory(datasets=[dataset], owner=user) + CommunityResourceFactory(dataset=dataset, owner=user) DiscussionFactory( **{}, subject=dataset, user=user, - discussion=[MessageDiscussionFactory(**{}, posted_by=user)], + discussion=[ + MessageDiscussionFactory(posted_by=user), + MessageDiscussionFactory(posted_by=admin), + ], + closed_by=admin, ) - DataserviceFactory(**{}, datasets=[dataset]) + DataserviceFactory(datasets=[dataset], organization=org) with NamedTemporaryFile(mode="w+", delete=True) as fixtures_fd: # Get the fixtures from the local instance. @@ -69,13 +76,25 @@ def test_generate_fixtures_file_then_import(self, app, cli, api, monkeypatch): # Then load them in the database to make sure they're correct. 
result = cli("import-fixtures", fixtures_fd.name) assert models.Organization.objects(slug=org.slug).count() > 0 + result_org = models.Organization.objects.get(slug=org.slug) + assert result_org.members[0].user.id == user.id + assert result_org.members[0].role == "editor" + assert result_org.members[1].user.id == admin.id + assert result_org.members[1].role == "admin" assert models.Dataset.objects.count() > 0 assert models.Discussion.objects.count() > 0 + result_discussion = models.Discussion.objects.first() + assert result_discussion.user.id == user.id + assert result_discussion.closed_by.id == admin.id + assert len(result_discussion.discussion) == 2 + assert result_discussion.discussion[0].posted_by.id == user.id + assert result_discussion.discussion[1].posted_by.id == admin.id assert models.CommunityResource.objects.count() > 0 assert models.User.objects.count() > 0 assert models.Dataservice.objects.count() > 0 # Make sure we also import the dataservice organization - assert models.Dataservice.objects(organization__exists=True).count() > 0 + result_dataservice = models.Dataservice.objects.first() + assert result_dataservice.organization == org def test_import_fixtures_from_default_file(self, cli): """Test importing fixtures from udata.commands.fixture.DEFAULT_FIXTURE_FILE.""" diff --git a/udata/core/activity/api.py b/udata/core/activity/api.py index e5be521f5c..06f3745b56 100644 --- a/udata/core/activity/api.py +++ b/udata/core/activity/api.py @@ -1,5 +1,6 @@ import logging +from bson import ObjectId from mongoengine.errors import DoesNotExist from udata.api import API, api, fields @@ -61,13 +62,19 @@ help="Filter activities for that particular organization", location="args", ) +activity_parser.add_argument( + "related_to", + type=str, + help="Filter activities for that particular object id (ex : reuse, dataset, etc.)", + location="args", +) @api.route("/activity", endpoint="activity") class SiteActivityAPI(API): @api.doc("activity") @api.expect(activity_parser) 
- @api.marshal_list_with(activity_page_fields) + @api.marshal_with(activity_page_fields) def get(self): """Fetch site activity, optionally filtered by user of org.""" args = activity_parser.parse_args() @@ -79,6 +86,12 @@ def get(self): if args["user"]: qs = qs(actor=args["user"]) + if args["related_to"]: + if not ObjectId.is_valid(args["related_to"]): + api.abort(400, "`related_to` arg must be an identifier") + + qs = qs(related_to=args["related_to"]) + qs = qs.order_by("-created_at") qs = qs.paginate(args["page"], args["page_size"]) diff --git a/udata/core/dataservices/api.py b/udata/core/dataservices/api.py index 49a44b52b1..a8c55eb087 100644 --- a/udata/core/dataservices/api.py +++ b/udata/core/dataservices/api.py @@ -1,18 +1,23 @@ from datetime import datetime import mongoengine -from flask import request +from flask import make_response, redirect, request, url_for from flask_login import current_user from udata.api import API, api from udata.api_fields import patch from udata.core.dataset.permissions import OwnablePermission from udata.core.followers.api import FollowAPI +from udata.rdf import RDF_EXTENSIONS, graph_response, negociate_content from .models import Dataservice +from .permissions import DataserviceEditPermission +from .rdf import dataservice_to_rdf ns = api.namespace("dataservices", "Dataservices related operations (beta)") +common_doc = {"params": {"dataservice": "The dataservice ID or slug"}} + @ns.route("/", endpoint="dataservices") class DataservicesAPI(API): @@ -58,7 +63,10 @@ def get(self, dataservice): @api.expect(Dataservice.__write_fields__) @api.marshal_with(Dataservice.__read_fields__) def patch(self, dataservice): - if dataservice.deleted_at: + if dataservice.deleted_at and not ( + # Allow requests containing "deleted_at: None" to undelete. 
+ "deleted_at" in request.json and request.json.get("deleted_at") is None + ): api.abort(410, "dataservice has been deleted") OwnablePermission(dataservice).test() @@ -87,6 +95,37 @@ def delete(self, dataservice): return "", 204 +@ns.route("//rdf", endpoint="dataservice_rdf", doc=common_doc) +@api.response(404, "Dataservice not found") +@api.response(410, "Dataservice has been deleted") +class DataserviceRdfAPI(API): + @api.doc("rdf_dataservice") + def get(self, dataservice): + format = RDF_EXTENSIONS[negociate_content()] + url = url_for("api.dataservice_rdf_format", dataservice=dataservice.id, format=format) + return redirect(url) + + +@ns.route( + "//rdf.", endpoint="dataservice_rdf_format", doc=common_doc +) +@api.response(404, "Dataservice not found") +@api.response(410, "Dataservice has been deleted") +class DataserviceRdfFormatAPI(API): + @api.doc("rdf_dataservice_format") + def get(self, dataservice, format): + if not DataserviceEditPermission(dataservice).can(): + if dataservice.private: + api.abort(404) + elif dataservice.deleted_at: + api.abort(410) + + resource = dataservice_to_rdf(dataservice) + # bypass flask-restplus make_response, since graph_response + # is handling the content negociation directly + return make_response(*graph_response(resource, format)) + + @ns.route("//followers/", endpoint="dataservice_followers") @ns.doc( get={"id": "list_dataservice_followers"}, diff --git a/udata/core/dataservices/models.py b/udata/core/dataservices/models.py index 89b85a2540..30afc7efc6 100644 --- a/udata/core/dataservices/models.py +++ b/udata/core/dataservices/models.py @@ -1,6 +1,7 @@ from datetime import datetime -from elasticsearch_dsl import Search, query +from elasticsearch_dsl import query +from mongoengine import Q import udata.core.contact_point.api_fields as contact_api_fields import udata.core.dataset.api_fields as datasets_api_fields @@ -68,6 +69,42 @@ def visible(self): def hidden(self): return self(db.Q(private=True) | 
db.Q(deleted_at__ne=None) | db.Q(archived_at__ne=None)) + def filter_by_dataset_pagination(self, datasets: list[Dataset], page: int): + """Paginate the dataservices on the datasets provided. + + This is a workaround, used (at least) in the catalogs for sites and organizations. + We paginate those kinda weirdly, on their datasets. So a given organization or site + catalog will only list a `page_size` number of datasets, but we'd still want to display + the site's or org's dataservices. + We can't "double paginate", so instead: + - only if it's the first page, list all the dataservices that serve no dataset + - list all the dataservices that serve the datasets in this page + """ + # We need to add Dataservice to the catalog. + # In the best world, we want: + # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets) + # - Have simple MongoDB queries + # - Do not duplicate the datasets (each dataset is present once in the catalog) + # - Do not duplicate the dataservices (each dataservice is present once in the catalog) + # - Every referenced dataset for one dataservices present on the page (hard to do) + # + # Multiple solutions are possible but none check all the constraints. + # The selected one is to put all the dataservices referencing at least one of the dataset on + # the page at the end of it. It means dataservices could be duplicated (present on multiple pages) + # and these dataservices may referenced some datasets not present in the current page. It's working + # if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then + # listing all dataservices in a second pass) + # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but + # it could be really hard to do with a n..n relation. + # Let's keep this solution simple right now and iterate on it in the future. 
+ dataservices_filter = Q(datasets__in=[d.id for d in datasets]) + + # On the first page, add all dataservices without datasets + if page == 1: + dataservices_filter = dataservices_filter | Q(datasets__size=0) + + return self(dataservices_filter) + @generate_fields() class HarvestMetadata(db.EmbeddedDocument): @@ -210,7 +247,7 @@ class Dataservice(WithMetrics, Owned, db.Document): readonly=True, searchable=True, ) - deleted_at = field(db.DateTimeField(), readonly=True) + deleted_at = field(db.DateTimeField()) archived_at = field(db.DateTimeField(), readonly=True) datasets = field( diff --git a/udata/core/dataset/events.py b/udata/core/dataset/events.py index 9689e4f9d8..987a96b4c8 100644 --- a/udata/core/dataset/events.py +++ b/udata/core/dataset/events.py @@ -49,7 +49,10 @@ def publish(url, document, resource_id, action): "dataset_id": str(document.id), "document": resource, } - r = requests.post(url, json=payload) + headers = {} + if current_app.config["RESOURCES_ANALYSER_API_KEY"]: + headers = {"Authorization": f"Bearer {current_app.config['RESOURCES_ANALYSER_API_KEY']}"} + r = requests.post(url, json=payload, headers=headers) r.raise_for_status() diff --git a/udata/core/organization/api.py b/udata/core/organization/api.py index f534403b83..3faba1fb4e 100644 --- a/udata/core/organization/api.py +++ b/udata/core/organization/api.py @@ -10,6 +10,7 @@ from udata.core.badges.fields import badge_fields from udata.core.contact_point.api import ContactPointApiParser from udata.core.contact_point.api_fields import contact_point_page_fields +from udata.core.dataservices.models import Dataservice from udata.core.dataset.api import DatasetApiParser from udata.core.dataset.api_fields import dataset_page_fields from udata.core.dataset.models import Dataset @@ -175,7 +176,12 @@ def get(self, org, format): page = int(params.get("page", 1)) page_size = int(params.get("page_size", 100)) datasets = Dataset.objects(organization=org).visible().paginate(page, page_size) - catalog = 
build_org_catalog(org, datasets, format=format) + dataservices = ( + Dataservice.objects(organization=org) + .visible() + .filter_by_dataset_pagination(datasets, page) + ) + catalog = build_org_catalog(org, datasets, dataservices, format=format) # bypass flask-restplus make_response, since graph_response # is handling the content negociation directly return make_response(*graph_response(catalog, format)) diff --git a/udata/core/organization/api_fields.py b/udata/core/organization/api_fields.py index 4f5d387944..017e884629 100644 --- a/udata/core/organization/api_fields.py +++ b/udata/core/organization/api_fields.py @@ -45,7 +45,7 @@ from udata.core.user.api_fields import user_ref_fields # noqa -def check_can_access_email(): +def check_can_access_user_private_info(): # This endpoint is secure, only organization member has access. if request.endpoint == "api.request_membership": return True @@ -65,8 +65,13 @@ def check_can_access_email(): user_ref_fields, { "email": fields.Raw( - attribute=lambda o: o.email if check_can_access_email() else None, - description="The user email (only present on show organization endpoint if the current user has edit permission on the org)", + attribute=lambda o: o.email if check_can_access_user_private_info() else None, + description="The user email (only present on show organization endpoint if the current user is member of the organization: admin or editor)", + readonly=True, + ), + "last_login_at": fields.Raw( + attribute=lambda o: o.last_login_at if check_can_access_user_private_info() else None, + description="The user last connection date (only present on show organization endpoint if the current user is member of the organization: admin or editor)", readonly=True, ), }, diff --git a/udata/core/organization/csv.py b/udata/core/organization/csv.py index aebb4fa6e4..9a56b3f72e 100644 --- a/udata/core/organization/csv.py +++ b/udata/core/organization/csv.py @@ -11,6 +11,7 @@ class OrganizationCsvAdapter(csv.Adapter): fields = ( "id", 
"name", + "acronym", "slug", ("url", "external_url"), "description", diff --git a/udata/core/organization/rdf.py b/udata/core/organization/rdf.py index 26d089da7a..1dce7404b0 100644 --- a/udata/core/organization/rdf.py +++ b/udata/core/organization/rdf.py @@ -7,6 +7,7 @@ from rdflib import BNode, Graph, Literal, URIRef from rdflib.namespace import FOAF, RDF, RDFS +from udata.core.dataservices.rdf import dataservice_to_rdf from udata.core.dataset.rdf import dataset_to_rdf from udata.rdf import DCAT, DCT, namespace_manager, paginate_catalog from udata.uris import endpoint_for @@ -35,7 +36,7 @@ def organization_to_rdf(org, graph=None): return o -def build_org_catalog(org, datasets, format=None): +def build_org_catalog(org, datasets, dataservices, format=None): graph = Graph(namespace_manager=namespace_manager) org_catalog_url = url_for("api.organization_rdf", org=org.id, _external=True) @@ -47,6 +48,8 @@ def build_org_catalog(org, datasets, format=None): for dataset in datasets: catalog.add(DCAT.dataset, dataset_to_rdf(dataset, graph)) + for dataservice in dataservices: + catalog.add(DCAT.dataservice, dataservice_to_rdf(dataservice, graph)) values = {"org": org.id} diff --git a/udata/core/owned.py b/udata/core/owned.py index aa5ce1f429..90a4979df8 100644 --- a/udata/core/owned.py +++ b/udata/core/owned.py @@ -24,6 +24,22 @@ def owned_by(self, *owners): qs |= Q(owner=owner) | Q(organization=owner) return self(qs) + def visible_by_user(self, user: User, visible_query: Q): + """Return EVERYTHING visible to the user.""" + if user.is_anonymous: + return self(visible_query) + + if user.sysadmin: + return self() + + owners: list[User | Organization] = list(user.organizations) + [user.id] + # We create a new queryset because we want a pristine self._query_obj. 
+ owned_qs: OwnedQuerySet = self.__class__(self._document, self._collection_obj).owned_by( + *owners + ) + + return self(visible_query | owned_qs._query_obj) + def check_owner_is_current_user(owner): from udata.auth import admin_permission, current_user diff --git a/udata/core/reuse/api.py b/udata/core/reuse/api.py index cb56624e39..74f8fb0604 100644 --- a/udata/core/reuse/api.py +++ b/udata/core/reuse/api.py @@ -99,8 +99,9 @@ class ReuseListAPI(API): @api.expect(Reuse.__index_parser__) @api.marshal_with(Reuse.__page_fields__) def get(self): - query = Reuse.objects(deleted=None, private__ne=True) - + query = Reuse.objects.visible_by_user( + current_user, mongoengine.Q(private__ne=True, deleted=None) + ) return Reuse.apply_sort_filters_and_pagination(query) @api.secure diff --git a/udata/core/reuse/models.py b/udata/core/reuse/models.py index d456691929..b5c1f4a46c 100644 --- a/udata/core/reuse/models.py +++ b/udata/core/reuse/models.py @@ -104,7 +104,7 @@ class Reuse(db.Datetimed, WithMetrics, BadgeMixin, Owned, db.Document): ) # badges = db.ListField(db.EmbeddedDocumentField(ReuseBadge)) - private = field(db.BooleanField(default=False)) + private = field(db.BooleanField(default=False), filterable={}) ext = db.MapField(db.GenericEmbeddedDocumentField()) extras = field(db.ExtrasField()) @@ -116,7 +116,6 @@ class Reuse(db.Datetimed, WithMetrics, BadgeMixin, Owned, db.Document): ) deleted = field( db.DateTimeField(), - readonly=True, ) archived = field( db.DateTimeField(), diff --git a/udata/core/site/api.py b/udata/core/site/api.py index bcc50dcb51..9fe8928086 100644 --- a/udata/core/site/api.py +++ b/udata/core/site/api.py @@ -1,6 +1,5 @@ from bson import ObjectId from flask import json, make_response, redirect, request, url_for -from mongoengine import Q from udata.api import API, api, fields from udata.auth import admin_permission @@ -107,31 +106,7 @@ def get(self, format): if "tag" in params: datasets = datasets.filter(tags=params.get("tag", "")) datasets = 
datasets.paginate(page, page_size) - - # We need to add Dataservice to the catalog. - # In the best world, we want: - # - Keep the correct number of datasets on the page (if the requested page size is 100, we should have 100 datasets) - # - Have simple MongoDB queries - # - Do not duplicate the datasets (each dataset is present once in the catalog) - # - Do not duplicate the dataservices (each dataservice is present once in the catalog) - # - Every referenced dataset for one dataservices present on the page (hard to do) - # - # Multiple solutions are possible but none check all the constraints. - # The selected one is to put all the dataservices referencing at least one of the dataset on - # the page at the end of it. It means dataservices could be duplicated (present on multiple pages) - # and these dataservices may referenced some datasets not present in the current page. It's working - # if somebody is doing the same thing as us (keeping the list of all the datasets IDs for the entire catalog then - # listing all dataservices in a second pass) - # Another option is to do some tricky Mongo requests to order/group datasets by their presence in some dataservices but - # it could be really hard to do with a n..n relation. - # Let's keep this solution simple right now and iterate on it in the future. 
- dataservices_filter = Q(datasets__in=[d.id for d in datasets]) - - # On the first page, add all dataservices without datasets - if page == 1: - dataservices_filter = dataservices_filter | Q(datasets__size=0) - - dataservices = Dataservice.objects.visible().filter(dataservices_filter) + dataservices = Dataservice.objects.visible().filter_by_dataset_pagination(datasets, page) catalog = build_catalog(current_site, datasets, dataservices=dataservices, format=format) # bypass flask-restplus make_response, since graph_response diff --git a/udata/core/user/api.py b/udata/core/user/api.py index 7a787e8f07..b59e454271 100644 --- a/udata/core/user/api.py +++ b/udata/core/user/api.py @@ -1,3 +1,5 @@ +from typing import Optional + from flask_security import current_user, logout_user from slugify import slugify @@ -359,8 +361,27 @@ def post(self, id): suggest_parser.add_argument( "q", help="The string to autocomplete/suggest", location="args", required=True ) + + +def suggest_size(value: str) -> Optional[int]: + """Parse an integer that must be between 1 and 20.""" + help_message = "The size must be an integer between 1 and 20." 
+ try: + parsed = int(value) + except ValueError: + raise ValueError(help_message) + + if parsed < 1 or parsed > 20: + raise ValueError(help_message) + return parsed + + suggest_parser.add_argument( - "size", type=int, help="The amount of suggestion to fetch", location="args", default=10 + "size", + type=suggest_size, + help="The amount of suggestion to fetch (between 1 and 20)", + location="args", + default=10, ) diff --git a/udata/cors.py b/udata/cors.py index 77cecf5c73..94a267784d 100644 --- a/udata/cors.py +++ b/udata/cors.py @@ -1,6 +1,6 @@ import logging -from flask import request +from flask import g, request from werkzeug.datastructures import Headers log = logging.getLogger(__name__) @@ -15,10 +15,10 @@ def add_vary(headers: Headers, header: str): def add_actual_request_headers(headers: Headers) -> Headers: origin = request.headers.get("Origin", None) + add_vary(headers, "Origin") + if origin: headers.set("Access-Control-Allow-Origin", origin) - add_vary(headers, "Origin") - headers.set("Access-Control-Allow-Credentials", "true") return headers @@ -32,19 +32,24 @@ def is_preflight_request() -> bool: def is_allowed_cors_route(): + if g and hasattr(g, "lang_code"): + path: str = request.path.removeprefix(f"/{g.lang_code}") + else: + path: str = request.path return ( - request.path.endswith((".js", ".css", ".woff", ".woff2", ".png", ".jpg", ".jpeg", ".svg")) - or request.path.startswith("/api") - or request.path.startswith("/oauth") + path.endswith((".js", ".css", ".woff", ".woff2", ".png", ".jpg", ".jpeg", ".svg")) + or path.startswith("/api") + or path.startswith("/oauth") + or path.startswith("/datasets/r/") ) def add_preflight_request_headers(headers: Headers) -> Headers: origin = request.headers.get("Origin", None) + add_vary(headers, "Origin") + if origin: headers.set("Access-Control-Allow-Origin", origin) - add_vary(headers, "Origin") - headers.set("Access-Control-Allow-Credentials", "true") # The API allows all methods, so just copy the browser 
requested methods from the request headers. diff --git a/udata/settings.py b/udata/settings.py index 9b3815821b..9448c73a0b 100644 --- a/udata/settings.py +++ b/udata/settings.py @@ -522,6 +522,7 @@ class Defaults(object): FIXTURE_DATASET_SLUGS = [] PUBLISH_ON_RESOURCE_EVENTS = False RESOURCES_ANALYSER_URI = "http://localhost:8000" + RESOURCES_ANALYSER_API_KEY = None # Datasets quality settings ########################################################################### diff --git a/udata/tests/api/test_activities_api.py b/udata/tests/api/test_activities_api.py new file mode 100644 index 0000000000..797d7d286f --- /dev/null +++ b/udata/tests/api/test_activities_api.py @@ -0,0 +1,69 @@ +import pytest +from flask import url_for +from werkzeug.test import TestResponse + +from udata.core.activity.models import Activity +from udata.core.dataset.factories import DatasetFactory +from udata.core.dataset.models import Dataset +from udata.core.reuse.factories import ReuseFactory +from udata.core.reuse.models import Reuse +from udata.core.user.factories import UserFactory +from udata.mongo import db +from udata.tests.helpers import assert200, assert400 + +pytestmark = [ + pytest.mark.usefixtures("clean_db"), +] + + +class FakeDatasetActivity(Activity): + key = "fakeDataset" + related_to = db.ReferenceField(Dataset, required=True) + + +class FakeReuseActivity(Activity): + key = "fakeReuse" + related_to = db.ReferenceField(Reuse, required=True) + + +class ActivityAPITest: + modules = [] + + def test_activity_api_list(self, api) -> None: + """It should fetch an activity list from the API""" + activities: list[Activity] = [ + FakeDatasetActivity.objects.create(actor=UserFactory(), related_to=DatasetFactory()), + FakeReuseActivity.objects.create(actor=UserFactory(), related_to=ReuseFactory()), + ] + + response: TestResponse = api.get(url_for("api.activity")) + assert200(response) + assert len(response.json["data"]) == len(activities) + + def 
test_activity_api_list_filter_by_bogus_related_to(self, api) -> None: + """It should return a 400 error if the `related_to` parameter isn't a valid ObjectId.""" + response: TestResponse = api.get(url_for("api.activity", related_to="foobar")) + assert400(response) + + def test_activity_api_list_filtered_by_related_to(self, api) -> None: + """It should only return activities that correspond to the `related_to` parameter.""" + dataset1: Dataset = DatasetFactory() + dataset2: Dataset = DatasetFactory() + reuse: Reuse = ReuseFactory() + _activities: list[Activity] = [ + FakeDatasetActivity.objects.create(actor=UserFactory(), related_to=dataset1), + FakeDatasetActivity.objects.create(actor=UserFactory(), related_to=dataset1), + FakeDatasetActivity.objects.create(actor=UserFactory(), related_to=dataset2), + FakeReuseActivity.objects.create(actor=UserFactory(), related_to=reuse), + ] + + response: TestResponse = api.get(url_for("api.activity", related_to=dataset1.id)) + assert200(response) + assert len(response.json["data"]) == 2 + assert response.json["data"][0]["related_to"] == dataset1.title + assert response.json["data"][1]["related_to"] == dataset1.title + + response: TestResponse = api.get(url_for("api.activity", related_to=reuse.id)) + assert200(response) + assert len(response.json["data"]) == 1 + assert response.json["data"][0]["related_to"] == reuse.title diff --git a/udata/tests/api/test_auth_api.py b/udata/tests/api/test_auth_api.py index 8888ade293..834d4ae4ba 100644 --- a/udata/tests/api/test_auth_api.py +++ b/udata/tests/api/test_auth_api.py @@ -59,6 +59,7 @@ def oauth(app, request): name="test-client", owner=UserFactory(), redirect_uris=["https://test.org/callback"], + secret="suchs3cr3t", ) kwargs.update(custom_kwargs) return OAuth2Client.objects.create(**kwargs) @@ -301,7 +302,7 @@ def test_authorization_accept_no_wildcard(self, client, oauth): assert_status(response, 400) assert "error" in response.json - assert "redirect_uri" in response.json["error_description"] + 
assert "Redirect URI" in response.json["error_description"] @pytest.mark.options(OAUTH2_ALLOW_WILDCARD_IN_REDIRECT_URI=True) @pytest.mark.oauth(redirect_uris=["https://*.test.org/callback"]) @@ -327,7 +328,7 @@ def test_authorization_accept_wrong_wildcard(self, client, oauth): assert_status(response, 400) assert "error" in response.json - assert "redirect_uri" in response.json["error_description"] + assert "Redirect URI" in response.json["error_description"] def test_authorization_grant_token(self, client, oauth): client.login() @@ -360,6 +361,8 @@ def test_authorization_grant_token(self, client, oauth): assert200(response) assert response.content_type == "application/json" assert "access_token" in response.json + tokens = OAuth2Token.objects(access_token=response.json["access_token"]) + assert len(tokens) == 1 # A token has been created and saved. def test_s256_code_challenge_success_client_secret_basic(self, client, oauth): code_verifier = generate_token(48) @@ -454,6 +457,93 @@ def test_s256_code_challenge_success_client_secret_post(self, client, oauth): assert response.content_type == "application/json" assert response.json == {"success": True} + @pytest.mark.oauth(secret=None) + def test_s256_code_challenge_success_no_client_secret(self, client, oauth): + """Authenticate through an OAuth client that has no secret associated (public client)""" + code_verifier = generate_token(48) + code_challenge = create_s256_code_challenge(code_verifier) + + client.login() + + response = client.post( + url_for( + "oauth.authorize", + response_type="code", + client_id=oauth.client_id, + code_challenge=code_challenge, + code_challenge_method="S256", + ), + { + "scope": "default", + "accept": "", + }, + ) + assert "code=" in response.location + + params = dict(url_decode(urlparse.urlparse(response.location).query)) + code = params["code"] + + response = client.post( + url_for("oauth.token"), + { + "grant_type": "authorization_code", + "code": code, + "code_verifier": 
code_verifier, + "client_id": oauth.client_id, + }, + ) + + assert200(response) + assert response.content_type == "application/json" + assert "access_token" in response.json + + token = response.json["access_token"] + + response = client.post( + url_for("api.fake"), headers={"Authorization": " ".join(["Bearer", token])} + ) + + assert200(response) + assert response.content_type == "application/json" + assert response.json == {"success": True} + + def test_s256_code_challenge_missing_client_secret(self, client, oauth): + """Fail authentication through an OAuth client with missing secret""" + code_verifier = generate_token(48) + code_challenge = create_s256_code_challenge(code_verifier) + + client.login() + + response = client.post( + url_for( + "oauth.authorize", + response_type="code", + client_id=oauth.client_id, + code_challenge=code_challenge, + code_challenge_method="S256", + ), + { + "scope": "default", + "accept": "", + }, + ) + assert "code=" in response.location + + params = dict(url_decode(urlparse.urlparse(response.location).query)) + code = params["code"] + + response = client.post( + url_for("oauth.token"), + { + "grant_type": "authorization_code", + "code": code, + "code_verifier": code_verifier, + "client_id": oauth.client_id, + }, + ) + + assert401(response) + def test_authorization_multiple_grant_token(self, client, oauth): for i in range(3): client.login() @@ -582,23 +672,36 @@ def test_invalid_implicit_grant_token(self, client, oauth): ) assert_status(response, 400) - assert response.json["error"] == "invalid_grant" + assert response.json["error"] == "unsupported_response_type" @pytest.mark.oauth(confidential=True) def test_refresh_token(self, client, oauth): user = UserFactory() - token = OAuth2Token.objects.create( + token_to_be_refreshed = OAuth2Token.objects.create( client=oauth, user=user, access_token="access-token", refresh_token="refresh-token", ) + token_same_user_not_refreshed = OAuth2Token.objects.create( + client=oauth, + user=user, + 
access_token="same-user-access-token", + refresh_token="same-user-refresh-token", + ) + other_token = OAuth2Token.objects.create( + client=oauth, + user=UserFactory(), + access_token="other-access-token", + refresh_token="other-refresh-token", + ) + tokens_count = OAuth2Token.objects.count() response = client.post( url_for("oauth.token"), { "grant_type": "refresh_token", - "refresh_token": token.refresh_token, + "refresh_token": token_to_be_refreshed.refresh_token, }, headers=basic_header(oauth), ) @@ -607,6 +710,26 @@ def test_refresh_token(self, client, oauth): assert response.content_type == "application/json" assert "access_token" in response.json + # Reload from the DB. + token_to_be_refreshed.reload() + token_same_user_not_refreshed.reload() + other_token.reload() + + assert tokens_count == OAuth2Token.objects.count() # No new token created. + + # The access token has been refreshed. + assert token_to_be_refreshed.access_token != "access-token" + # The refresh token is also updated. + assert token_to_be_refreshed.refresh_token != "refresh-token" + + # No change to the user's other token. + assert token_same_user_not_refreshed.access_token == "same-user-access-token" + assert token_same_user_not_refreshed.refresh_token == "same-user-refresh-token" + + # No change to other token. 
+ assert other_token.access_token == "other-access-token" + assert other_token.refresh_token == "other-refresh-token" + @pytest.mark.parametrize("token_type", ["access_token", "refresh_token"]) def test_revoke_token(self, client, oauth, token_type): user = UserFactory() diff --git a/udata/tests/api/test_dataservices_api.py b/udata/tests/api/test_dataservices_api.py index 0158b3828b..360e89e3ea 100644 --- a/udata/tests/api/test_dataservices_api.py +++ b/udata/tests/api/test_dataservices_api.py @@ -1,6 +1,8 @@ import time from datetime import datetime +from xml.etree.ElementTree import XML +import pytest from flask import url_for from udata.core.badges.models import Badge @@ -12,6 +14,7 @@ from udata.core.organization.models import Member from udata.core.user.factories import UserFactory from udata.i18n import gettext as _ +from udata.tests.helpers import assert200, assert_redirects from . import APITestCase @@ -102,8 +105,29 @@ def test_dataservice_api_create(self): self.assertEqual(dataservice.base_api_url, "https://example.org") self.assertIsNotNone(dataservice.deleted_at) - # response = self.get(url_for('api.dataservice', dataservice=dataservice)) - # self.assert410(response) + # We can access deleted element as the creator + response = self.get(url_for("api.dataservice", dataservice=dataservice)) + self.assert200(response) + + # We cannot access deleted element as random user + self.login() + response = self.get(url_for("api.dataservice", dataservice=dataservice)) + self.assert410(response) + + # We can undelete with a patch + self.login(user) + response = self.patch( + url_for("api.dataservice", dataservice=dataservice), + { + "title": "Undeleted title", + "deleted_at": None, + }, + ) + self.assert200(response) + + dataservice.reload() + self.assertEqual(dataservice.title, "Undeleted title") + self.assertIsNone(dataservice.deleted_at) def test_dataservice_api_index(self): dataset_a = DatasetFactory() @@ -409,3 +433,57 @@ def test_elasticsearch(self): assert 
dataservices[0]["title"] == dataservice_a.title assert dataservices[1]["title"] == dataservice_c.title + + +@pytest.mark.frontend +class DataserviceRdfViewsTest: + def test_rdf_default_to_jsonld(self, client): + dataservice = DataserviceFactory() + expected = url_for("api.dataservice_rdf_format", dataservice=dataservice.id, format="json") + response = client.get(url_for("api.dataservice_rdf", dataservice=dataservice)) + assert_redirects(response, expected) + + def test_rdf_perform_content_negociation(self, client): + dataservice = DataserviceFactory() + expected = url_for("api.dataservice_rdf_format", dataservice=dataservice.id, format="xml") + url = url_for("api.dataservice_rdf", dataservice=dataservice) + headers = {"accept": "application/xml"} + response = client.get(url, headers=headers) + assert_redirects(response, expected) + + def test_rdf_perform_content_negociation_response(self, client): + """Check we have valid XML as output""" + dataservice = DataserviceFactory() + url = url_for("api.dataservice_rdf", dataservice=dataservice) + headers = {"accept": "application/xml"} + response = client.get(url, headers=headers, follow_redirects=True) + element = XML(response.data) + assert element.tag == "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF" + + def test_dataservice_rdf_json_ld(self, client): + dataservice = DataserviceFactory() + for fmt in "json", "jsonld": + url = url_for("api.dataservice_rdf_format", dataservice=dataservice, format=fmt) + response = client.get(url, headers={"Accept": "application/ld+json"}) + assert200(response) + assert response.content_type == "application/ld+json" + assert response.json["@context"]["@vocab"] == "http://www.w3.org/ns/dcat#" + + @pytest.mark.parametrize( + "fmt,mime", + [ + ("n3", "text/n3"), + ("nt", "application/n-triples"), + ("ttl", "application/x-turtle"), + ("xml", "application/rdf+xml"), + ("rdf", "application/rdf+xml"), + ("owl", "application/rdf+xml"), + ("trig", "application/trig"), + ], + ) + def 
test_dataservice_rdf_formats(self, client, fmt, mime): + dataservice = DataserviceFactory() + url = url_for("api.dataservice_rdf_format", dataservice=dataservice, format=fmt) + response = client.get(url, headers={"Accept": mime}) + assert200(response) + assert response.content_type == mime diff --git a/udata/tests/api/test_reuses_api.py b/udata/tests/api/test_reuses_api.py index 7780107eda..b1927913ee 100644 --- a/udata/tests/api/test_reuses_api.py +++ b/udata/tests/api/test_reuses_api.py @@ -2,6 +2,7 @@ import pytest from flask import url_for +from werkzeug.test import TestResponse from udata.core.badges.factories import badge_factory from udata.core.dataset.factories import DatasetFactory @@ -25,6 +26,11 @@ ] +def reuse_in_response(response: TestResponse, reuse: Reuse) -> bool: + only_reuse = [r for r in response.json["data"] if r["id"] == str(reuse.id)] + return len(only_reuse) > 0 + + class ReuseAPITest: modules = [] @@ -83,6 +89,12 @@ def test_reuse_api_list_with_filters(self, api): assert len(response.json["data"]) == 1 assert response.json["data"][0]["id"] == str(featured_reuse.id) + response = api.get(url_for("api.reuses", featured="false")) + assert200(response) + # Keep only featured reuses (if any) + data = [reuse for reuse in response.json["data"] if reuse["featured"]] + assert len(data) == 0 # It did not return any featured reuse + # filter on topic response = api.get(url_for("api.reuses", topic=topic_reuse.topic)) assert200(response) @@ -113,6 +125,104 @@ def test_reuse_api_list_with_filters(self, api): response = api.get(url_for("api.reuses", organization="org-id")) assert400(response) + def test_reuse_api_list_filter_private(self, api) -> None: + """Should filters reuses results based on the `private` filter""" + user = UserFactory() + public_reuse: Reuse = ReuseFactory() + private_reuse: Reuse = ReuseFactory(private=True, owner=user) + + # Only public reuses for non-authenticated user. 
+ response: TestResponse = api.get(url_for("api.reuses")) + assert200(response) + assert len(response.json["data"]) == 1 + assert reuse_in_response(response, public_reuse) + + # With an authenticated user. + api.login(user) + # all the reuses (by default) + response = api.get(url_for("api.reuses")) + assert200(response) + assert len(response.json["data"]) == 2 # Return everything + assert reuse_in_response(response, public_reuse) + assert reuse_in_response(response, private_reuse) + + # only public + response = api.get(url_for("api.reuses", private="false")) + assert200(response) + assert len(response.json["data"]) == 1 # Don't return the private reuse + assert reuse_in_response(response, public_reuse) + + # only private + response = api.get(url_for("api.reuses", private="true")) + assert200(response) + assert len(response.json["data"]) == 1 # Return only the private + assert reuse_in_response(response, private_reuse) + + def test_reuse_api_list_filter_private_only_owned_by_user(self, api) -> None: + """Should only return private reuses that are owned.""" + user = UserFactory() + member = Member(user=user, role="editor") + org = OrganizationFactory(members=[member]) + private_owned: Reuse = ReuseFactory(private=True, owner=user) + private_owned_through_org: Reuse = ReuseFactory(private=True, organization=org) + private_not_owned: Reuse = ReuseFactory(private=True) + + # Only public reuses for non-authenticated user. + response: TestResponse = api.get(url_for("api.reuses")) + assert200(response) + assert len(response.json["data"]) == 0 + + # With an authenticated user. 
+ api.login(user) + response = api.get(url_for("api.reuses")) + assert200(response) + assert len(response.json["data"]) == 2 # Only the owned reuses + assert reuse_in_response(response, private_owned) + assert reuse_in_response(response, private_owned_through_org) + assert not reuse_in_response(response, private_not_owned) + + # Still no private returned if `private=False` + response = api.get(url_for("api.reuses", private=False)) + assert200(response) + assert len(response.json["data"]) == 0 + + # Still only return owned private reuses + response = api.get(url_for("api.reuses", private=True)) + assert200(response) + assert len(response.json["data"]) == 2 # Only the owned reuses + assert reuse_in_response(response, private_owned) + assert reuse_in_response(response, private_owned_through_org) + assert not reuse_in_response(response, private_not_owned) + + def test_reuse_api_list_filter_private_only_owned_by_user_no_user(self, api) -> None: + """Shouldn't return any private reuses for non logged in users.""" + user = UserFactory() + member = Member(user=user, role="editor") + org = OrganizationFactory(members=[member]) + public_owned: Reuse = ReuseFactory(owner=user) + public_not_owned: Reuse = ReuseFactory() + _private_owned: Reuse = ReuseFactory(private=True, owner=user) + _private_owned_through_org: Reuse = ReuseFactory(private=True, organization=org) + _private_not_owned: Reuse = ReuseFactory(private=True) + + response: TestResponse = api.get(url_for("api.reuses")) + assert200(response) + assert len(response.json["data"]) == 2 + assert reuse_in_response(response, public_owned) + assert reuse_in_response(response, public_not_owned) + + # Still no private returned if `private=False` + response = api.get(url_for("api.reuses", private=False)) + assert200(response) + assert len(response.json["data"]) == 2 + assert reuse_in_response(response, public_owned) + assert reuse_in_response(response, public_not_owned) + + # Still no private returned if `private=True` + 
response = api.get(url_for("api.reuses", private=True)) + assert200(response) + assert len(response.json["data"]) == 0 + def test_reuse_api_get(self, api): """It should fetch a reuse from the API""" reuse = ReuseFactory() @@ -227,6 +337,11 @@ def test_reuse_api_delete(self, api): assert Reuse.objects.count() == 1 assert Reuse.objects[0].deleted is not None + response = api.put(url_for("api.reuse", reuse=reuse), {"deleted": None}) + assert200(response) + assert Reuse.objects.count() == 1 + assert Reuse.objects[0].deleted is None + def test_reuse_api_delete_deleted(self, api): """It should not delete a deleted reuse from the API and raise 410""" api.login() diff --git a/udata/tests/api/test_tags_api.py b/udata/tests/api/test_tags_api.py index 7d05efddbd..27023cad1b 100644 --- a/udata/tests/api/test_tags_api.py +++ b/udata/tests/api/test_tags_api.py @@ -13,7 +13,7 @@ class TagsAPITest: def test_suggest_tags_api(self, api): """It should suggest tags""" for i in range(3): - tags = [faker.word(), faker.word(), "test", "test-{0}".format(i)] + tags = [faker.tag(), faker.tag(), "test", "test-{0}".format(i)] ReuseFactory(tags=tags, visible=True) DatasetFactory(tags=tags, visible=True) @@ -33,7 +33,7 @@ def test_suggest_tags_api(self, api): def test_suggest_tags_api_with_unicode(self, api): """It should suggest tags""" for i in range(3): - tags = [faker.word(), faker.word(), "testé", "testé-{0}".format(i)] + tags = [faker.tag(), faker.tag(), "testé", "testé-{0}".format(i)] ReuseFactory(tags=tags, visible=True) DatasetFactory(tags=tags, visible=True) diff --git a/udata/tests/api/test_user_api.py b/udata/tests/api/test_user_api.py index af1263a876..91bcff5ed6 100644 --- a/udata/tests/api/test_user_api.py +++ b/udata/tests/api/test_user_api.py @@ -141,6 +141,17 @@ def test_suggest_users_api_no_dedup(self): self.assertEqual(suggestion["first_name"], "test") self.assertEqual(suggestion["last_name"], "homonym") + def test_suggest_users_api_size_validation(self): + """It should 
validate that the size parameter is between 1 and 20.""" + response = self.get(url_for("api.suggest_users"), qs={"q": "foobar", "size": "0"}) + self.assert400(response) + self.assertIn("between 1 and 20", response.json["errors"]["size"]) + + response = self.get(url_for("api.suggest_users"), qs={"q": "foobar", "size": "21"}) + + self.assert400(response) + self.assertIn("between 1 and 20", response.json["errors"]["size"]) + def test_user_api_full_text_search_first_name(self): """It should find users based on first name""" self.login(AdminFactory()) diff --git a/udata/tests/dataset/test_dataset_events.py b/udata/tests/dataset/test_dataset_events.py index ec49f19fce..cd25314eea 100644 --- a/udata/tests/dataset/test_dataset_events.py +++ b/udata/tests/dataset/test_dataset_events.py @@ -15,6 +15,29 @@ @pytest.mark.usefixtures("enable_resource_event") class DatasetEventsTest: @patch("requests.post") + @pytest.mark.options(RESOURCES_ANALYSER_API_KEY=None) + def test_publish_message_resource_created_no_api_key(self, mock_req): + dataset = DatasetFactory() + resource = ResourceFactory() + expected_signals = (Dataset.on_resource_added,) + + with assert_emit(*expected_signals): + dataset.add_resource(resource) + + expected_value = { + "resource_id": str(resource.id), + "dataset_id": str(dataset.id), + "document": serialize_resource_for_event(resource), + } + + mock_req.assert_called_with( + f"{current_app.config['RESOURCES_ANALYSER_URI']}/api/resource/created/", + json=expected_value, + headers={}, # No RESOURCES_ANALYSER_API_KEY, no headers. 
+ ) + + @patch("requests.post") + @pytest.mark.options(RESOURCES_ANALYSER_API_KEY="foobar-api-key") def test_publish_message_resource_created(self, mock_req): dataset = DatasetFactory() resource = ResourceFactory() @@ -32,9 +55,11 @@ def test_publish_message_resource_created(self, mock_req): mock_req.assert_called_with( f"{current_app.config['RESOURCES_ANALYSER_URI']}/api/resource/created/", json=expected_value, + headers={"Authorization": "Bearer foobar-api-key"}, ) @patch("requests.post") + @pytest.mark.options(RESOURCES_ANALYSER_API_KEY="foobar-api-key") def test_publish_message_resource_modified(self, mock_req): resource = ResourceFactory(schema=Schema(url="http://localhost/my-schema")) dataset = DatasetFactory(resources=[resource]) @@ -54,6 +79,7 @@ def test_publish_message_resource_modified(self, mock_req): mock_req.assert_called_with( f"{current_app.config['RESOURCES_ANALYSER_URI']}/api/resource/updated/", json=expected_value, + headers={"Authorization": "Bearer foobar-api-key"}, ) # Mocking requests call doesn't call the JSON encoder @@ -62,6 +88,7 @@ def test_publish_message_resource_modified(self, mock_req): complexjson.dumps(expected_value) @patch("requests.post") + @pytest.mark.options(RESOURCES_ANALYSER_API_KEY="foobar-api-key") def test_publish_message_resource_removed(self, mock_req): resource = ResourceFactory() dataset = DatasetFactory(resources=[resource]) @@ -79,4 +106,5 @@ def test_publish_message_resource_removed(self, mock_req): mock_req.assert_called_with( f"{current_app.config['RESOURCES_ANALYSER_URI']}/api/resource/deleted/", json=expected_value, + headers={"Authorization": "Bearer foobar-api-key"}, ) diff --git a/udata/tests/dataset/test_dataset_rdf.py b/udata/tests/dataset/test_dataset_rdf.py index 62a212adff..6844e3d38e 100644 --- a/udata/tests/dataset/test_dataset_rdf.py +++ b/udata/tests/dataset/test_dataset_rdf.py @@ -84,7 +84,7 @@ def test_all_dataset_fields(self): resources = ResourceFactory.build_batch(3) org = 
OrganizationFactory(name="organization") dataset = DatasetFactory( - tags=faker.words(nb=3), + tags=faker.tags(nb=3), resources=resources, frequency="daily", acronym="acro", @@ -295,7 +295,7 @@ def test_all_fields(self): title = faker.sentence() acronym = faker.word() description = faker.paragraph() - tags = faker.words(nb=3) + tags = faker.tags(nb=3) start = faker.past_date(start_date="-30d") end = faker.future_date(end_date="+30d") g.set((node, RDF.type, DCAT.Dataset)) @@ -347,8 +347,8 @@ def test_theme_and_tags(self): node = BNode() g = Graph() - tags = faker.words(nb=3) - themes = faker.words(nb=3) + tags = faker.tags(nb=3) + themes = faker.tags(nb=3) g.add((node, RDF.type, DCAT.Dataset)) g.add((node, DCT.title, Literal(faker.sentence()))) for tag in tags: diff --git a/udata/tests/frontend/test_csv.py b/udata/tests/frontend/test_csv.py index 5b87fe91ae..3291a72810 100644 --- a/udata/tests/frontend/test_csv.py +++ b/udata/tests/frontend/test_csv.py @@ -59,7 +59,7 @@ class Meta: title = factory.LazyAttribute(lambda o: faker.sentence()) description = factory.LazyAttribute(lambda o: faker.paragraph()) - tags = factory.LazyAttribute(lambda o: [faker.word() for _ in range(1, randint(1, 4))]) + tags = factory.LazyAttribute(lambda o: faker.tags(randint(1, 4))) sub = factory.SubFactory(NestedFactory) diff --git a/udata/tests/organization/test_organization_rdf.py b/udata/tests/organization/test_organization_rdf.py index c12a795c27..f85b712d7d 100644 --- a/udata/tests/organization/test_organization_rdf.py +++ b/udata/tests/organization/test_organization_rdf.py @@ -4,6 +4,8 @@ from rdflib.resource import Resource as RdfResource from udata import api +from udata.core.dataservices.factories import DataserviceFactory +from udata.core.dataservices.models import Dataservice from udata.core.dataset.factories import DatasetFactory from udata.core.dataset.models import Dataset from udata.core.organization.factories import OrganizationFactory @@ -55,7 +57,8 @@ def 
test_catalog(self): uri = url_for("api.organization_rdf", org=origin_org.id, _external=True) datasets = DatasetFactory.create_batch(3, organization=origin_org) - catalog = build_org_catalog(origin_org, datasets) + dataservices = DataserviceFactory.create_batch(3, organization=origin_org) + catalog = build_org_catalog(origin_org, datasets, dataservices) graph = catalog.graph @@ -69,6 +72,7 @@ def test_catalog(self): self.assertEqual(str(catalog.identifier), uri) self.assertEqual(len(list(catalog.objects(DCAT.dataset))), len(datasets)) + self.assertEqual(len(list(catalog.objects(DCAT.dataservice))), len(dataservices)) org = catalog.value(DCT.publisher) self.assertEqual(org.value(RDF.type).identifier, FOAF.Organization) @@ -80,6 +84,8 @@ def test_catalog(self): graph = catalog.graph graph_datasets = graph.subjects(RDF.type, DCAT.Dataset) self.assertEqual(len(list(graph_datasets)), len(datasets)) + graph_dataservices = graph.subjects(RDF.type, DCAT.DataService) + self.assertEqual(len(list(graph_dataservices)), len(dataservices)) def test_catalog_pagination(self): origin_org = OrganizationFactory() @@ -102,11 +108,23 @@ def test_catalog_pagination(self): page_size=page_size, _external=True, ) + # First create a dataset and it's associated dataservice, which should be listed + # last, and thus on the second page. + extra_dataset = DatasetFactory.create(organization=origin_org) + _extra_dataservice = DataserviceFactory.create( + datasets=[extra_dataset], organization=origin_org + ) + + # Create `total` datasets that should be listed on the first page up to `page_size` DatasetFactory.create_batch(total, organization=origin_org) + # And all the dataservices with no datasets, which will all be listed on the first page. + # See DataserviceQuerySet.filter_by_dataset_pagination. 
+ DataserviceFactory.create_batch(total, organization=origin_org) # First page datasets = Dataset.objects.paginate(1, page_size) - catalog = build_org_catalog(origin_org, datasets, format="json") + dataservices = Dataservice.objects.filter_by_dataset_pagination(datasets, 1) + catalog = build_org_catalog(origin_org, datasets, dataservices, format="json") graph = catalog.graph self.assertIsInstance(catalog, RdfResource) @@ -115,9 +133,12 @@ def test_catalog_pagination(self): self.assertIn(DCAT.Catalog, types) self.assertIn(HYDRA.Collection, types) - self.assertEqual(catalog.value(HYDRA.totalItems), Literal(total)) + self.assertEqual(catalog.value(HYDRA.totalItems), Literal(total + 1)) self.assertEqual(len(list(catalog.objects(DCAT.dataset))), page_size) + # All dataservices that are not linked to a dataset are listed in the first page. + # See DataserviceQuerySet.filter_by_dataset_pagination. + self.assertEqual(len(list(catalog.objects(DCAT.dataservice))), total) paginations = list(graph.subjects(RDF.type, HYDRA.PartialCollectionView)) self.assertEqual(len(paginations), 1) @@ -130,7 +151,8 @@ def test_catalog_pagination(self): # Second page datasets = Dataset.objects.paginate(2, page_size) - catalog = build_org_catalog(origin_org, datasets, format="json") + dataservices = Dataservice.objects.filter_by_dataset_pagination(datasets, 2) + catalog = build_org_catalog(origin_org, datasets, dataservices, format="json") graph = catalog.graph self.assertIsInstance(catalog, RdfResource) @@ -139,9 +161,12 @@ def test_catalog_pagination(self): self.assertIn(DCAT.Catalog, types) self.assertIn(HYDRA.Collection, types) - self.assertEqual(catalog.value(HYDRA.totalItems), Literal(total)) + self.assertEqual(catalog.value(HYDRA.totalItems), Literal(total + 1)) - self.assertEqual(len(list(catalog.objects(DCAT.dataset))), 1) + # 5 datasets total, 3 on the first page, 2 on the second. 
+ self.assertEqual(len(list(catalog.objects(DCAT.dataset))), 2) + # 1 extra_dataservice, listed on the same page as its associated extra_dataset. + self.assertEqual(len(list(catalog.objects(DCAT.dataservice))), 1) paginations = list(graph.subjects(RDF.type, HYDRA.PartialCollectionView)) self.assertEqual(len(paginations), 1) diff --git a/udata/tests/test_cors.py b/udata/tests/test_cors.py index 04410363a0..cdb0b11fd4 100644 --- a/udata/tests/test_cors.py +++ b/udata/tests/test_cors.py @@ -2,7 +2,8 @@ from flask import url_for -from udata.core.dataset.factories import DatasetFactory +from udata import assets +from udata.core.dataset.factories import DatasetFactory, ResourceFactory from udata.tests.api import APITestCase from udata.tests.helpers import assert_status @@ -10,6 +11,41 @@ class CorsTest(APITestCase): modules = [] + def test_cors_on_allowed_routes(self): + cors_headers = { + "Origin": "http://localhost", + "Access-Control-Request-Method": "GET", + } + + dataset = DatasetFactory(resources=[ResourceFactory()]) + + # API Swagger + response = self.get(url_for("api.specs"), headers=cors_headers) + assert_status(response, 200) + assert "Access-Control-Allow-Origin" in response.headers + + # API Dataset + response = self.get(url_for("api.dataset", dataset=dataset.id), headers=cors_headers) + assert_status(response, 200) + assert "Access-Control-Allow-Origin" in response.headers + + # Resource permalink + response = self.get(f"/fr/datasets/r/{dataset.resources[0].id}", headers=cors_headers) + assert_status(response, 404) # The route is defined in udata-front + assert "Access-Control-Allow-Origin" in response.headers + + # Oauth + response = self.get("/oauth/", headers=cors_headers) + assert_status(response, 404) # Oauth is defined in udata-front + assert "Access-Control-Allow-Origin" in response.headers + + # Static + response = self.get( + assets.cdn_for("static", filename="my_static.css"), headers=cors_headers + ) + assert_status(response, 404) # Not available 
in APITestCase + assert "Access-Control-Allow-Origin" in response.headers + def test_cors_redirects(self): dataset = DatasetFactory(title="Old title") old_slug = dataset.slug diff --git a/udata/tests/test_owned.py b/udata/tests/test_owned.py index 946d10fe5f..19b473c02e 100644 --- a/udata/tests/test_owned.py +++ b/udata/tests/test_owned.py @@ -3,14 +3,25 @@ import udata.core.owned as owned from udata.core.organization.factories import OrganizationFactory from udata.core.organization.models import Organization -from udata.core.user.factories import UserFactory +from udata.core.user.factories import AdminFactory, UserFactory from udata.core.user.models import User +from udata.models import Member from udata.mongo import db from udata.tests import DBTestMixin, TestCase +class CustomQuerySet(owned.OwnedQuerySet): + def visible(self): + return self(private__ne=True) + + class Owned(owned.Owned, db.Document): name = db.StringField() + private = db.BooleanField() + + meta = { + "queryset_class": CustomQuerySet, + } class OwnedPostSave(owned.Owned, db.Document): @@ -166,3 +177,91 @@ def test_owned_by_org_or_user(self): for owned_ in excluded: self.assertNotIn(owned_, result) + + def test_visible_by_user(self) -> None: + admin: User = AdminFactory() + user: User = UserFactory() + member = Member(user=user, role="editor") + other_user: User = UserFactory() + org: Organization = OrganizationFactory(members=[member]) + other_org: Organization = OrganizationFactory() + owned_by_user: Owned = Owned.objects.create(owner=user, name="owned_by_user") + owned_by_org: Owned = Owned.objects.create(organization=org, name="owned_by_org") + owned_by_other_user: Owned = Owned.objects.create( + owner=other_user, name="owned_by_other_user" + ) + owned_by_other_org: Owned = Owned.objects.create( + organization=other_org, name="owned_by_other_org" + ) + private_owned_by_user: Owned = Owned.objects.create( + owner=user, private=True, name="private_owned_by_user" + ) + private_owned_by_org: 
Owned = Owned.objects.create( + organization=org, private=True, name="private_owned_by_org" + ) + private_owned_by_other_user: Owned = Owned.objects.create( + owner=other_user, private=True, name="private_owned_by_other_user" + ) + private_owned_by_other_org: Owned = Owned.objects.create( + organization=other_org, private=True, name="private_owned_by_other_org" + ) + + visible_by_user: list[Owned] = [ + owned_by_user, + owned_by_org, + owned_by_other_user, + owned_by_other_org, + private_owned_by_user, + private_owned_by_org, + ] + visible_by_other_user: list[Owned] = [ + private_owned_by_other_user, + private_owned_by_other_org, + ] + + # Admin can view everything. + result: owned.OwnedQuerySet = Owned.objects.visible_by_user( + admin, Owned.objects.visible()._query_obj + ) + # 4 public + 1 private owned by user + 1 private owned by the user's org. + self.assertEqual(len(result), 8) + for owned_ in visible_by_user + visible_by_other_user: + self.assertIn(owned_, result) + + result = Owned.objects.visible_by_user(user, Owned.objects.visible()._query_obj) + # 4 public + 1 private owned by user + 1 private owned by the user's org. + self.assertEqual(len(result), 6) + for owned_ in visible_by_user: + self.assertIn(owned_, result) + + # `.visible_by_user` does not reset other queries. 
+ result = Owned.objects(name="owned_by_user").visible_by_user( + user, Owned.objects.visible()._query_obj + ) + self.assertEqual(len(result), 1) + self.assertIn(owned_by_user, result) + result = Owned.objects.visible_by_user(user, Owned.objects.visible()._query_obj).filter( + name="owned_by_user" + ) + self.assertEqual(len(result), 1) + self.assertIn(owned_by_user, result) + + result = Owned.objects(name="private_owned_by_user").visible_by_user( + user, Owned.objects.visible()._query_obj + ) + self.assertEqual(len(result), 1) + self.assertIn(private_owned_by_user, result) + result = Owned.objects.visible_by_user(user, Owned.objects.visible()._query_obj).filter( + name="private_owned_by_user" + ) + self.assertEqual(len(result), 1) + self.assertIn(private_owned_by_user, result) + + result = Owned.objects(name="private_owned_by_other_user").visible_by_user( + user, Owned.objects.visible()._query_obj + ) + self.assertEqual(len(result), 0) + result = Owned.objects.visible_by_user(user, Owned.objects.visible()._query_obj).filter( + name="private_owned_by_other_user" + ) + self.assertEqual(len(result), 0) diff --git a/udata/utils.py b/udata/utils.py index 92a6bca751..b29b2c5cc7 100644 --- a/udata/utils.py +++ b/udata/utils.py @@ -18,6 +18,8 @@ from faker.providers.lorem.la import Provider as LoremProvider from flask import abort +from udata import tags + def get_by(lst, field, value): """Find an object in a list given a field value""" @@ -277,6 +279,21 @@ def is_uuid(uuid_string: str, version: int = 4) -> bool: faker = Faker("fr_FR") # Use a unicode/utf-8 based locale +def generate_tags(nb=3) -> [str]: + return [generate_tag() for _ in range(nb)] + + +def generate_tag() -> str: + fake_tag: str = faker.word() + while len(fake_tag) < tags.MIN_TAG_LENGTH: + fake_tag = faker.word() + return fake_tag + + +faker.tag = generate_tag +faker.tags = generate_tags + + def faker_provider(provider): faker.add_provider(provider) factory.Faker.add_provider(provider)