diff --git a/arches/app/datatypes/base.py b/arches/app/datatypes/base.py index bdc87d96cd8..30381ff8fe8 100644 --- a/arches/app/datatypes/base.py +++ b/arches/app/datatypes/base.py @@ -1,15 +1,20 @@ -import json, urllib +import json +import logging +import urllib + from django.urls import reverse +from django.utils.translation import gettext as _ + from arches.app.models import models -from arches.app.models.system_settings import settings from arches.app.search.elasticsearch_dsl_builder import Dsl, Bool, Terms, Exists, Nested -from django.utils.translation import gettext as _ -import logging logger = logging.getLogger(__name__) class BaseDataType(object): + rest_framework_model_field = None + """Django model field if the datatype were to be a real table column.""" + def __init__(self, model=None): self.datatype_model = model self.datatype_name = model.datatype if model else None @@ -336,7 +341,7 @@ def get_default_language_value_from_localized_node(self, tile, nodeid): """ return tile.data[str(nodeid)] - def post_tile_save(self, tile, nodeid, request): + def post_tile_save(self, tile, nodeid, request=None): """ Called after the tile is saved to the database @@ -532,3 +537,13 @@ def validate_node(self, node): a GraphValidationError """ pass + + def get_base_orm_lookup(self, node): + """This expression gets the tile data for a specific node. It can be + overridden to extract something more specific, especially where the + node value is JSON and only certain k/v pairs are useful to query. 
+ """ + return f"data__{node.pk}" + + def to_python(self, tile_val): + return tile_val diff --git a/arches/app/datatypes/concept_types.py b/arches/app/datatypes/concept_types.py index 54ea077edd0..2bc2fc8b75c 100644 --- a/arches/app/datatypes/concept_types.py +++ b/arches/app/datatypes/concept_types.py @@ -2,11 +2,14 @@ import uuid import csv import logging + +from django.contrib.postgres.fields import ArrayField from django.core.exceptions import ObjectDoesNotExist +from django.db.models import fields from django.utils.translation import gettext as _ -from arches.app.models import models -from arches.app.models import concept from django.core.cache import cache + +from arches.app.models import models from arches.app.models.system_settings import settings from arches.app.datatypes.base import BaseDataType from arches.app.datatypes.datatypes import DataTypeFactory, get_value_from_jsonld @@ -32,7 +35,6 @@ from rdflib.namespace import RDF, RDFS, XSD, DC, DCTERMS, SKOS from arches.app.models.concept import ConceptValue from arches.app.models.concept import Concept -from io import StringIO archesproject = Namespace(settings.ARCHES_NAMESPACE_FOR_DATA_EXPORT) cidoc_nm = Namespace("http://www.cidoc-crm.org/cidoc-crm/") @@ -41,6 +43,8 @@ class BaseConceptDataType(BaseDataType): + rest_framework_model_field = fields.UUIDField(null=True) + def __init__(self, model=None): super(BaseConceptDataType, self).__init__(model=model) self.value_lookup = {} @@ -253,6 +257,8 @@ def validate( return errors def transform_value_for_tile(self, value, **kwargs): + if isinstance(value, uuid.UUID): + return str(value) try: stripped = value.strip() uuid.UUID(stripped) @@ -409,6 +415,8 @@ def ignore_keys(self): class ConceptListDataType(BaseConceptDataType): + rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + def validate( self, value, @@ -425,13 +433,20 @@ def validate( if value is not None: validate_concept = DataTypeFactory().get_instance("concept") for v in 
value: - val = v.strip() + if isinstance(v, uuid.UUID): + val = str(v) + else: + val = v.strip() errors += validate_concept.validate(val, row_number) return errors def transform_value_for_tile(self, value, **kwargs): ret = [] - for val in csv.reader([value], delimiter=",", quotechar='"'): + if not isinstance(value, list): + value = [value] + if all(isinstance(inner, uuid.UUID) for inner in value): + return [str(inner) for inner in value] + for val in csv.reader(value, delimiter=",", quotechar='"'): lines = [line for line in val] for v in lines: try: diff --git a/arches/app/datatypes/core/non_localized_string.py b/arches/app/datatypes/core/non_localized_string.py index d77e209a6a2..eb5350f1be3 100644 --- a/arches/app/datatypes/core/non_localized_string.py +++ b/arches/app/datatypes/core/non_localized_string.py @@ -1,10 +1,11 @@ +from django.conf import settings +from django.db.models import fields from django.utils.translation import gettext as _ from rdflib import URIRef, Literal, ConjunctiveGraph as Graph from rdflib.namespace import RDF from arches.app.datatypes.base import BaseDataType from arches.app.datatypes.core.util import get_value_from_jsonld -from django.conf import settings from arches.app.search.elasticsearch_dsl_builder import ( Bool, Exists, @@ -18,6 +19,8 @@ class NonLocalizedStringDataType(BaseDataType): + rest_framework_model_field = fields.CharField(null=True) + def validate( self, value, diff --git a/arches/app/datatypes/datatypes.py b/arches/app/datatypes/datatypes.py index b658af5329e..f39fdbe57db 100644 --- a/arches/app/datatypes/datatypes.py +++ b/arches/app/datatypes/datatypes.py @@ -1,4 +1,5 @@ import copy +import itertools import uuid import json import decimal @@ -15,9 +16,6 @@ from datetime import datetime from mimetypes import MimeTypes -from django.core.files.images import get_image_dimensions -from django.db.models import fields - from arches.app.const import ExtensionType from arches.app.datatypes.base import BaseDataType from 
arches.app.models import models @@ -47,18 +45,18 @@ from arches.app.search.search_engine_factory import SearchEngineInstance as se from arches.app.search.search_term import SearchTerm from arches.app.search.mappings import RESOURCES_INDEX + +from django.contrib.postgres.fields import ArrayField from django.core.cache import cache from django.core.files import File -from django.core.files.base import ContentFile -from django.core.files.storage import FileSystemStorage, default_storage +from django.core.files.images import get_image_dimensions +from django.core.files.storage import default_storage from django.core.exceptions import ObjectDoesNotExist -from django.core.exceptions import ValidationError -from django.db import connection, transaction +from django.db import connection +from django.db.models import fields +from django.db.models.fields.json import JSONField from django.utils.translation import get_language, gettext as _ -from elasticsearch import Elasticsearch -from elasticsearch.exceptions import NotFoundError - # One benefit of shifting to python3.x would be to use # importlib.util.LazyLoader to load rdflib (and other lesser # used but memory soaking libs) @@ -118,6 +116,8 @@ def get_instance(self, datatype): class StringDataType(BaseDataType): + rest_framework_model_field = JSONField(null=True) + def validate( self, value, @@ -458,6 +458,8 @@ def pre_structure_tile_data(self, tile, nodeid, **kwargs): class NumberDataType(BaseDataType): + rest_framework_model_field = fields.FloatField(null=True) + def validate( self, value, @@ -574,6 +576,8 @@ def default_es_mapping(self): class BooleanDataType(BaseDataType): + rest_framework_model_field = fields.BooleanField(null=True) + def validate( self, value, @@ -675,6 +679,8 @@ def default_es_mapping(self): class DateDataType(BaseDataType): + rest_framework_model_field = fields.DateField(null=True) + def validate( self, value, @@ -886,6 +892,8 @@ def get_display_value(self, tile, node, **kwargs): class 
EDTFDataType(BaseDataType): + rest_framework_model_field = fields.CharField(null=True) + def transform_value_for_tile(self, value, **kwargs): transformed_value = ExtendedDateFormat(value) if transformed_value.edtf is None: @@ -893,6 +901,8 @@ def transform_value_for_tile(self, value, **kwargs): return str(transformed_value.edtf) def pre_tile_save(self, tile, nodeid): + # TODO: This is likely to be duplicative once we clean this up: + # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 tile.data[nodeid] = self.transform_value_for_tile(tile.data[nodeid]) def validate( @@ -1057,6 +1067,8 @@ def default_es_mapping(self): class FileListDataType(BaseDataType): + rest_framework_model_field = ArrayField(base_field=fields.CharField(), null=True) + def __init__(self, model=None): super(FileListDataType, self).__init__(model=model) self.node_lookup = {} @@ -1258,7 +1270,7 @@ def to_json(self, tile, node): if data: return self.compile_json(tile, node, file_details=data[str(node.nodeid)]) - def post_tile_save(self, tile, nodeid, request): + def post_tile_save(self, tile, nodeid, request=None): if request is not None: # this does not get called when saving data from the mobile app previously_saved_tile = models.TileModel.objects.filter(pk=tile.tileid) @@ -2013,6 +2025,8 @@ class ResourceInstanceDataType(BaseDataType): """ + rest_framework_model_field = fields.UUIDField(null=True) + def validate( self, value, @@ -2060,14 +2074,14 @@ def validate( raise ObjectDoesNotExist() except ObjectDoesNotExist: message = _( - "The related resource with id '{0}' is not in the system.".format( + "The related resource with id '{0}' is not in the system".format( resourceid ) ) errors.append({"type": "ERROR", "message": message}) except (ValueError, TypeError): message = _( - "The related resource with id '{0}' is not a valid uuid.".format( + "The related resource with id '{0}' is not a valid uuid".format( str(value) ) ) @@ -2090,7 +2104,7 @@ def pre_tile_save(self, 
tile, nodeid): for relationship in relationships: relationship["resourceXresourceId"] = str(uuid.uuid4()) - def post_tile_save(self, tile, nodeid, request): + def post_tile_save(self, tile, nodeid, request=None): ret = False sql = """ SELECT * FROM __arches_create_resource_x_resource_relationships('%s') as t; @@ -2217,6 +2231,14 @@ def get_search_terms(self, nodevalue, nodeid=None): return terms def transform_value_for_tile(self, value, **kwargs): + def from_id_string(uuid_string): + nonlocal kwargs + return { + "resourceId": uuid_string, + "inverseOntology": kwargs.get("inverseOntology", ""), + "inverseOntologyProperty": kwargs.get("inverseOntologyProperty", ""), + } + try: return json.loads(value) except ValueError: @@ -2228,7 +2250,16 @@ def transform_value_for_tile(self, value, **kwargs): except TypeError: # data should come in as json but python list is accepted as well if isinstance(value, list): - return value + if all(isinstance(inner, models.ResourceInstance) for inner in value): + return [from_id_string(str(instance.pk)) for instance in value] + elif all(isinstance(inner, uuid.UUID) for inner in value): + return [from_id_string(str(uid)) for uid in value] + elif all(isinstance(inner, str) for inner in value): + return [from_id_string(uid) for uid in value] + else: + return value + if isinstance(value, models.ResourceInstance): + return [from_id_string(str(value.pk))] def transform_export_values(self, value, *args, **kwargs): return json.dumps(value) @@ -2354,8 +2385,23 @@ def default_es_mapping(self): } return mapping + def _get_base_orm_lookup(self, node): + """Filter down to the resourceId.""" + return f"data__{node.pk}__0__resourceId" + + def values_match(self, value1, value2): + if not isinstance(value1, list) or not isinstance(value2, list): + return value1 == value2 + copy1 = [{**inner_val} for inner_val in value1] + copy2 = [{**inner_val} for inner_val in value2] + for inner_val in itertools.chain(copy1, copy2): + 
inner_val.pop("resourceXresourceId", None) + return copy1 == copy2 + class ResourceInstanceListDataType(ResourceInstanceDataType): + rest_framework_model_field = ArrayField(base_field=fields.UUIDField(), null=True) + def to_json(self, tile, node): from arches.app.models.resource import ( Resource, @@ -2383,6 +2429,18 @@ def to_json(self, tile, node): def collects_multiple_values(self): return True + def _get_base_orm_lookup(self, node): + """Undo the override in ResourceInstanceDataType. TODO: write a better lookup. + Currently the unpacking into UUID[] is done in to_python(), but this isn't + useful for querying.""" + return f"data__{node.pk}" + + def to_python(self, tile_val): + if tile_val is None: + return tile_val + resource_ids = [inner["resourceId"] if inner else None for inner in tile_val] + return resource_ids + class NodeValueDataType(BaseDataType): def validate( diff --git a/arches/app/datatypes/url.py b/arches/app/datatypes/url.py index b027d88af2a..71936d287ec 100644 --- a/arches/app/datatypes/url.py +++ b/arches/app/datatypes/url.py @@ -27,6 +27,7 @@ from rdflib import ConjunctiveGraph as Graph from rdflib import URIRef, Literal, Namespace from rdflib.namespace import RDF, RDFS, XSD, DC, DCTERMS +from django.db.models import fields from django.utils.translation import gettext as _ archesproject = Namespace(settings.ARCHES_NAMESPACE_FOR_DATA_EXPORT) @@ -70,6 +71,8 @@ class URLDataType(BaseDataType): URL Datatype to store an optionally labelled hyperlink to a (typically) external resource """ + rest_framework_model_field = fields.URLField(null=True) + URL_REGEX = re.compile( r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" ) diff --git a/arches/app/models/functions.py b/arches/app/models/functions.py new file mode 100644 index 00000000000..0b9444eb613 --- /dev/null +++ b/arches/app/models/functions.py @@ -0,0 +1,7 @@ +from django.db import models + + +class UUID4(models.Func): + function = 
"uuid_generate_v4" + arity = 0 + output_field = models.UUIDField() diff --git a/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py new file mode 100644 index 00000000000..c55144cc4d5 --- /dev/null +++ b/arches/app/models/migrations/11043_tile_nodegroup_add_related_names.py @@ -0,0 +1,40 @@ +# Generated by Django 5.1.2 on 2024-10-25 12:44 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11042_update__arches_staging_to_tile"), + ] + + operations = [ + migrations.AlterField( + model_name="nodegroup", + name="parentnodegroup", + field=models.ForeignKey( + blank=True, + db_column="parentnodegroupid", + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="children", + related_query_name="child", + to="models.nodegroup", + ), + ), + migrations.AlterField( + model_name="tilemodel", + name="parenttile", + field=models.ForeignKey( + blank=True, + db_column="parenttileid", + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="children", + related_query_name="child", + to="models.tilemodel", + ), + ), + ] diff --git a/arches/app/models/migrations/11044_make_further_fields_blank.py b/arches/app/models/migrations/11044_make_further_fields_blank.py new file mode 100644 index 00000000000..054022cca47 --- /dev/null +++ b/arches/app/models/migrations/11044_make_further_fields_blank.py @@ -0,0 +1,23 @@ +# Generated by Django 5.1.2 on 2024-10-29 11:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11043_tile_nodegroup_add_related_names"), + ] + + operations = [ + migrations.AlterField( + model_name="resourceinstance", + name="resourceinstanceid", + field=models.UUIDField(blank=True, primary_key=True, serialize=False), + ), + migrations.AlterField( + 
model_name="tilemodel", + name="tileid", + field=models.UUIDField(blank=True, primary_key=True, serialize=False), + ), + ] diff --git a/arches/app/models/migrations/11045_generate_resource_ids.py b/arches/app/models/migrations/11045_generate_resource_ids.py new file mode 100644 index 00000000000..0a98b2e44f7 --- /dev/null +++ b/arches/app/models/migrations/11045_generate_resource_ids.py @@ -0,0 +1,24 @@ +# Generated by Django 5.1.3 on 2024-11-06 12:10 + +import arches.app.models.functions +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("models", "11044_make_further_fields_blank"), + ] + + operations = [ + migrations.AlterField( + model_name="resourceinstance", + name="resourceinstanceid", + field=models.UUIDField( + blank=True, + db_default=arches.app.models.functions.UUID4(), + primary_key=True, + serialize=False, + ), + ), + ] diff --git a/arches/app/models/models.py b/arches/app/models/models.py index e88b74266f3..33841e68921 100644 --- a/arches/app/models/models.py +++ b/arches/app/models/models.py @@ -1,45 +1,40 @@ -# This is an auto-generated Django model module. -# You'll have to do the following manually to clean this up: -# * Rearrange models' order -# * Make sure each model has one field with primary_key=True -# * Remove `managed = False` lines if you wish to allow Django to create, modify, and delete the table -# Feel free to rename the models, but don't rename db_table values or field names. -# -# Also note: You'll have to insert the output of 'django-admin sqlcustom [app_label]' -# into your database. 
- - import sys +import itertools import json import uuid import datetime import logging import traceback -import django.utils.timezone +from collections import defaultdict +from itertools import zip_longest +from operator import attrgetter, itemgetter from arches.app.const import ExtensionType from arches.app.utils.module_importer import get_class_from_modulename from arches.app.utils.thumbnail_factory import ThumbnailGeneratorInstance from arches.app.models.fields.i18n import I18n_TextField, I18n_JSONField -from arches.app.models.utils import add_to_update_fields +from arches.app.models.functions import UUID4 +from arches.app.models.querysets import ResourceInstanceQuerySet, TileQuerySet +from arches.app.models.utils import ( + add_to_update_fields, + field_names, + pop_arches_model_kwargs, +) from arches.app.utils.betterJSONSerializer import JSONSerializer from arches.app.utils import import_class_from_string from django.contrib.auth.models import Group, User from django.contrib.gis.db import models from django.core.exceptions import ObjectDoesNotExist from django.db import connection -from django.db.models import JSONField from django.core.exceptions import ValidationError from django.core.serializers.json import DjangoJSONEncoder from django.core.validators import RegexValidator, validate_slug +from django.db import transaction from django.db.models import JSONField, Max, Q +from django.db.models import Value as ORMValue from django.db.models.constraints import UniqueConstraint -from django.utils import translation +from django.utils import timezone, translation from django.utils.translation import gettext_lazy as _ -from django.contrib.auth.models import User -from django.contrib.auth.models import Group -from django.core.validators import validate_slug -from django.core.exceptions import ValidationError # can't use "arches.app.models.system_settings.SystemSettings" because of circular refernce issue # so make sure the only settings we use in this file are 
ones that are static (fixed at run time) @@ -715,6 +710,8 @@ class NodeGroup(models.Model): blank=True, null=True, on_delete=models.CASCADE, + related_name="children", + related_query_name="child", ) # Allows nodegroups within nodegroups def __init__(self, *args, **kwargs): @@ -1200,7 +1197,9 @@ class Meta: class ResourceInstance(models.Model): - resourceinstanceid = models.UUIDField(primary_key=True) + resourceinstanceid = models.UUIDField( + primary_key=True, blank=True, db_default=UUID4() + ) graph = models.ForeignKey(GraphModel, db_column="graphid", on_delete=models.CASCADE) graph_publication = models.ForeignKey( GraphXPublishedGraph, @@ -1217,15 +1216,6 @@ class ResourceInstance(models.Model): to="models.ResourceInstanceLifecycleState", related_name="resource_instances", ) - - def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): - try: - return ( - self.graph.resource_instance_lifecycle.get_initial_resource_instance_lifecycle_state() - ) - except (ObjectDoesNotExist, AttributeError): - return None - # This could be used as a lock, but primarily addresses the issue that a creating user # may not yet match the criteria to edit a ResourceInstance (via Set/LogicalSet) simply # because the details may not yet be complete. 
Only one user can create, as it is an @@ -1237,6 +1227,44 @@ def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): User, on_delete=models.SET_NULL, blank=True, null=True ) + objects = ResourceInstanceQuerySet.as_manager() + + class Meta: + managed = True + db_table = "resource_instances" + permissions = (("no_access_to_resourceinstance", "No Access"),) + + def __init__(self, *args, **kwargs): + arches_model_kwargs, other_kwargs = pop_arches_model_kwargs( + kwargs, self._meta.get_fields() + ) + super().__init__(*args, **other_kwargs) + + for kwarg, value in arches_model_kwargs.items(): + setattr(self, kwarg, value) + + def __repr__(self): + return f"<{self.graph.name}: {self.name} ({self.pk})>" + + def __str__(self): + return repr(self) + + @classmethod + def as_model(cls, graph_slug=None, *, resource_ids=None, defer=None, only=None): + """Return a chainable QuerySet for a requested graph's instances, + with tile data annotated onto node and nodegroup aliases.""" + return cls.objects.with_nodegroups( + graph_slug, resource_ids=resource_ids, defer=defer, only=only + ) + + def get_initial_resource_instance_lifecycle_state(self, *args, **kwargs): + try: + return ( + self.graph.resource_instance_lifecycle.get_initial_resource_instance_lifecycle_state() + ) + except (ObjectDoesNotExist, AttributeError): + return None + def get_instance_creator_and_edit_permissions(self, user=None): creatorid = None can_edit = None @@ -1264,30 +1292,338 @@ def get_instance_creator(self) -> int: return creatorid - def save(self, *args, **kwargs): + def save_edit(self, user=None): + """Intended to replace proxy model method eventually.""" + from arches.app.models.resource import Resource + + edit_type = "update" + if self._state.adding: + edit_type = "create" + else: + return # TODO: should we save an edit log update? + + # Until save_edit() is a static method, work around it. 
+ ephemeral_proxy_instance = Resource() + ephemeral_proxy_instance.graphid = self.graph_id + ephemeral_proxy_instance.resourceinstanceid = str(self.pk) + ephemeral_proxy_instance.save_edit(user=user, edit_type=edit_type) + + def save(self, index=False, user=None, **kwargs): try: self.graph_publication = self.graph.publication except ResourceInstance.graph.RelatedObjectDoesNotExist: pass + else: + add_to_update_fields(kwargs, "graph_publication") + + if not self.principaluser_id and user: + self.principaluser = user + add_to_update_fields(kwargs, "principaluser") if not hasattr(self, "resource_instance_lifecycle_state"): self.resource_instance_lifecycle_state = ( self.get_initial_resource_instance_lifecycle_state() ) + add_to_update_fields(kwargs, "resource_instance_lifecycle_state") - add_to_update_fields(kwargs, "resource_instance_lifecycle_state") - add_to_update_fields(kwargs, "graph_publication") - super(ResourceInstance, self).save(*args, **kwargs) + if getattr(self, "_fetched_root_nodes", False): + self._save_tiles_for_pythonic_model(user=user, index=index, **kwargs) + self.save_edit(user=user) + else: + super().save(**kwargs) - def __init__(self, *args, **kwargs): - super(ResourceInstance, self).__init__(*args, **kwargs) - if not self.resourceinstanceid: - self.resourceinstanceid = uuid.uuid4() + def clean(self): + """Raises a compound ValidationError with any failing tile values.""" + if getattr(self, "_fetched_root_nodes", False): + self._update_tiles_from_pythonic_model_values() + + def _save_tiles_for_pythonic_model(self, user=None, index=False, **kwargs): + """Raises a compound ValidationError with any failing tile values. + + It's not exactly idiomatic for a Django project to clean() + values during a save(), but we can't easily express this logic + in a "pure" DRF field validator, because: + - the node values are phantom fields. + - we have other entry points besides DRF. 
+ """ + from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.resource import Resource + from arches.app.models.tile import Tile - class Meta: - managed = True - db_table = "resource_instances" - permissions = (("no_access_to_resourceinstance", "No Access"),) + datatype_factory = DataTypeFactory() + to_insert, to_update, to_delete = ( + self._update_tiles_from_pythonic_model_values() + ) + + # Instantiate proxy models for now, but TODO: expose this + # functionality on vanilla models, and in bulk. + upserts = to_insert | to_update + upsert_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in upserts]) + delete_proxies = Tile.objects.filter(pk__in=[tile.pk for tile in to_delete]) + + with transaction.atomic(): + # Interact with the database in bulk as much as possible, but + # run certain side effects from Tile.save() one-at-a-time until + # proxy model methods can be refactored. Then run in bulk. + for upsert_proxy, vanilla_instance in zip( + upsert_proxies, upserts, strict=True + ): + upsert_proxy._existing_data = upsert_proxy.data + upsert_proxy._existing_provisionaledits = upsert_proxy.provisionaledits + + # Sync proxy instance fields. + for field in field_names(vanilla_instance): + setattr(upsert_proxy, field, getattr(vanilla_instance, field)) + + # Run tile lifecycle updates on proxy instance. + upsert_proxy._Tile__preSave() + upsert_proxy.check_for_missing_nodes() + upsert_proxy.check_for_constraint_violation() + ( + oldprovisionalvalue, + newprovisionalvalue, + provisional_edit_log_details, + ) = vanilla_instance._apply_provisional_edit( + upsert_proxy, + self._existing_data, + self._existing_provisionaledits, + user=user, + ) + # Remember the values needed for the edit log updates later. 
+ upsert_proxy._oldprovisionalvalue = oldprovisionalvalue + upsert_proxy._newprovisionalvalue = newprovisionalvalue + upsert_proxy._provisional_edit_log_details = ( + provisional_edit_log_details + ) + upsert_proxy._existing_data = self.data + + for upsert_proxy in delete_proxies: + upsert_proxy._Tile__preDelete() + + insert_proxies = TileModel.objects.none() + if to_insert: + inserted = TileModel.objects.bulk_create(to_insert) + # Pay the cost of a second TileModel -> Tile transform until refactored. + update_proxies = upsert_proxies.difference(insert_proxies) + insert_proxies = Tile.objects.filter(pk__in=[t.pk for t in inserted]) + upsert_proxies = update_proxies | insert_proxies + if to_update: + TileModel.objects.bulk_update( + to_update, {"data", "parenttile", "provisionaledits"} + ) + if to_delete: + TileModel.objects.filter(pk__in=[t.pk for t in to_delete]).delete() + + super().save(**kwargs) + + for upsert_tile in upserts: + for root_node in self._fetched_root_nodes: + if upsert_tile.nodegroup_id == root_node.nodegroup_id: + for node in root_node.nodegroup.node_set.all(): + datatype = datatype_factory.get_instance(node.datatype) + datatype.post_tile_save(upsert_tile, str(node.pk)) + break + + for upsert_proxy in upsert_proxies: + upsert_proxy._Tile__postSave() + + # Save edits: could be done in bulk once above side effects are un-proxied. + for upsert_proxy in upsert_proxies: + if self._state.adding: + upsert_proxy.save_edit( + user=user, + edit_type="tile create", + old_value={}, + new_value=self.data, + newprovisionalvalue=upsert_proxy._newprovisionalvalue, + provisional_edit_log_details=upsert_proxy._provisional_edit_log_details, + transaction_id=None, + # TODO: get this information upstream somewhere. 
+ new_resource_created=False, + note=None, + ) + else: + upsert_proxy.save_edit( + user=user, + edit_type="tile edit", + old_value=upsert_proxy._existing_data, + new_value=self.data, + newprovisionalvalue=upsert_proxy._newprovisionalvalue, + oldprovisionalvalue=upsert_proxy._oldprovisionalvalue, + provisional_edit_log_details=upsert_proxy._provisional_edit_log_details, + transaction_id=None, + ) + + # Instantiate proxy model for now, but refactor & expose this on vanilla model + proxy_resource = Resource.objects.get(pk=self.pk) + proxy_resource.save_descriptors() + if index: + proxy_resource.index() + + def _update_tiles_from_pythonic_model_values(self): + """Move values from model instance to prefetched tiles, and validate. + Raises ValidationError if new data fails datatype validation. + """ + # TODO: put all this state in a helper dataclass to ease passing it around. + errors_by_node_alias = defaultdict(list) + to_insert = set() + to_update = set() + to_delete = set() + + original_tile_data_by_tile_id = {} + for root_node in self._fetched_root_nodes: + self._update_tile_for_root_node( + root_node, + original_tile_data_by_tile_id, + to_insert, + to_update, + to_delete, + errors_by_node_alias, + ) + + if errors_by_node_alias: + del self._annotated_tiles + raise ValidationError( + { + alias: ValidationError([e["message"] for e in errors]) + for alias, errors in errors_by_node_alias.items() + } + ) + + return to_insert, to_update, to_delete + + def _update_tile_for_root_node( + self, + root_node, + original_tile_data_by_tile_id, + to_insert, + to_update, + to_delete, + errors_by_node_alias, + ): + NOT_PROVIDED = object() + + new_tiles = getattr(self, root_node.alias, NOT_PROVIDED) + if new_tiles is NOT_PROVIDED: + return + if root_node.nodegroup.cardinality == "1": + if new_tiles is None: + new_tiles = [] + else: + new_tiles = [new_tiles] + if all(isinstance(tile, TileModel) for tile in new_tiles): + new_tiles.sort(key=attrgetter("sortorder")) + else: + # TODO: 
figure out best layer for this and remove if/else. + # TODO: nullguard or make not nullable. + new_tiles.sort(key=itemgetter("sortorder")) + db_tiles = [ + t for t in self._annotated_tiles if t.nodegroup_alias == root_node.alias + ] + for db_tile, new_tile in zip_longest( + db_tiles, new_tiles, fillvalue=NOT_PROVIDED + ): + if new_tile is NOT_PROVIDED: + to_delete.add(db_tile) + continue + if db_tile is NOT_PROVIDED: + new_tile_obj = TileModel.get_blank_tile_from_nodegroup( + nodegroup=root_node.nodegroup, + resourceid=self.pk, + # TODO: ensure this deserializes correctly. + parenttile=getattr(new_tile, "parenttile", None), + ) + new_tile_obj._nodegroup_alias = root_node.alias + if db_tiles: + new_tile_obj.sortorder = max(t.sortorder or 0 for t in db_tiles) + 1 + new_tile_obj._incoming_tile = new_tile + to_insert.add(new_tile_obj) + else: + original_tile_data_by_tile_id[db_tile.pk] = {**db_tile.data} + db_tile._incoming_tile = new_tile + to_update.add(db_tile) + + upserts = to_insert | to_update + for tile in upserts: + self._validate_and_patch_from_tile_values( + tile, root_node, errors_by_node_alias + ) + # Remove blank tiles. + # TODO: also check for unsaved children? + if not any(tile.data.values()) and not tile.children.count(): + if tile._state.adding: + to_insert.remove(tile) + else: + to_update.remove(tile) + to_delete.add(tile) + if ( + original_data := original_tile_data_by_tile_id.pop(tile.pk, None) + ) and tile._tile_update_is_noop(original_data): + to_update.remove(tile) + + @staticmethod + def _validate_and_patch_from_tile_values(tile, *, root_node, errors_by_node_alias): + """Validate data found on ._incoming_tile and move it to .data. + Update errors_by_node_alias in place.""" + from arches.app.datatypes.datatypes import DataTypeFactory + + NOT_PROVIDED = object() + datatype_factory = DataTypeFactory() + for node in root_node.nodegroup.node_set.all(): + node_id_str = str(node.pk) + # TODO: remove this switch and deserialize this in DRF. 
+ if isinstance(tile._incoming_tile, TileModel): + value_to_validate = getattr( + tile._incoming_tile, node.alias, NOT_PROVIDED + ) + else: + value_to_validate = tile._incoming_tile.get(node.alias, NOT_PROVIDED) + if value_to_validate is NOT_PROVIDED: + continue + datatype_instance = datatype_factory.get_instance(node.datatype) + # TODO: move this to Tile.full_clean()? + # https://github.com/archesproject/arches/issues/10851#issuecomment-2427305853 + if value_to_validate is None: + tile.data[node_id_str] = None + continue + try: + transformed = datatype_instance.transform_value_for_tile( + value_to_validate, **node.config + ) + except ValueError: # BooleanDataType raises. + # validate() will handle. + transformed = value_to_validate + + # Patch the transformed data into the tile.data. + tile.data[node_id_str] = transformed + + datatype_instance.clean(tile, node_id_str) + + if errors := datatype_instance.validate(transformed, node=node): + errors_by_node_alias[node.alias].extend(errors) + + try: + datatype_instance.pre_tile_save(tile, node_id_str) + except TypeError: # GeoJSONDataType raises. + errors_by_node_alias[node.alias].append( + datatype_instance.create_error_message( + tile.data[node_id_str], None, None, None + ) + ) + + def refresh_from_db(self, using=None, fields=None, from_queryset=None): + if not from_queryset and ( + root_nodes := getattr(self, "_fetched_root_nodes", set()) + ): + aliases = [n.alias for n in root_nodes] + from_queryset = self.__class__.as_model(self.graph.slug, only=aliases) + super().refresh_from_db(using, fields, from_queryset) + # Copy over annotations and annotated tiles. 
+ refreshed_resource = from_queryset[0] + for field in itertools.chain(aliases, ["_annotated_tiles"]): + setattr(self, field, getattr(refreshed_resource, field)) + else: + super().refresh_from_db(using, fields, from_queryset) class ResourceInstanceLifecycle(models.Model): @@ -1527,7 +1863,7 @@ class TileModel(models.Model): # Tile """ - tileid = models.UUIDField(primary_key=True) + tileid = models.UUIDField(primary_key=True, blank=True) resourceinstance = models.ForeignKey( ResourceInstance, db_column="resourceinstanceid", on_delete=models.CASCADE ) @@ -1537,29 +1873,109 @@ class TileModel(models.Model): # Tile blank=True, null=True, on_delete=models.CASCADE, + related_name="children", + related_query_name="child", ) data = JSONField(blank=True, null=True, db_column="tiledata") nodegroup_id = models.UUIDField(db_column="nodegroupid", null=True) sortorder = models.IntegerField(blank=True, null=True, default=0) provisionaledits = JSONField(blank=True, null=True, db_column="provisionaledits") + objects = TileQuerySet.as_manager() + class Meta: managed = True db_table = "tiles" def __init__(self, *args, **kwargs): - super(TileModel, self).__init__(*args, **kwargs) + arches_model_kwargs, other_kwargs = pop_arches_model_kwargs( + kwargs, self._meta.get_fields() + ) + super().__init__(*args, **other_kwargs) + + for kwarg, value in arches_model_kwargs.items(): + setattr(self, kwarg, value) if not self.tileid: self.tileid = uuid.uuid4() + def __repr__(self): + return f"<{self.nodegroup_alias} ({self.pk})>" + + def __str__(self): + return repr(self) + @property def nodegroup(self): return NodeGroup.objects.filter(pk=self.nodegroup_id).first() + @property + def nodegroup_alias(self): + if nodegroup_alias := getattr(self, "_nodegroup_alias", None): + return nodegroup_alias + if root_node := Node.objects.filter(pk=self.nodegroup_id).first(): + self._nodegroup_alias = root_node.alias + return root_node.alias + return None + + @classmethod + def as_nodegroup(cls, root_node_alias, 
*, graph_slug, defer=None, only=None): + """ + Entry point for filtering arches data by nodegroups (instead of grouping by + resource.) + + >>> statements = TileModel.as_nodegroup("statement", graph_slug="concept") + >>> results = statements.filter(statement_content__en__value__startswith="F") # TODO: make more ergonomic + >>> for result in results: + print(result.resourceinstance) + print("\t", result.statement_content["en"]["value"]) # TODO: unwrap? + + + Fluorescence stimulated by x-rays; ... + + Fine-quality calf or lamb parchment ... + """ + + root_node = cls._root_node(graph_slug, root_node_alias) + + def accumulate_nodes_below(nodegroup, acc): + acc.extend(list(nodegroup.node_set.all())) + for child_nodegroup in nodegroup.children.all(): + accumulate_nodes_below(child_nodegroup, acc) + + branch_nodes = [] + accumulate_nodes_below(root_node.nodegroup, acc=branch_nodes) + + return ( + cls.objects.filter(nodegroup_id=root_node.pk) + .with_node_values( + branch_nodes, defer=defer, only=only, lhs="pk", outer_ref="tileid" + ) + .annotate(_nodegroup_alias=ORMValue(root_node_alias)) + ) + + @staticmethod + def _root_node(graph_slug, root_node_alias): + from arches.app.models.models import Node + + qs = ( + Node.objects.filter(graph__slug=graph_slug, alias=root_node_alias) + .select_related("nodegroup") + .prefetch_related("nodegroup__node_set") + # Prefetching to a depth of 2 seems like a good trade-off for now. 
+ .prefetch_related("nodegroup__children") + .prefetch_related("nodegroup__children__children") + ) + # TODO: get last + # https://github.com/archesproject/arches/issues/11565 + ret = qs.filter(source_identifier=None).first() + if ret is None: + raise Node.DoesNotExist(f"graph: {graph_slug} node: {root_node_alias}") + return ret + def is_fully_provisional(self): return bool(self.provisionaledits and not any(self.data.values())) - def save(self, *args, **kwargs): + def save(self, index=False, user=None, **kwargs): if self.sortorder is None or self.is_fully_provisional(): for node in Node.objects.filter(nodegroup_id=self.nodegroup_id).exclude( datatype="semantic" @@ -1576,13 +1992,226 @@ def save(self, *args, **kwargs): if not self.tileid: self.tileid = uuid.uuid4() add_to_update_fields(kwargs, "tileid") - super(TileModel, self).save(*args, **kwargs) # Call the "real" save() method. - def serialize(self, fields=None, exclude=["nodegroup"], **kwargs): + if getattr(self, "_fetched_root_nodes", False): + self._save_from_pythonic_model_values(user=user, index=index, **kwargs) + else: + super().save(**kwargs) + + def _save_from_pythonic_model_values(self, *, user=None, index=False, **kwargs): + from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.resource import Resource + from arches.app.models.tile import Tile + + should_save = self._update_tile_from_pythonic_model_values() + if not should_save: + return + + # Instantiate a proxy model and sync data to it, to run all side effects. + # Explanation: this is basically Tile.save() but with the serialized + # graph and tile fetching skipped. Hence why we might + # TODO: expose on vanilla model. + proxy = Tile.objects.get(pk=self.pk) + # TODO: handle create. + # Capture these to avoid re-querying in _apply_provisional_edit(). 
+ existing_data = proxy.data + existing_provisional_edits = proxy.provisionaledits + for field in field_names(self): + setattr(proxy, field, getattr(self, field)) + + datatype_factory = DataTypeFactory() + with transaction.atomic(): + proxy._Tile__preSave() + proxy.check_for_missing_nodes() + proxy.check_for_constraint_violation() + oldprovisionalvalue, newprovisionalvalue, provisional_edit_log_details = ( + self._apply_provisional_edit( + proxy, existing_data, existing_provisional_edits, user=user + ) + ) + + super().save(**kwargs) + + for node in self._root_node.nodegroup.node_set.all(): + datatype = datatype_factory.get_instance(node.datatype) + datatype.post_tile_save(self, str(node.pk)) + proxy._Tile__postSave() + + if self._state.adding: + proxy.save_edit( + user=user, + edit_type="tile create", + old_value={}, + new_value=self.data, + newprovisionalvalue=newprovisionalvalue, + provisional_edit_log_details=provisional_edit_log_details, + transaction_id=None, + # TODO: get this information upstream somewhere. + new_resource_created=False, + note=None, + ) + else: + proxy.save_edit( + user=user, + edit_type="tile edit", + old_value=existing_data, + new_value=self.data, + newprovisionalvalue=newprovisionalvalue, + oldprovisionalvalue=oldprovisionalvalue, + provisional_edit_log_details=provisional_edit_log_details, + transaction_id=None, + ) + + # TODO: add unique constraint for TileModel re: sortorder + # TODO: determine whether this should be skippable, and how. + self.refresh_from_db( + using=kwargs.get("using", None), + fields=kwargs.get("update_fields", None), + ) + + # TODO: refactor & expose this on vanilla model, at which point + # we may want to refresh_from_db() here. 
+ proxy_resource = Resource.objects.get(pk=self.resourceinstance_id) + proxy_resource.save_descriptors() + if index: + proxy_resource.index() + + def _update_tile_from_pythonic_model_values(self): + original_data = {**self.data} + + self._incoming_tile = {} + model_fields = field_names(self) + for tile_attr, tile_value in vars(self).items(): + if tile_attr.startswith("_") or tile_attr in model_fields: + continue + self._incoming_tile[tile_attr] = tile_value + + errors_by_alias = defaultdict(list) + # TODO: move this somewhere else. + ResourceInstance._validate_and_patch_from_tile_values( + self, self._root_node, errors_by_alias + ) + if not any(self.data.values()): + raise ValidationError(_("Tile is blank.")) + if self._tile_update_is_noop(original_data): + return False + if errors_by_alias: + raise ValidationError( + { + alias: ValidationError([e["message"] for e in errors]) + for alias, errors in errors_by_alias.items() + } + ) + return True + + def _tile_update_is_noop(self, original_data): + """Skipping no-op tile saves avoids regenerating RxR rows, at least + given the current implementation that doesn't serialize them.""" + from arches.app.datatypes.datatypes import DataTypeFactory + + # TODO: this currently prevents you from being able to *only* + # change parenttile and sortorder, but at least for sortorder + # that's probably good. Determine DX here. + + datatype_factory = DataTypeFactory() + for node in self._root_node.nodegroup.node_set.all(): + if node.datatype == "semantic": + continue + old = original_data[str(node.nodeid)] + datatype_instance = datatype_factory.get_instance(node.datatype) + new = self.data[str(node.nodeid)] + if not datatype_instance.values_match(old, new): + return False + + return True + + def _apply_provisional_edit( + self, proxy, existing_data, existing_provisional_edits, *, user=None + ): + # TODO: decompose this out of Tile.save() and call *that*. 
+ # this section moves the data over from self.data to self.provisionaledits if certain users permissions are in force + # then self.data is restored from the previously saved tile data + from arches.app.models.tile import Tile + from arches.app.utils.permission_backend import user_is_resource_reviewer + + oldprovisionalvalue = None + newprovisionalvalue = None + creating_new_tile = self._state.adding + existing_instance = Tile( + data={**existing_data}, provisional_edits={**existing_provisional_edits} + ) + existing_instance._state.adding = creating_new_tile + if user is not None and not user_is_resource_reviewer(user): + if creating_new_tile: + # the user has previously edited this tile + proxy.apply_provisional_edit( + user, self.data, action="update", existing_model=existing_instance + ) + oldprovisional = proxy.get_provisional_edit(existing_instance, user) + if oldprovisional is not None: + oldprovisionalvalue = oldprovisional["value"] + else: + proxy.apply_provisional_edit(user, data=self.data, action="create") + + newprovisionalvalue = self.data + self.provisionaledits = proxy.provisionaledits + self.data = existing_data + # Also update proxy, which will be used to run further side effects. 
+ proxy.provisionaledits = proxy.provisionaledits + proxy.data = existing_data + + provisional_edit_log_details = { + "user": user, + "provisional_editor": user, + "action": "create tile" if creating_new_tile else "add edit", + } + + return oldprovisionalvalue, newprovisionalvalue, provisional_edit_log_details + + def serialize( + self, fields=None, exclude=("nodegroup", "nodegroup_alias"), **kwargs + ): return JSONSerializer().handle_model( self, fields=fields, exclude=exclude, **kwargs ) + def refresh_from_db(self, using=None, fields=None, from_queryset=None): + if ( + not from_queryset + and (root_nodes := getattr(self, "_fetched_root_nodes", set())) + and self.resourceinstance.graph.slug + ): + aliases = [n.alias for n in root_nodes] + from_queryset = self.__class__.as_nodegroup( + root_node_alias=self._root_node.alias, + graph_slug=self.resourceinstance.graph.slug, + only=aliases, + ) + super().refresh_from_db(using, fields, from_queryset) + # Copy over annotations. + refreshed_tile = from_queryset[0] + for field in aliases: + setattr(self, field, getattr(refreshed_tile, field)) + else: + super().refresh_from_db(using, fields, from_queryset) + + @staticmethod + def get_blank_tile_from_nodegroup( + nodegroup: NodeGroup, resourceid=None, parenttile=None + ): + tile = TileModel( + nodegroup_id=nodegroup.pk, + resourceinstance_id=resourceid, + parenttile=parenttile, + data={}, + ) + + for node in nodegroup.node_set.all(): + tile.data[str(node.nodeid)] = None + + tile.full_clean() + return tile + class Value(models.Model): valueid = models.UUIDField(primary_key=True) @@ -1982,7 +2611,7 @@ class WorkflowHistory(models.Model): stepdata = JSONField(null=False, default=dict) componentdata = JSONField(null=False, default=dict) # `auto_now_add` marks the field as non-editable, which prevents the field from being serialized, so updating to use `default` instead - created = models.DateTimeField(default=django.utils.timezone.now, null=False) + created = 
models.DateTimeField(default=timezone.now, null=False) user = models.ForeignKey( db_column="userid", null=True, diff --git a/arches/app/models/querysets.py b/arches/app/models/querysets.py new file mode 100644 index 00000000000..2c4ca24d5aa --- /dev/null +++ b/arches/app/models/querysets.py @@ -0,0 +1,241 @@ +from django.db.models import OuterRef, Prefetch, QuerySet + +from arches.app.models.utils import find_root_node, generate_tile_annotations + + +class TileQuerySet(QuerySet): + def with_node_values( + self, nodes, *, defer=None, only=None, lhs=None, outer_ref, depth=1 + ): + """TileModel.as_nodegroup() is the better entrypoint, see docs there.""" + from arches.app.models.models import TileModel + + node_alias_annotations = generate_tile_annotations( + nodes, + defer=defer, + only=only, + model=self.model, + lhs=lhs, + outer_ref=outer_ref, + ) + + prefetches = [] + if depth: + prefetches.append( + Prefetch( + "children", + queryset=TileModel.objects.with_node_values( + nodes, + defer=defer, + only=only, + depth=depth - 1, + lhs="parenttile", + outer_ref="tileid", + ), + ) + ) + + self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] + return ( + self.filter(data__has_any_keys=[n.pk for n in self._fetched_nodes]) + .prefetch_related(*prefetches) + .annotate(**node_alias_annotations) + .order_by("sortorder") + ) + + def _prefetch_related_objects(self): + """Call datatype to_python() methods when materializing the QuerySet. + Discard annotations that do not pertain to this nodegroup. 
+ """ + from arches.app.datatypes.datatypes import DataTypeFactory + + super()._prefetch_related_objects() + + datatype_factory = DataTypeFactory() + NOT_PROVIDED = object() + for tile in self._result_cache: + tile._fetched_root_nodes = set() + for node in self._fetched_nodes: + if node.nodegroup_id == tile.nodegroup_id: + tile._root_node = node + tile._fetched_root_nodes.add(node) + tile_val = getattr(tile, node.alias, NOT_PROVIDED) + if tile_val is not NOT_PROVIDED: + datatype_instance = datatype_factory.get_instance(node.datatype) + python_val = datatype_instance.to_python(tile_val) + setattr(tile, node.alias, python_val) + else: + delattr(tile, node.alias) + for child_tile in tile.children.all(): + setattr(child_tile, tile.nodegroup_alias, child_tile.parenttile) + children = getattr(tile, child_tile.nodegroup_alias, []) + children.append(child_tile) + setattr(tile, child_tile.nodegroup_alias, children) + + def _clone(self): + ret = super()._clone() + if hasattr(self, "_fetched_nodes"): + ret._fetched_nodes = self._fetched_nodes + return ret + + +class ResourceInstanceQuerySet(QuerySet): + def with_nodegroups( + self, graph_slug=None, *, resource_ids=None, defer=None, only=None + ): + """Annotates a ResourceInstance QuerySet with tile data unpacked + and mapped onto nodegroup aliases, e.g.: + + >>> concepts = ResourceInstance.objects.with_nodegroups("concept") + + With slightly fewer keystrokes: + + >>> concepts = ResourceInstance.as_model("concept") + + Or direct certain nodegroups with defer/only as in the QuerySet interface: + + >>> partial_concepts = ResourceInstance.as_model("concept", only=["ng1", "ng2"]) + + Example: + + >>> from arches.app.models.models import * + >>> concepts = ResourceInstance.as_model("concept") + + Django QuerySet methods are available for efficient queries: + >>> concepts.count() + 785 + + Filter on any nested node at the top level ("shallow query"). + In this example, statement_content is a cardinality-N node, thus an array. 
+ # TODO: should name with `_set`? But then would need to check for clashes. + + >>> subset = concepts.filter(statement_content__len__gt=0)[:4] + >>> for concept in subset: + print(concept) + for stmt in concept.statement: + print("\t", stmt) + print("\t\t", stmt.statement_content) + + + + [{'en': {'value': 'Method of acquiring property ... + ... + + Access child and parent tiles by nodegroup aliases: + + >>> has_child = concepts.filter(statement_data_assignment_statement_content__len__gt=0).first() + >>> has_child + (751614c0-de7a-47d7-8e87-a4d18c7337ff)> + >>> has_child.statement_data_assignment_statement + + >>> parent = has_child.statement[0] + >>> parent.statement_data_assignment_statement[0].statement is parent + True + + Provisional edits are completely ignored for the purposes of querying. + """ + from arches.app.models.models import GraphModel, NodeGroup, TileModel + + if resource_ids and not graph_slug: + graph_query = GraphModel.objects.filter(resourceinstance__in=resource_ids) + else: + # TODO: get latest graph. + # https://github.com/archesproject/arches/issues/11565 + graph_query = GraphModel.objects.filter( + slug=graph_slug, source_identifier=None + ) + try: + # Prefetch sibling nodes for use in _prefetch_related_objects() + # and generate_tile_annotations(). + source_graph = graph_query.prefetch_related( + "node_set__nodegroup__node_set" + ).get() + except GraphModel.DoesNotExist as e: + e.add_note(f"No graph found with slug: {graph_slug}") + raise + + nodes = source_graph.node_set.all() + node_alias_annotations = generate_tile_annotations( + nodes, + defer=defer, + only=only, + model=self.model, + lhs=None, # TODO: AWKWARD + outer_ref="resourceinstanceid", + ) + self._fetched_nodes = [n for n in nodes if n.alias in node_alias_annotations] + # TODO: there might be some way to prune unused annotations. 
+ + if resource_ids: + qs = self.filter(pk__in=resource_ids) + else: + qs = self.filter(graph=source_graph) + return qs.prefetch_related( + "graph__node_set__nodegroup", + Prefetch( + "tilemodel_set", + queryset=TileModel.objects.with_node_values( + self._fetched_nodes, + only=[n.alias for n in self._fetched_nodes], + lhs="pk", + outer_ref="tileid", + ).annotate( + cardinality=NodeGroup.objects.filter( + pk=OuterRef("nodegroup_id") + ).values("cardinality") + ), + to_attr="_annotated_tiles", + ), + ).annotate(**node_alias_annotations) + + def _prefetch_related_objects(self): + """Attach annotated tiles to resource instances, at the root, by + nodegroup alias. TODO: consider building as a nested structure. + Discard annotations only used for shallow filtering. + Memoize fetched root node aliases. + """ + super()._prefetch_related_objects() + + root_nodes = [] + for node in self._fetched_nodes: + root_node = find_root_node(node.nodegroup.node_set.all(), node.nodegroup_id) + root_nodes.append(root_node) + + for resource in self._result_cache: + resource._fetched_root_nodes = set() + for node in self._fetched_nodes: + delattr(resource, node.alias) + for root_node in root_nodes: + setattr( + resource, + root_node.alias, + None if root_node.nodegroup.cardinality == "1" else [], + ) + resource._fetched_root_nodes.add(root_node) + annotated_tiles = getattr(resource, "_annotated_tiles", []) + for annotated_tile in annotated_tiles: + for root_node in root_nodes: + if root_node.pk == annotated_tile.nodegroup_id: + ng_alias = root_node.alias + break + else: + raise RuntimeError("missing root node for annotated tile") + + if annotated_tile.cardinality == "n": + tile_array = getattr(resource, ng_alias) + tile_array.append(annotated_tile) + elif root_node.nodegroup.parentnodegroup_id is None: + setattr(resource, ng_alias, annotated_tile) + + for child_tile in annotated_tile.children.all(): + setattr(child_tile, ng_alias, child_tile.parenttile) + children = getattr(annotated_tile, 
child_tile.nodegroup_alias, []) + if child_tile not in children: + children.append(child_tile) + setattr(annotated_tile, child_tile.nodegroup_alias, children) + + def _clone(self): + ret = super()._clone() + if hasattr(self, "_fetched_nodes"): + ret._fetched_nodes = self._fetched_nodes + return ret diff --git a/arches/app/models/resource.py b/arches/app/models/resource.py index 3d775494103..2a772ccecaf 100644 --- a/arches/app/models/resource.py +++ b/arches/app/models/resource.py @@ -246,9 +246,9 @@ def save_edit( if transaction_id is not None: edit.transactionid = transaction_id edit.edittype = edit_type - edit.save() + edit.save(force_insert=True) - def save(self, *args, **kwargs): + def save(self, **kwargs): """ Saves and indexes a single resource @@ -280,10 +280,7 @@ def save(self, *args, **kwargs): else: user = request.user - if not self.principaluser_id and user: - self.principaluser_id = user.id - - super(Resource, self).save(*args, **kwargs) + super(Resource, self).save(user=user, **kwargs) if should_update_resource_instance_lifecycle_state: self.save_edit( @@ -421,9 +418,9 @@ def bulk_save(resources, transaction_id=None): se.bulk_index(documents) se.bulk_index(term_list) - def index(self, context=None): + def index(self, context=None, fetchTiles=True): """ - Indexes all the nessesary items values of a resource to support search + Indexes all the necessary items values of a resource to support search Keyword Arguments: context -- a string such as "copy" to indicate conditions under which a document is indexed @@ -440,6 +437,7 @@ def index(self, context=None): ) } document, terms = self.get_documents_to_index( + fetchTiles=fetchTiles, datatype_factory=datatype_factory, node_datatypes=node_datatypes, context=context, diff --git a/arches/app/models/serializers.py b/arches/app/models/serializers.py new file mode 100644 index 00000000000..07a2b4855f9 --- /dev/null +++ b/arches/app/models/serializers.py @@ -0,0 +1,118 @@ +from copy import deepcopy + +from 
django.db.models import F +from rest_framework import renderers +from rest_framework import serializers + +from arches.app.datatypes.datatypes import DataTypeFactory +from arches.app.models.models import Node, TileModel +from arches.app.utils.betterJSONSerializer import JSONSerializer + + +# Workaround for I18n_string fields +renderers.JSONRenderer.encoder_class = JSONSerializer +renderers.JSONOpenAPIRenderer.encoder_class = JSONSerializer + + +class ArchesTileSerializer(serializers.ModelSerializer): + tileid = serializers.UUIDField(validators=[], required=False) + + _nodes = Node.objects.none() + + def get_default_field_names(self, declared_fields, model_info): + field_names = super().get_default_field_names(declared_fields, model_info) + try: + field_names.remove("data") + except ValueError: + pass + aliases = self.__class__.Meta.fields + if aliases == "__all__": + # TODO: latest graph + root_node = ( + Node.objects.filter( + graph__slug=self.__class__.Meta.graph_slug, + alias=self.__class__.Meta.root_node, + graph__source_identifier=None, + ) + .select_related("nodegroup") + .prefetch_related("nodegroup__node_set") + .get() + ) + aliases = ( + root_node.nodegroup.node_set.exclude(nodegroup=None) + .exclude(datatype="semantic") + .values_list("alias", flat=True) + ) + field_names.extend(aliases) + return field_names + + def build_unknown_field(self, field_name, model_class): + graph_slug = self.__class__.Meta.graph_slug + if not self._nodes: + self._nodes = Node.objects.filter( + graph__slug=graph_slug, + # TODO: latest + graph__source_identifier=None, + ) + + for node in self._nodes: + if node.alias == field_name: + break + else: + raise Node.DoesNotExist( + f"Node with alias {field_name} not found in graph {graph_slug}" + ) + + datatype = DataTypeFactory().get_instance(node.datatype) + model_field = deepcopy(datatype.rest_framework_model_field) + if model_field is None: + raise NotImplementedError(f"Field missing for datatype: {node.datatype}") + 
model_field.model = model_class + model_field.blank = not node.isrequired + + return self.build_standard_field(field_name, model_field) + + +class ArchesModelSerializer(serializers.ModelSerializer): + legacyid = serializers.CharField(max_length=255, required=False, allow_null=True) + + _root_nodes = Node.objects.none() + + def get_fields(self): + graph_slug = self.__class__.Meta.graph_slug + + if self.__class__.Meta.nodegroups == "__all__": + if not self._root_nodes: + self._root_nodes = Node.objects.filter( + graph__slug=graph_slug, + # TODO: latest + graph__source_identifier=None, + nodegroup_id=F("nodeid"), + ).select_related("nodegroup") + for root in self._root_nodes: + if root.alias not in self._declared_fields: + + class TileSerializer(ArchesTileSerializer): + class Meta: + model = TileModel + graph_slug = self.__class__.Meta.graph_slug + root_node = root.alias + fields = self.__class__.Meta.fields + + self._declared_fields[root.alias] = TileSerializer( + many=root.nodegroup.cardinality == "n", required=False + ) + + return super().get_fields() + + def get_default_field_names(self, declared_fields, model_info): + field_names = super().get_default_field_names(declared_fields, model_info) + aliases = self.__class__.Meta.fields + if aliases != "__all__": + raise NotImplementedError # TODO... 
+ nodegroups = self.__class__.Meta.nodegroups + if nodegroups == "__all__": + field_names.extend(self._root_nodes.values_list("alias", flat=True)) + else: + field_names.extend(self.__class__.Meta.nodegroups) + return field_names diff --git a/arches/app/models/tile.py b/arches/app/models/tile.py index 4b5af19946e..f96b93ef97e 100644 --- a/arches/app/models/tile.py +++ b/arches/app/models/tile.py @@ -811,7 +811,7 @@ def __preSave(self, request=None, context=None): ) logger.warning(e) - def __preDelete(self, request): + def __preDelete(self, request=None): try: for function in self._getFunctionClassInstances(): try: diff --git a/arches/app/models/utils.py b/arches/app/models/utils.py index ccfe06daff7..25dbaa3ffb5 100644 --- a/arches/app/models/utils.py +++ b/arches/app/models/utils.py @@ -1,3 +1,8 @@ +from django.contrib.postgres.expressions import ArraySubquery +from django.db.models import OuterRef, Subquery +from django.db.models.expressions import BaseExpression + + def add_to_update_fields(kwargs, field_name): """ Update the `update_field` arg inside `kwargs` (if present) in-place @@ -14,5 +19,99 @@ def add_to_update_fields(kwargs, field_name): kwargs["update_fields"] = new -def field_names(instance): - return {f.name for f in instance._meta.fields} +def field_names(instance_or_class): + return {f.name for f in instance_or_class._meta.fields} + + +def generate_tile_annotations(nodes, *, defer, only, model, lhs, outer_ref): + from arches.app.datatypes.datatypes import DataTypeFactory + from arches.app.models.models import ResourceInstance, TileModel + + if defer and only and (overlap := set(defer).intersection(set(only))): + raise ValueError(f"Got intersecting defer/only args: {overlap}") + datatype_factory = DataTypeFactory() + node_alias_annotations = {} + invalid_names = field_names(model) + is_resource = True + if ResourceInstance in model.mro(): + is_resource = True + elif TileModel in model.mro(): + is_resource = False + else: + raise ValueError(model) + 
for node in nodes: + if node.datatype == "semantic": + continue + if node.nodegroup_id is None: + continue + if is_resource: + root = find_root_node(node.nodegroup.node_set.all(), node.nodegroup_id) + if (defer and root.alias in defer) or (only and root.alias not in only): + continue + else: + if (defer and node.alias in defer) or (only and node.alias not in only): + continue + if node.alias in invalid_names: + raise ValueError(f'"{node.alias}" clashes with a model field name.') + + datatype_instance = datatype_factory.get_instance(node.datatype) + tile_values_query = get_values_query( + nodegroup=node.nodegroup, + base_lookup=datatype_instance.get_base_orm_lookup(node), + lhs=lhs, + outer_ref=outer_ref, + ) + node_alias_annotations[node.alias] = tile_values_query + + if not node_alias_annotations: + raise ValueError("All fields were excluded.") + # TODO: also add some safety around bad nodegroups. + if not is_resource: + for given_alias in only or []: + if given_alias not in node_alias_annotations: + raise ValueError(f'"{given_alias}" is not a valid node alias.') + + return node_alias_annotations + + +def pop_arches_model_kwargs(kwargs, model_fields): + arches_model_data = {} + for kwarg, value in kwargs.items(): + if kwarg not in model_fields: + arches_model_data[kwarg] = value + without_model_data = {k: v for k, v in kwargs.items() if k not in arches_model_data} + return arches_model_data, without_model_data + + +def find_root_node(prefetched_siblings, nodegroup_id): + for sibling_node in prefetched_siblings: + if sibling_node.pk == nodegroup_id: + return sibling_node + + +def get_values_query( + nodegroup, base_lookup, *, lhs=None, outer_ref=None +) -> BaseExpression: + """Return a tile values query expression for use in a + ResourceInstanceQuerySet or TileQuerySet. + """ + from arches.app.models.models import TileModel + + # TODO: make this a little less fragile. 
from functools import partial

from django.core.exceptions import ValidationError as DjangoValidationError
from rest_framework.exceptions import NotFound, ValidationError

from arches.app.models.models import ResourceInstance, TileModel
from arches.app.utils.permission_backend import (
    user_can_delete_resource,
    user_can_edit_resource,
    user_can_read_resource,
)


class ArchesModelAPIMixin:
    """Glue between DRF generic views and Arches semantic-model querysets.

    Mix into a rest_framework generic view. Expects the view's
    ``serializer_class.Meta`` to declare ``model``, ``fields``,
    ``graph_slug``, and either ``nodegroups`` (resource-based models)
    or ``root_node`` (tile-based models).
    """

    def get_queryset(self):
        """Return the annotated queryset for the serializer's model.

        Routes to ``as_model()`` for resource-instance models and to
        ``as_nodegroup()`` for tile models. Only ``fields = "__all__"``
        is supported so far; anything else raises NotImplementedError.
        """
        meta = self.serializer_class.Meta
        if meta.fields != "__all__":
            # Arbitrary field subsets are not implemented yet; fail loudly
            # rather than silently serving the wrong shape of data.
            raise NotImplementedError
        if ResourceInstance in meta.model.mro():
            only = None if meta.nodegroups == "__all__" else meta.nodegroups
            return meta.model.as_model(meta.graph_slug, only=only)
        if TileModel in meta.model.mro():
            # fields was "__all__" (checked above), which normalizes to
            # only=None for as_nodegroup().
            return meta.model.as_nodegroup(
                meta.root_node, graph_slug=meta.graph_slug, only=None
            )
        raise NotImplementedError

    def get_object(self, user=None, permission_callable=None):
        """Fetch the object, enforce permissions, and bind the acting user.

        Raises NotFound (rather than a 403) when the permission check
        fails, so unauthorized clients cannot probe for existence.
        Patches the instance's ``save`` so any later save performed by
        the view is attributed to ``user``.
        """
        instance = super().get_object()
        if permission_callable and not permission_callable(
            user=user, resource=instance
        ):
            raise NotFound
        instance.save = partial(instance.save, user=user)
        return instance

    def _bind_permission_check(self, request, permission_callable):
        """Curry get_object() with the request user and a permission check.

        DRF's mixins call ``self.get_object()`` with no arguments, so the
        per-verb permission callable is injected by rebinding the method
        on the (per-request) view instance.
        """
        self.get_object = partial(
            self.get_object,
            user=request.user,
            permission_callable=permission_callable,
        )

    def create(self, request, *args, **kwargs):
        self._bind_permission_check(request, user_can_edit_resource)
        return super().create(request, *args, **kwargs)

    def retrieve(self, request, *args, **kwargs):
        self._bind_permission_check(request, user_can_read_resource)
        return super().retrieve(request, *args, **kwargs)

    def update(self, request, *args, **kwargs):
        self._bind_permission_check(request, user_can_edit_resource)
        # TODO: return correct response with updated object.
        return super().update(request, *args, **kwargs)

    def destroy(self, request, *args, **kwargs):
        self._bind_permission_check(request, user_can_delete_resource)
        return super().destroy(request, *args, **kwargs)

    def validate_tile_data_and_save(self, serializer):
        """Re-raise ValidationError as DRF ValidationError.

        In 3.0 (2014), DRF decided to stop full_clean()'ing before save(),
        which divorces DRF validation needs from model logic needing to
        support the Django admin or similar ModelFormish patterns.
        The stated reasons were:
            - to avoid calling into big & scary full_clean().
            - to force expressing validation logic outside of models.
        but adhering to that second point would be difficult in light of
        how dynamically these fields are constructed.

        Discussion:
        https://github.com/encode/django-rest-framework/discussions/7850
        """
        try:
            serializer.save()
        except DjangoValidationError as django_error:
            # TODO: doesn't handle well inner lists, stringifies them
            raise ValidationError(detail=django_error.error_dict) from django_error

    def perform_create(self, serializer):
        self.validate_tile_data_and_save(serializer)

    def perform_update(self, serializer):
        self.validate_tile_data_and_save(serializer)
- 11042 Adds `ResourceInstanceLifecycle`s and `ResourceInstanceLifecycleState`s - Add token-based CSS theming [#11262](https://github.com/archesproject/arches/issues/11262) +- Add query interface, REST serializers for retrieving & updating tile data by node alias [#11595](https://github.com/archesproject/arches/issues/11595) - Support Python 3.13 [#11550](https://github.com/archesproject/arches/pull/11550) ### Performance improvements @@ -32,6 +33,7 @@ Python: psycopg2: 2.9.10 Added: + djangorestframework: 3.15.2 Removed: tomli @@ -68,12 +70,14 @@ JavaScript: 1. Remove "3.10" from the `python-version` matrix in `.github/workflows/main.yml`. -1. In settings.py, add the following key to `DATABASES` to [improve indexing performance](https://github.com/archesproject/arches/issues/11382): - ``` - "OPTIONS": { - "options": "-c cursor_tuple_fraction=1", - }, - ``` +1. In settings.py: + - add the following key to `DATABASES` to [improve indexing performance](https://github.com/archesproject/arches/issues/11382): + ``` + "OPTIONS": { + "options": "-c cursor_tuple_fraction=1", + }, + ``` + - add `rest_framework` to `INSTALLED_APPS` if you wish to use an [ArchesModelSerializer]() to build REST APIs for your resource models. 1. Update your frontend dependencies: ``` diff --git a/tests/models/resource_test.py b/tests/models/resource_test.py index 30df220784f..2c3e8af4d3d 100644 --- a/tests/models/resource_test.py +++ b/tests/models/resource_test.py @@ -352,7 +352,8 @@ def test_delete_acts_on_custom_indices(self, mock): other_resource = Resource(pk=uuid.uuid4()) with sync_overridden_test_settings_to_arches(): self.test_resource.delete_index(other_resource.pk) - self.assertIn(str(other_resource.pk), str(mock._mock_call_args)) + # delete_resources() was called with the correct resource id. 
+ self.assertEqual(other_resource.pk, mock._mock_call_args[1]["resources"].pk) def test_publication_restored_on_save(self): """ diff --git a/tests/models/tile_model_tests.py b/tests/models/tile_model_tests.py index 43c1a9b3923..7e4535144aa 100644 --- a/tests/models/tile_model_tests.py +++ b/tests/models/tile_model_tests.py @@ -23,6 +23,7 @@ from django.contrib.auth.models import User from django.db.utils import ProgrammingError from django.http import HttpRequest +from django.test.utils import captured_stdout from arches.app.models.tile import Tile, TileValidationError from arches.app.models.resource import Resource from arches.app.models.models import ( @@ -393,7 +394,7 @@ def test_tile_cardinality(self): } second_tile = Tile(second_json) - with self.assertRaises(ProgrammingError): + with self.assertRaises(ProgrammingError), captured_stdout(): second_tile.save(index=False, request=request) def test_apply_provisional_edit(self): diff --git a/tests/views/workflow_tests.py b/tests/views/workflow_tests.py index a2b93b29da6..5e75935e76f 100644 --- a/tests/views/workflow_tests.py +++ b/tests/views/workflow_tests.py @@ -4,6 +4,7 @@ from django.contrib.auth.models import Group, User from django.urls import reverse from django.test.client import Client +from django.test.utils import captured_stdout from arches.app.models.models import WorkflowHistory from tests.base_test import ArchesTestCase @@ -138,7 +139,7 @@ def test_post_workflow_history(self): # Non-superuser cannot update someone else's workflow. self.client.force_login(self.editor) - with self.assertLogs("django.request", level="WARNING"): + with self.assertLogs("django.request", level="WARNING"), captured_stdout(): response = self.client.post( reverse( "workflow_history",