diff --git a/modules/common/src/DataResource.php b/modules/common/src/DataResource.php index ff31e3f63c..b04defd8e9 100644 --- a/modules/common/src/DataResource.php +++ b/modules/common/src/DataResource.php @@ -86,7 +86,7 @@ public function __construct($file_path, $mimeType, $perspective = self::DEFAULT_ $this->mimeType = $mimeType; $this->perspective = $perspective; // @todo Create a timestamp property and generate uuid for version. - $this->version = time(); + $this->version = $this->getCurrentTime(); $this->checksum = NULL; } @@ -102,7 +102,7 @@ public function __construct($file_path, $mimeType, $perspective = self::DEFAULT_ * system to create new versions of resources when they deem it necessary. */ public function createNewVersion() { - $newVersion = time(); + $newVersion = $this->getCurrentTime(); if ($newVersion == $this->version) { $newVersion++; } @@ -110,6 +110,24 @@ public function createNewVersion() { return $this->createCommon('version', $newVersion); } + /** + * Use Drupal datetime service if container available,to make this mockable. + * + * @return int + * Current timestamp. + * + * @todo Remove try/catch? + */ + protected function getCurrentTime(): int { + try { + return (int) \Drupal::service('datetime.time')->getCurrentTime(); + } + catch (\Throwable $e) { + // Fall back to php time(). + return time(); + } + } + /** * Create a new perspective. * diff --git a/modules/common/src/EventDispatcherTrait.php b/modules/common/src/EventDispatcherTrait.php index c4bd15890c..5464bdb003 100644 --- a/modules/common/src/EventDispatcherTrait.php +++ b/modules/common/src/EventDispatcherTrait.php @@ -30,7 +30,7 @@ trait EventDispatcherTrait { * @throws \Exception * If any of the subscribers registered and Exception it is thrown. 
*/ - private function dispatchEvent($eventName, $data, $validator = NULL) { + protected function dispatchEvent($eventName, $data, $validator = NULL) { if ($this->useLegacyDispatcher()) { $data = $this->legacyDispatchEvent($eventName, $data, $validator); return $data; diff --git a/modules/common/src/Plugin/DkanApiDocs/CommonApiDocs.php b/modules/common/src/Plugin/DkanApiDocs/CommonApiDocs.php index bd29f733b8..0a1a6e275c 100644 --- a/modules/common/src/Plugin/DkanApiDocs/CommonApiDocs.php +++ b/modules/common/src/Plugin/DkanApiDocs/CommonApiDocs.php @@ -9,7 +9,7 @@ * * @DkanApiDocs( * id = "common_dkan_api_docs", - * description = "Base API docs plugin." + * description = @Translation("Base API docs plugin.") * ) */ class CommonApiDocs extends DkanApiDocsBase { diff --git a/modules/common/src/Plugin/DkanApiDocsBase.php b/modules/common/src/Plugin/DkanApiDocsBase.php index 122a418bbe..7b6c183e68 100644 --- a/modules/common/src/Plugin/DkanApiDocsBase.php +++ b/modules/common/src/Plugin/DkanApiDocsBase.php @@ -10,9 +10,6 @@ /** * Base class for API Docs plugins. - * - * @see \Drupal\plugin_type_example\Annotation\Sandwich - * @see \Drupal\plugin_type_example\SandwichInterface */ abstract class DkanApiDocsBase extends PluginBase implements DkanApiDocsInterface { diff --git a/modules/datastore/src/Service/ResourceLocalizer.php b/modules/datastore/src/Service/ResourceLocalizer.php index ea946ecead..5d6f1ed353 100644 --- a/modules/datastore/src/Service/ResourceLocalizer.php +++ b/modules/datastore/src/Service/ResourceLocalizer.php @@ -10,11 +10,11 @@ use Contracts\FactoryInterface; use Drupal\Core\File\FileSystemInterface; use Drupal\metastore\Exception\AlreadyRegistered; -use Drupal\metastore\Reference\Referencer; use Drupal\metastore\ResourceMapper; use FileFetcher\FileFetcher; use Procrastinator\Result; use Drupal\common\EventDispatcherTrait; +use Drupal\metastore\Plugin\MetastoreReferenceType\ResourceReference; /** * Resource localizer. 
@@ -120,7 +120,7 @@ private function registerNewPerspectives(DataResource $resource, FileFetcher $fi $dir = "file://" . $this->drupalFiles->getPublicFilesDirectory(); $localFileDrupalUri = str_replace($dir, "public://", $localFilePath); $localUrl = $this->drupalFiles->fileCreateUrl($localFileDrupalUri); - $localUrl = Referencer::hostify($localUrl); + $localUrl = ResourceReference::hostify($localUrl); $new = $resource->createNewPerspective(self::LOCAL_FILE_PERSPECTIVE, $localFilePath); diff --git a/modules/datastore/tests/src/Unit/Controller/MockStorage.php b/modules/datastore/tests/src/Unit/Controller/MockStorage.php index 580a8f9937..a67934d3cb 100644 --- a/modules/datastore/tests/src/Unit/Controller/MockStorage.php +++ b/modules/datastore/tests/src/Unit/Controller/MockStorage.php @@ -14,7 +14,7 @@ public function retrieveContains(string $string, bool $caseSensitive): array { return []; } - public function retrieveByHash($hash, $schemaId) { + public function retrieveByHash($hash) { return []; } diff --git a/modules/metastore/metastore.module b/modules/metastore/metastore.module index fcd55f4969..60a8ff20b0 100644 --- a/modules/metastore/metastore.module +++ b/modules/metastore/metastore.module @@ -62,6 +62,8 @@ function resource_mapper_display() { /** * Helper method to retrieve the static value for a resource's revisioning. + * + * We use a static variable because it's set in an event subscriber. 
*/ function resource_mapper_new_revision() { return drupal_static('metastore_resource_mapper_new_revision', 0); diff --git a/modules/metastore/metastore.schemas.yml b/modules/metastore/metastore.schemas.yml new file mode 100644 index 0000000000..b5a0aa960a --- /dev/null +++ b/modules/metastore/metastore.schemas.yml @@ -0,0 +1,21 @@ +catalog: + schema_path: schema/catalog.json + references: + - { property: dataset, schema: dataset } + +dataset: + schema_path: schema/dataset.json + ui_schema_path: schema/dataset.ui.json + identifier: { property: identifier, type: uuid } + references: + - { property: distribution, schema: distribution } + - { property: publisher, schema: organization } + - { property: keyword, schema: keyword } + - { property: theme, schema: theme } + +distribution: + schema_path: schema/distribution.json + references: + - { property: downloadURL, type: resource } + - { property: describedBy, schema: data-dictionary, type: id } + class: distribution diff --git a/modules/metastore/metastore.services.yml b/modules/metastore/metastore.services.yml index 69d5d61cec..50f4b00995 100644 --- a/modules/metastore/metastore.services.yml +++ b/modules/metastore/metastore.services.yml @@ -36,18 +36,18 @@ services: dkan.metastore.referencer: class: \Drupal\metastore\Reference\Referencer arguments: - - '@config.factory' - - '@dkan.metastore.storage' - calls: - - [setLoggerFactory, ['@logger.factory']] + - '@dkan.metastore.reference_map' dkan.metastore.dereferencer: class: \Drupal\metastore\Reference\Dereferencer arguments: + - '@dkan.metastore.reference_map' + + dkan.metastore.reference_map: + class: \Drupal\metastore\Reference\ReferenceMap + arguments: + - '@plugin.manager.dkan_reference_type' - '@config.factory' - - '@dkan.metastore.storage' - calls: - - [setLoggerFactory, ['@logger.factory']] dkan.metastore.orphan_checker: class: \Drupal\metastore\Reference\OrphanChecker @@ -109,3 +109,7 @@ services: class: \Drupal\metastore\DataDictionary\DataDictionaryDiscovery 
arguments: - '@config.factory' + + plugin.manager.dkan_reference_type: + class: \Drupal\metastore\Reference\ReferenceTypeManager + parent: default_plugin_manager diff --git a/modules/metastore/src/Annotation/MetastoreReferenceType.php b/modules/metastore/src/Annotation/MetastoreReferenceType.php new file mode 100644 index 0000000000..4942ea0b39 --- /dev/null +++ b/modules/metastore/src/Annotation/MetastoreReferenceType.php @@ -0,0 +1,33 @@ +service->getAll('distribution') as $metadata) { // Attempt to determine the filepath for this distribution's resource. - $dist_file_path = Referencer::hostify($metadata->{'$.data.downloadURL'} ?? ''); + $dist_file_path = ResourceReference::hostify($metadata->{'$.data.downloadURL'} ?? ''); // If the current distribution does is not the excluded distribution, and // it's resource file path matches the supplied file path... if ($metadata->{'$.identifier'} !== $dist_id && !empty($dist_file_path) && $dist_file_path === $file_path) { diff --git a/modules/metastore/src/LifeCycle/LifeCycle.php b/modules/metastore/src/LifeCycle/LifeCycle.php index b912d04c60..f5caa621e3 100644 --- a/modules/metastore/src/LifeCycle/LifeCycle.php +++ b/modules/metastore/src/LifeCycle/LifeCycle.php @@ -3,8 +3,6 @@ namespace Drupal\metastore\LifeCycle; use Drupal\common\EventDispatcherTrait; -use Drupal\common\DataResource; -use Drupal\common\UrlHostTokenResolver; use Drupal\Core\Datetime\DateFormatter; use Drupal\Core\Queue\QueueFactory; use Drupal\metastore\MetastoreItemInterface; @@ -132,7 +130,7 @@ protected function datasetLoad(MetastoreItemInterface $data) { $metadata = $data->getMetaData(); // Dereference dataset properties. 
- $metadata = $this->dereferencer->dereference($metadata); + $metadata = $this->dereferencer->dereference($metadata, "dataset"); $metadata = $this->addDatasetModifiedDate($metadata, $data->getModifiedDate()); $data->setMetadata($metadata); @@ -157,26 +155,7 @@ protected function distributionLoad(MetastoreItemInterface $data) { return; } - $downloadUrl = $metadata->data->downloadURL; - - if (isset($downloadUrl) && !filter_var($downloadUrl, FILTER_VALIDATE_URL)) { - $resourceIdentifier = $downloadUrl; - $ref = NULL; - $original = NULL; - [$ref, $original] = $this->retrieveDownloadUrlFromResourceMapper($resourceIdentifier); - - $downloadUrl = isset($original) ? $original : ""; - - $refProperty = "%Ref:downloadURL"; - $metadata->data->{$refProperty} = count($ref) == 0 ? NULL : $ref; - } - - if (is_string($downloadUrl)) { - $downloadUrl = UrlHostTokenResolver::resolve($downloadUrl); - } - - $metadata->data->downloadURL = $downloadUrl; - + $metadata->data = $this->dereferencer->dereference($metadata->data, "distribution"); $data->setMetadata($metadata); } @@ -204,54 +183,6 @@ protected function distributionPredelete(MetastoreItemInterface $data) { } } - /** - * Get a download URL. - * - * @param string $resourceIdentifier - * Identifier for resource. - * - * @return array - * Array of reference and original. - */ - private function retrieveDownloadUrlFromResourceMapper(string $resourceIdentifier) { - $reference = []; - $original = NULL; - - $info = DataResource::parseUniqueIdentifier($resourceIdentifier); - - // Load resource object. 
- $sourceResource = $this->resourceMapper->get($info['identifier'], DataResource::DEFAULT_SOURCE_PERSPECTIVE, $info['version']); - - if (!$sourceResource) { - return [$reference, $original]; - } - - $reference[] = $this->createResourceReference($sourceResource); - $perspective = \resource_mapper_display(); - $resource = $sourceResource; - - if ( - $perspective != DataResource::DEFAULT_SOURCE_PERSPECTIVE && - $new = $this->resourceMapper->get($info['identifier'], $perspective, $info['version']) - ) { - $resource = $new; - $reference[] = $this->createResourceReference($resource); - } - $original = $resource->getFilePath(); - - return [$reference, $original]; - } - - /** - * Private. - */ - private function createResourceReference(DataResource $resource): object { - return (object) [ - "identifier" => $resource->getUniqueIdentifier(), - "data" => $resource, - ]; - } - /** * Dataset pre-save life cycle method. * @@ -287,7 +218,7 @@ protected function referenceMetadata(MetastoreItemInterface $data): void { return $data instanceof MetastoreItemInterface; }); - $metadata = $this->referencer->reference($metadata); + $metadata = $this->referencer->reference($metadata, "dataset"); $data->setMetadata($metadata); @@ -313,6 +244,7 @@ protected function datadictionaryPresave(MetastoreItemInterface $data): void { */ protected function distributionPresave(MetastoreItemInterface $data) { $metadata = $data->getMetaData(); + $metadata->data = $this->referencer->reference($metadata->data, "distribution"); $data->setMetadata($metadata); } diff --git a/modules/metastore/src/Plugin/MetastoreReferenceType/ItemReference.php b/modules/metastore/src/Plugin/MetastoreReferenceType/ItemReference.php new file mode 100644 index 0000000000..3bcd99c2b4 --- /dev/null +++ b/modules/metastore/src/Plugin/MetastoreReferenceType/ItemReference.php @@ -0,0 +1,190 @@ +storage = $storageFactory->getInstance($this->schemaId()); + $this->resourceMapper = $resourceMapper; + } + + /** + * Container injection. 
+ * + * @param \Drupal\common\Plugin\ContainerInterface $container + * The service container. + * @param array $config + * A configuration array containing information about the plugin instance. + * @param string $pluginId + * The plugin_id for the plugin instance. + * @param mixed $pluginDefinition + * The plugin implementation definition. + * + * @return static + */ + public static function create( + ContainerInterface $container, + array $config, + $pluginId, + $pluginDefinition + ) { + $loggerFactory = $container->get('logger.factory'); + $storageFactory = $container->get('dkan.metastore.storage'); + $resourceMapper = $container->get('dkan.metastore.resource_mapper'); + return new static($config, $pluginId, $pluginDefinition, $loggerFactory, $storageFactory, $resourceMapper); + } + + /** + * {@inheritdoc} + */ + public function reference($value): string { + // First see if there is an existing item that matches the value. + $identifier = $this->checkExistingReference($value); + // In some cases, we always want to create and save a new referenced item. + if (!$identifier || $this->newRevision()) { + $identifier = $this->createPropertyReference($value); + } + + return $identifier; + } + + /** + * Should a new revision of this item be saved, even if it exists already? + * + * @return bool + * True if a new revision should be created regardless. + * + * @todo Refactor; this logic should be absracted and not distribution/resource-specific. + */ + protected function newRevision() { + if ($this->property() == 'distribution' && $this->resourceMapper->newRevision()) { + return TRUE; + } + return FALSE; + } + + /** + * {@inheritdoc} + */ + public function dereference(string $identifier, bool $showId = FALSE) { + try { + $value = $this->storage->retrieve($identifier); + } + catch (MissingObjectException $exception) { + $value = FALSE; + } + + if (!$value) { + // If a property node was not found, it most likely means it was deleted + // while still being referenced. 
+ $this->logger->error( + 'Property @property_id reference @identifier not found', + [ + '@property_id' => $this->property(), + '@identifier' => var_export($identifier, TRUE), + ] + ); + + return NULL; + } + $metadata = json_decode($value); + // Just return the contents of "data" unless we're requesting to show IDs. + return $showId ? $metadata : $metadata->data; + + } + + /** + * Checks for an existing value reference for that property id. + * + * @param string|object $value + * The property's value used to find an existing reference. + * + * @return string|null + * The existing reference's uuid, or NULL if not found. + * + * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException + * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException + */ + protected function checkExistingReference($value) { + $identifier = $this->storage->retrieveByHash(Service::metadataHash($value)) ?? NULL; + if ($identifier && !$this->storage->isPublished($identifier)) { + $this->storage->publish($identifier); + } + + return $identifier; + } + + /** + * Creates a new value reference for that property id in a data node. + * + * @param string|object $value + * The property's value. + * + * @return string + * The new reference's uuid, or NULL. + * + * @todo Replace identifier/data structure. + */ + protected function createPropertyReference($value): string { + // Create json metadata for the reference. + $data = new \stdClass(); + $data->identifier = (new Uuid5())->generate($this->schemaId, $value); + $data->data = $value; + $json = json_encode($data); + + // Create node to store this reference. 
+ $identifier = $this->storage->store($json, $data->identifier); + return $identifier; + } + +} diff --git a/modules/metastore/src/Plugin/MetastoreReferenceType/ResourceReference.php b/modules/metastore/src/Plugin/MetastoreReferenceType/ResourceReference.php new file mode 100644 index 0000000000..16481fa219 --- /dev/null +++ b/modules/metastore/src/Plugin/MetastoreReferenceType/ResourceReference.php @@ -0,0 +1,429 @@ +resourceMapper = $resourceMapper; + $this->fileSystem = $fileSystem; + $this->entityTypeManager = $entityTypeManager; + $this->client = $client; + parent::__construct($config, $pluginDefinition, $pluginId, $loggerFactory); + } + + /** + * Container injection. + * + * @param \Drupal\common\Plugin\ContainerInterface $container + * The service container. + * @param array $config + * A configuration array containing information about the plugin instance. + * @param string $pluginId + * The plugin_id for the plugin instance. + * @param mixed $pluginDefinition + * The plugin implementation definition. + * + * @return static + */ + public static function create( + ContainerInterface $container, + array $config, + $pluginId, + $pluginDefinition + ) { + return new static( + $config, + $pluginId, + $pluginDefinition, + $container->get('logger.factory'), + $container->get('dkan.metastore.resource_mapper'), + $container->get('file_system'), + $container->get('entity_type.manager'), + $container->get('http_client') + ); + } + + /** + * {@inheritdoc} + */ + public function reference($value): string { + return $this->registerWithResourceMapper( + static::hostify($value), + $this->getMimeType($this->context) + ); + } + + /** + * {@inheritdoc} + */ + public function dereference(string $identifier, bool $showId = FALSE) { + // If simple identifier, convert to URL. 
+ if (filter_var($identifier, FILTER_VALIDATE_URL)) { + return $identifier; + } + + $resource = $this->resourceLookup($identifier); + if ($resource && $showId) { + return [$this->createResourceReference($resource)]; + } + return $resource ? UrlHostTokenResolver::resolve($resource->getFilePath()) : $identifier; + } + + /** + * Build object with identifier/data structure for reference. + * + * @param Drupal\common\DataResource $resource + * A DKAN resource object. + * + * @return object + * The same resource object, wrapped in a stdClass object with an identifier + * property. + */ + private function createResourceReference(DataResource $resource): object { + return (object) [ + "identifier" => $resource->getUniqueIdentifier(), + "data" => $resource, + ]; + } + + /** + * Register the supplied resource details with the resource mapper. + * + * @param string $downloadUrl + * The download URL for the resource being registered. + * @param string $mimeType + * The mime type for the resource being registered. + * + * @return string + * A unique ID for the resource generated using the supplied details. + */ + protected function registerWithResourceMapper(string $downloadUrl, string $mimeType): string { + try { + // Create a new resource using the supplied resource details. + $resource = new DataResource($downloadUrl, $mimeType); + + // Attempt to register the url with the resource file mapper. + if ($this->resourceMapper->register($resource)) { + // Upon successful registration, replace the download URL with a unique + // ID generated by the resource mapper. + $downloadUrl = $resource->getUniqueIdentifier(); + } + } + catch (AlreadyRegistered $e) { + $info = json_decode($e->getMessage()); + + // If resource mapper registration failed due to this resource already + // being registered, generate a new version of the resource and update the + // download URL with the new version ID. 
+ if (isset($info[0]->identifier)) { + $stored = $this->resourceMapper->get($info[0]->identifier, DataResource::DEFAULT_SOURCE_PERSPECTIVE); + $downloadUrl = $this->handleExistingResource($info, $stored, $mimeType); + } + } + + return $downloadUrl; + } + + /** + * Private. + */ + protected function handleExistingResource($info, $stored, $mimeType) { + if ($info[0]->perspective == DataResource::DEFAULT_SOURCE_PERSPECTIVE && + ($this->resourceMapper->newRevision() == 1 || $stored->getMimeType() != $mimeType)) { + $new = $stored->createNewVersion(); + // Update the MIME type, since this may be updated by the user. + $new->changeMimeType($mimeType); + + $this->resourceMapper->registerNewVersion($new); + $downloadUrl = $new->getUniqueIdentifier(); + } + else { + $downloadUrl = $stored->getUniqueIdentifier(); + } + return $downloadUrl; + } + + /** + * Substitute the host for local URLs with a custom localhost token. + * + * @param string $resourceUrl + * The URL of the resource being substituted. + * + * @return string + * The resource URL with the custom localhost token. + */ + public static function hostify(string $resourceUrl): string { + // Get HTTP server public files URL and extract the host. + $serverPublicFilesUrl = UrlHostTokenResolver::getServerPublicFilesUrl(); + $serverPublicFilesUrl = isset($serverPublicFilesUrl) ? parse_url($serverPublicFilesUrl) : NULL; + $serverHost = $serverPublicFilesUrl['host'] ?? \Drupal::request()->getHost(); + // Determine whether the resource URL has the same host as this server. + $resourceParsedUrl = parse_url($resourceUrl); + if (isset($resourceParsedUrl['host']) && $resourceParsedUrl['host'] == $serverHost) { + // Swap out the host portion of the resource URL with the localhost token. + $resourceParsedUrl['host'] = UrlHostTokenResolver::TOKEN; + $resourceUrl = self::unparseUrl($resourceParsedUrl); + } + return $resourceUrl; + } + + /** + * Process URL. 
+ * + * @param mixed $parsedUrl + * Outut of parse_url() + * + * @return string + * A resource URL + * + * @todo Clean all this URL/file logic up! + */ + protected static function unparseUrl($parsedUrl) { + $url = ''; + $urlParts = [ + 'scheme', + 'host', + 'port', + 'user', + 'pass', + 'path', + 'query', + 'fragment', + ]; + + foreach ($urlParts as $part) { + if (!isset($parsedUrl[$part])) { + continue; + } + $url .= ($part == "port") ? ':' : ''; + $url .= ($part == "query") ? '?' : ''; + $url .= ($part == "fragment") ? '#' : ''; + $url .= $parsedUrl[$part]; + $url .= ($part == "scheme") ? '://' : ''; + } + + return $url; + } + + /** + * Determine the mime type of the supplied local file. + * + * @param string $downloadUrl + * Local resource file path. + * + * @return string|null + * The detected mime type or NULL on failure. + */ + private function getLocalMimeType(string $downloadUrl): ?string { + $mime_type = NULL; + + // Retrieve and decode the file name from the supplied download URL's path. + $filename = $this->fileSystem->basename($downloadUrl); + $filename = urldecode($filename); + + // Attempt to load the file by file name. + $files = $this->entityTypeManager->getStorage('file')->loadByProperties(['filename' => $filename]); + $file = reset($files); + + // If a valid file was found for the given file name, extract the file's + // mime type. + if ($file instanceof File) { + $mime_type = $file->getMimeType(); + } + // Otherwise, log an error notifying the user that a file was not found. + else { + $this->logger->notice( + 'Unable to determine mime type of "@name"; file not found.', + ['@name' => $filename] + ); + } + + return $mime_type; + } + + /** + * Determine the mime type of the supplied remote file. + * + * @param string $downloadUrl + * Remote resource file URL. + * + * @return string|null + * The detected mime type, or NULL on failure. 
+ */ + private function getRemoteMimeType(string $downloadUrl): ?string { + $mime_type = NULL; + + // Perform HTTP Head request against the supplied URL in order to determine + // the content type of the remote resource. + $response = $this->client->head($downloadUrl); + // Extract the full value of the content type header. + $content_type = $response->getHeader('Content-Type'); + // Attempt to extract the mime type from the content type header. + if (isset($content_type[0])) { + $mime_type = $content_type[0]; + } + + return $mime_type; + } + + /** + * Determine the mime type of the supplied distribution's resource. + * + * @param object $distribution + * A dataset distribution object. + * + * @return string + * The detected mime type, or DEFAULT_MIME_TYPE on failure. + * + * @todo Update the UI to set mediaType when a format is selected. + */ + private function getMimeType($distribution): string { + $mimeType = "text/plain"; + + // If we have a mediaType set, use that. + if (isset($distribution->mediaType)) { + $mimeType = $distribution->mediaType; + } + // Fall back if we have an importable format set. + elseif (isset($distribution->format) && $distribution->format == 'csv') { + $mimeType = 'text/csv'; + } + elseif (isset($distribution->format) && $distribution->format == 'tsv') { + $mimeType = 'text/tab-separated-values'; + } + // Otherwise, determine the proper mime type using the distribution's + // download URL. + elseif (isset($distribution->downloadURL)) { + // Determine whether the supplied distribution has a local or remote + // resource. + $is_local = $distribution->downloadURL !== $this->hostify($distribution->downloadURL); + $mimeType = $is_local ? + $this->getLocalMimeType($distribution->downloadURL) : + $this->getRemoteMimeType($distribution->downloadURL); + } + + return $mimeType ?? self::DEFAULT_MIME_TYPE; + } + + /** + * Get a file resource object. + * + * @param string $resourceIdentifier + * Identifier for resource. 
+ * + * @return \Drupal\common\DataResource|null + * URL value or null if none found. + */ + protected function resourceLookup(string $resourceIdentifier) { + $info = DataResource::parseUniqueIdentifier($resourceIdentifier); + + // Load resource object. + $resource = $this->resourceMapper->get( + $info['identifier'], + DataResource::DEFAULT_SOURCE_PERSPECTIVE, + $info['version'] + ); + + if (!$resource) { + return NULL; + } + + $perspective = $this->resourceMapper->display(); + + if ( + $perspective != DataResource::DEFAULT_SOURCE_PERSPECTIVE && + $new = $this->resourceMapper->get($info['identifier'], $perspective, $info['version']) + ) { + $resource = $new; + } + return $resource; + } + +} diff --git a/modules/metastore/src/Plugin/MetastoreReferenceType/UrlItemReference.php b/modules/metastore/src/Plugin/MetastoreReferenceType/UrlItemReference.php new file mode 100644 index 0000000000..4968e97174 --- /dev/null +++ b/modules/metastore/src/Plugin/MetastoreReferenceType/UrlItemReference.php @@ -0,0 +1,116 @@ +checkExistingReference($value)) { + return $identifier; + } + // As this is a URL field, if we can't reference we just pass through. + return $value; + } + + /** + * {@inheritdoc} + */ + public function dereference(string $identifier, $showId = FALSE) { + if (strpos($identifier, '://') !== FALSE) { + // This is still a URL, and was never referenced. + return $identifier; + } + + $storage = $this->storageFactory->getInstance($this->schemaId()); + try { + $storage->retrieve($identifier); + } + catch (MissingObjectException $exception) { + $this->logger->notice( + 'Property @property_id reference @identifier not found. The referenced item may have been deleted.', + [ + '@property_id' => $this->property(), + '@identifier' => var_export($identifier, TRUE), + ] + ); + return NULL; + } + + $itemUri = 'dkan://metastore/schemas/' . $this->schemaId() . '/items/' . $identifier; + $value = Url::fromUri($itemUri)->toString(); + return $showId ? 
$this->createIdRef($value, $identifier) : $value; + } + + /** + * Create an identifier/data structure for $ref object. + * + * @param string $identifier + * The reference identifier to be dereferenced. + * @param mixed $value + * The dereferenced value. + */ + protected function createIdRef(string $identifier, $value) { + return (object) [ + 'identifier' => $identifier, + 'data' => $value, + ]; + } + + /** + * Checks for an existing value reference for that property id. + * + * @param string|object $value + * The property's value used to find an existing reference. + * + * @return string|false + * The existing reference's uuid, or FALSE if not found. + */ + protected function checkExistingReference($value) { + $parts = UrlHelper::parse($value); + // We expect to see the path to the schema's metastore items. + $expected = 'api/1/metastore/schemas/' . $this->schemaId() . '/items/'; + // String position of metastore path for schema items should be an integer. + $pos = strpos($parts['path'], $expected); + if ($pos === FALSE) { + return FALSE; + } + // Identifier should be at end of path. + $identifier = substr($value, ($pos + strlen($expected))); + + // If there is a metastore item by this schema and identifier, we're good. + $storage = $this->storageFactory->getInstance($this->schemaId()); + try { + $storage->retrieve($identifier); + } + catch (MissingObjectException $exception) { + // If the URL was formatted correctly but no item was found, log it. 
+ $this->logger->notice( + 'Could not map URL to existing @schema item: @property_id with value: @value', + [ + '@schema' => $this->schemaId(), + '@property_id' => $this->property(), + '@value' => var_export($value, TRUE), + ] + ); + $identifier = FALSE; + } + + return $identifier; + } + +} diff --git a/modules/metastore/src/Reference/Dereferencer.php b/modules/metastore/src/Reference/Dereferencer.php index f495307841..24c8a5f3cd 100644 --- a/modules/metastore/src/Reference/Dereferencer.php +++ b/modules/metastore/src/Reference/Dereferencer.php @@ -2,184 +2,73 @@ namespace Drupal\metastore\Reference; -use Contracts\FactoryInterface; -use Drupal\Core\Config\ConfigFactoryInterface; -use Drupal\common\LoggerTrait; -use Drupal\metastore\Exception\MissingObjectException; - /** * Metastore dereferencer. */ -class Dereferencer { - use HelperTrait; - use LoggerTrait; +class Dereferencer implements DereferencerInterface { /** - * Storage factory interface service. + * Reference map service. * - * @var \Contracts\FactoryInterface + * @var \Drupal\metastore\Reference\ReferenceMapInterface */ - private $storageFactory; + private ReferenceMapInterface $referenceMap; /** * Constructor. + * + * @param \Drupal\metastore\Reference\ReferenceMapInterface $referenceMap + * ReferenceMap service, to find a schema's referenced properties and + * their types. */ - public function __construct(ConfigFactoryInterface $configService, FactoryInterface $storageFactory) { - $this->setConfigService($configService); - $this->storageFactory = $storageFactory; + public function __construct(ReferenceMapInterface $referenceMap) { + $this->referenceMap = $referenceMap; } /** - * Replaces value references in a dataset with with their actual values. - * - * @param object $data - * The json metadata object. - * - * @return mixed - * Modified json metadata object. 
+ * {@inheritdoc} */ - public function dereference($data) { - $this->validate($data); - + public function dereference(object $metadata, string $schemaId = 'dataset') { + $refs = $this->referenceMap->getAllReferences($schemaId); // Cycle through the dataset properties we seek to dereference. - foreach ($this->getPropertyList() as $propertyId) { - if (isset($data->{$propertyId})) { - $this->dereferenceProperty($propertyId, $data); + foreach ($refs as $propertyName => $reference) { + if (!isset($metadata->{$propertyName})) { + continue; } - } - return $data; - } - /** - * Dereferences property and handles empty values if any. - * - * @param string $propertyId - * The dataset property id. - * @param object $data - * Modified json metadata object. - */ - private function dereferenceProperty(string $propertyId, $data) { - $referenceProperty = "%Ref:{$propertyId}"; - $ref = NULL; - $actual = NULL; - [$ref, $actual] = $this->dereferencePropertyUuid($propertyId, $data->{$propertyId}); - if (!empty($ref) && !empty($actual)) { - $data->{$referenceProperty} = $ref; - $data->{$propertyId} = $actual; - } - else { - unset($data->{$propertyId}); - } - } + $value = $metadata->{$propertyName}; + $metadata->{$propertyName} = $this->dereferenceProperty($reference, $value, FALSE); - /** - * Replaces a property reference with its actual value, general case. - * - * @param string $property_id - * The dataset property id. - * @param string|array $uuid - * A single reference uuid string, or an array of reference uuids. - * - * @return mixed - * An array of dereferenced values, a single one, or NULL. 
- */ - private function dereferencePropertyUuid(string $property_id, $uuid) { - if (is_array($uuid)) { - return $this->dereferenceMultiple($property_id, $uuid); - } - elseif (is_string($uuid) && $this->getUuidService()->isValid($uuid)) { - return $this->dereferenceSingle($property_id, $uuid); - } - else { - $this->log('value_referencer', 'Unexpected data type when dereferencing property_id: @property_id with uuid: @uuid', - [ - '@property_id' => $property_id, - '@uuid' => var_export($uuid, TRUE), - ]); - return NULL; - } - } - - /** - * Replaces a property reference with its actual value, array case. - * - * @param string $property_id - * A dataset property id. - * @param array $uuids - * An array of reference uuids. - * - * @return array - * An array of dereferenced values. - */ - private function dereferenceMultiple(string $property_id, array $uuids) : array { - $result = []; - $reference = []; - $ref = NULL; - $actual = NULL; - foreach ($uuids as $uuid) { - [$ref, $actual] = $this->dereferenceSingle($property_id, $uuid); - if (NULL !== $ref && NULL !== $actual) { - $result[] = $actual; - $reference[] = $ref; + if (is_null($metadata->{$propertyName})) { + unset($metadata->{$propertyName}); + } + else { + $metadata->{"%Ref:{$propertyName}"} = $this->dereferenceProperty($reference, $value, TRUE); } } - return [$reference, $result]; + return $metadata; } /** - * Replaces a property reference with its actual value, string or object case. - * - * @param string $property_id - * The dataset property id. - * @param string $uuid - * Either a uuid or an actual json value. - * - * @return object|string - * The data from this reference. + * Dereferences property and handles empty values if any. * - * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException - * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException + * @param \Drupal\metastore\Reference\ReferenceTypeInterface $reference + * The reference information. 
+ * @param string|string[] $value + * The value to dereference. + * @param bool $showId + * Wrap the value in an object with identifier/data properties? */ - private function dereferenceSingle(string $property_id, string $uuid) { - $storage = $this->storageFactory->getInstance($property_id); - try { - $value = $storage->retrieve($uuid); + private function dereferenceProperty(ReferenceTypeInterface $reference, $value, bool $showId = FALSE) { + if (!is_array($value)) { + return $reference->dereference($value, $showId); } - catch (MissingObjectException $exception) { - $value = FALSE; - } - - if ($value) { - $metadata = json_decode($value); - return [$metadata, $metadata->data]; - } - - // If a property node was not found, it most likely means it was deleted - // while still being referenced. - $this->log( - 'value_referencer', - 'Property @property_id reference @uuid not found', - [ - '@property_id' => $property_id, - '@uuid' => var_export($uuid, TRUE), - ] - ); - - return [NULL, NULL]; - } - /** - * Validates data. - * - * @param object $data - * The json metadata object. - * - * @throws \Exception - */ - private function validate($data) { - if (!is_object($data)) { - throw new \Exception("data must be an object."); + $dereferenced = []; + foreach ($value as $identifier) { + $dereferenced[] = $reference->dereference($identifier, $showId); } + return empty($dereferenced) ? NULL : $dereferenced; } } diff --git a/modules/metastore/src/Reference/DereferencerInterface.php b/modules/metastore/src/Reference/DereferencerInterface.php new file mode 100644 index 0000000000..ddb2a90e2a --- /dev/null +++ b/modules/metastore/src/Reference/DereferencerInterface.php @@ -0,0 +1,21 @@ +configService = $configService; - } - - /** - * Get the list of dataset properties being referenced. - * - * @return array - * List of dataset properties. - * - * @todo consolidate with common RouteProvider's getPropertyList. 
- */ - private function getPropertyList() : array { - if (isset($this->configService)) { - $list = $this->configService->get('metastore.settings')->get('property_list'); - return array_values(array_filter($list)); - } - throw new \Exception("Can't get property list, the config service was not set."); - } - - /** - * Private. - * - * @param mixed $data - * Data whose type we want to match. - * - * @return array|string - * Either the empty string or an empty array. - */ - private function emptyPropertyOfSameType($data) { - if (is_array($data)) { - return []; - } - return ""; - } - - /** - * Uuid Service. - */ - private function getUuidService() { - return new Uuid5(); - } - -} diff --git a/modules/metastore/src/Reference/OrphanChecker.php b/modules/metastore/src/Reference/OrphanChecker.php index e3fb9f3f30..e21072d94f 100644 --- a/modules/metastore/src/Reference/OrphanChecker.php +++ b/modules/metastore/src/Reference/OrphanChecker.php @@ -9,7 +9,6 @@ * Checks for orphanned references in deleted datasets. */ class OrphanChecker { - use HelperTrait; /** * The queue service. @@ -18,12 +17,19 @@ class OrphanChecker { */ protected $queueService; + /** + * The config service. + * + * @var \Drupal\Core\Config\ConfigFactoryInterface + */ + private ConfigFactoryInterface $configService; + /** * Constructor. */ public function __construct(ConfigFactoryInterface $configService, QueueFactory $queueService) { + $this->configService = $configService; $this->queueService = $queueService; - $this->setConfigService($configService); } /** @@ -46,7 +52,8 @@ public function processReferencesInDeletedDataset($data) { throw new \Exception("data must be an object."); } // Cycle through the dataset properties we seek to reference. 
- foreach ($this->getPropertyList() as $property_id) { + $list = $this->configService->get('metastore.settings')->get('property_list'); + foreach ($list as $property_id) { if (isset($data->{$property_id})) { $this->processReferencesInDeletedProperty($property_id, $data->{$property_id}); } @@ -64,7 +71,8 @@ public function processReferencesInDeletedDataset($data) { public function processReferencesInUpdatedDataset($old_dataset, $new_dataset) { $this->objectsCheck([$old_dataset, $new_dataset]); // Cycle through the dataset properties being referenced, check for orphans. - foreach ($this->getPropertyList() as $property_id) { + $list = $this->configService->get('metastore.settings')->get('property_list'); + foreach ($list as $property_id) { if (!isset($old_dataset->{$property_id})) { // The old dataset had no value for this property, thus no references // could be deleted. Safe to skip checking for orphan reference. @@ -147,4 +155,20 @@ private function objectsCheck($objects) { } } + /** + * Private. + * + * @param mixed $data + * Data whose type we want to match. + * + * @return array|string + * Either the empty string or an empty array. 
+ */ + private function emptyPropertyOfSameType($data) { + if (is_array($data)) { + return []; + } + return ""; + } + } diff --git a/modules/metastore/src/Reference/ReferenceLookup.php b/modules/metastore/src/Reference/ReferenceLookup.php index 0c3116427a..729227ff73 100644 --- a/modules/metastore/src/Reference/ReferenceLookup.php +++ b/modules/metastore/src/Reference/ReferenceLookup.php @@ -2,12 +2,12 @@ namespace Drupal\metastore\Reference; -use Drupal\common\LoggerTrait; use Drupal\metastore\Factory\MetastoreItemFactoryInterface; use Drupal\metastore\ReferenceLookupInterface; use Contracts\FactoryInterface; use Drupal\Core\Cache\Cache; +use Drupal\Core\Cache\CacheTagsInvalidator; use Drupal\Core\Cache\CacheTagsInvalidatorInterface; use Drupal\Core\Extension\ModuleHandlerInterface; use RootedData\RootedJsonData; @@ -16,8 +16,6 @@ * {@inheritdoc} */ class ReferenceLookup implements ReferenceLookupInterface { - use HelperTrait; - use LoggerTrait; /** * Metastore Storage service. @@ -33,6 +31,20 @@ class ReferenceLookup implements ReferenceLookupInterface { */ protected $metastoreItemFactory; + /** + * Cache tag invalidator service. + * + * @var \Drupal\Core\Cache\CacheTagsInvalidator + */ + protected CacheTagsInvalidator $invalidator; + + /** + * Module handler service. + * + * @var \Drupal\Core\Extension\ModuleHandlerInterface + */ + protected ModuleHandlerInterface $moduleHandler; + /** * Module Handler service. * @@ -107,7 +119,7 @@ protected function decodeJsonMetadata(string $json): array { // Decode the supplied JSON metadata string. $metadata = json_decode($json); // Determine the path to the legacy metadata schema file. - $module_path = $this->moduleHandler->getModule(get_module_name())->getPath(); + $module_path = $this->moduleHandler->getModule('metastore')->getPath(); $legacy_schema_path = $module_path . '/docs/legacy_metadata.json'; // Fetch the legacy metadata schema. 
$legacy_schema = file_get_contents($legacy_schema_path); diff --git a/modules/metastore/src/Reference/ReferenceMap.php b/modules/metastore/src/Reference/ReferenceMap.php new file mode 100644 index 0000000000..1bac9fbc21 --- /dev/null +++ b/modules/metastore/src/Reference/ReferenceMap.php @@ -0,0 +1,124 @@ +referenceTypeManager = $referenceTypeManager; + $this->configService = $configService; + $this->map = $this->buildReferenceMap(); + } + + /** + * {@inheritdoc} + */ + public function getAllReferences(string $schemaId): array { + return $this->map[$schemaId] ?? []; + } + + /** + * {@inheritdoc} + */ + public function getReference(string $schemaId, string $propertyName): ?ReferenceTypeInterface { + $refs = $this->getAllReferences($schemaId); + return $refs[$propertyName] ?? NULL; + } + + /** + * Temporary solution as we move toward YAML-based schema definitions. + * + * @return array[] + * An array of arrays of references keyed by schema then property name. + */ + protected function buildReferenceMap() { + return [ + "catalog" => [ + 'dataset' => $this->createReference('item', 'dataset', 'dataset'), + ], + "distribution" => [ + 'downloadURL' => $this->createReference('resource', 'downloadURL'), + // 'describedBy' => $this->createReference('url', 'describedBy', 'data-dictionary'), + ], + 'dataset' => $this->getDatasetReferences(), + ]; + } + + /** + * Build a reference object for each dataset property being referenced. + * + * @return \Drupal\metastore\Reference\ReferenceTypeInterface[] + * References keyed by property name; empty when none are configured. + * + * @todo consolidate with common RouteProvider's getPropertyList. + */ + protected function getDatasetReferences() : array { + $list = $this->configService->get('metastore.settings')->get('property_list') ?? []; + $refs = []; + foreach (array_values(array_filter($list)) as $propertyName) { + $refs[$propertyName] = $this->createReference('item', $propertyName, $propertyName); + } + return $refs; + } + + /** + * Build a reference object.
+ * + * @param string $type + * Reference type - ID of a plugin that implements ReferenceTypeInterface. + * @param string $propertyName + * The property of the schema to build the reference for. + * @param string|null $schemaId + * ID of the schema to pull the property from. + * + * @return \Drupal\metastore\Reference\ReferenceTypeInterface + * An instantiated ReferenceType object. + */ + protected function createReference(string $type, string $propertyName, ?string $schemaId = NULL): ReferenceTypeInterface { + $config = [ + 'property' => $propertyName, + 'schemaId' => $schemaId, + ]; + + return $this->referenceTypeManager->createInstance($type, $config); + } + +} diff --git a/modules/metastore/src/Reference/ReferenceMapInterface.php b/modules/metastore/src/Reference/ReferenceMapInterface.php new file mode 100644 index 0000000000..e4d36bb9bc --- /dev/null +++ b/modules/metastore/src/Reference/ReferenceMapInterface.php @@ -0,0 +1,34 @@ +property = $config['property']; + $this->schemaId = $config['schemaId'] ?? NULL; + $this->logger = $loggerFactory->get('metastore'); + parent::__construct($config, $pluginId, $pluginDefinition); + } + + /** + * Retrieve the @description property from the annotation and return it. + * + * @return string + * Description.
+ */ + public function description() { + return $this->pluginDefinition['description']; + } + + /** + * {@inheritdoc} + */ + public function property(): string { + return $this->property; + } + + /** + * {@inheritdoc} + */ + public function type(): string { + return $this->pluginDefinition['id']; + } + + /** + * {@inheritdoc} + */ + public function schemaId(): ?string { + return $this->schemaId; + } + + /** + * {@inheritdoc} + */ + public function setContext($context): self { + $this->context = $context; + return $this; + } + +} diff --git a/modules/metastore/src/Reference/ReferenceTypeInterface.php b/modules/metastore/src/Reference/ReferenceTypeInterface.php new file mode 100644 index 0000000000..48aa7f1ed2 --- /dev/null +++ b/modules/metastore/src/Reference/ReferenceTypeInterface.php @@ -0,0 +1,75 @@ +alterInfo('metastore_reference_type_info'); + $this->setCacheBackend($cache_backend, 'metastore_reference_type_plugins'); + } + +} diff --git a/modules/metastore/src/Reference/Referencer.php b/modules/metastore/src/Reference/Referencer.php index 9cbad78d40..abdbfdb1b5 100644 --- a/modules/metastore/src/Reference/Referencer.php +++ b/modules/metastore/src/Reference/Referencer.php @@ -2,24 +2,10 @@ namespace Drupal\metastore\Reference; -use Drupal\Core\Config\ConfigFactoryInterface; - -use Drupal\common\LoggerTrait; -use Drupal\common\DataResource; -use Drupal\common\UrlHostTokenResolver; -use Drupal\metastore\Exception\AlreadyRegistered; -use Drupal\metastore\ResourceMapper; -use Drupal\metastore\Service; - -use Contracts\FactoryInterface; -use GuzzleHttp\Client as GuzzleClient; - /** * Metastore referencer service. */ -class Referencer { - use HelperTrait; - use LoggerTrait; +class Referencer implements ReferencerInterface { /** * Default Mime Type to use when mime type detection fails. @@ -27,431 +13,64 @@ class Referencer { * @var string */ protected const DEFAULT_MIME_TYPE = 'text/plain'; - /** - * Storage factory interface service. 
+ * The reference information by property. * - * @var \Contracts\FactoryInterface + * @var \Drupal\metastore\Reference\ReferenceMap */ - private $storageFactory; + private $referenceMap; /** * Constructor. + * + * @param \Drupal\metastore\Reference\ReferenceMapInterface $referenceMap + * ReferenceMap service, to find a schema's referenced properties and + * their types. */ - public function __construct(ConfigFactoryInterface $configService, FactoryInterface $storageFactory) { - $this->setConfigService($configService); - $this->storageFactory = $storageFactory; - $this->setLoggerFactory(\Drupal::service('logger.factory')); + public function __construct(ReferenceMapInterface $referenceMap) { + $this->referenceMap = $referenceMap; } /** - * Replaces some dataset property values with references. - * - * @param object $data - * Dataset json object. - * - * @return object - * Json object modified with references to some of its properties' values. + * {@inheritdoc} */ - public function reference($data) { - if (!is_object($data)) { - throw new \Exception("data must be an object."); - } + public function reference(object $metadata, string $schemaId = 'dataset') { + $refs = $this->referenceMap->getAllReferences($schemaId); // Cycle through the dataset properties we seek to reference. - foreach ($this->getPropertyList() as $property_id) { - if (isset($data->{$property_id})) { - $data->{$property_id} = $this->referenceProperty($property_id, $data->{$property_id}); + foreach ($refs as $propertyName => $reference) { + if (!isset($metadata->{$propertyName})) { + continue; } - } - return $data; - } - /** - * References a dataset property's value, general case. - * - * @param string $property_id - * The dataset property id. - * @param mixed $data - * Single value or array of values to be referenced. - * - * @return string|array - * Single reference, or an array of references. 
- */ - private function referenceProperty(string $property_id, $data) { - if (is_array($data)) { - return $this->referenceMultiple($property_id, $data); - } - else { - // Case for $data being an object or a string. - return $this->referenceSingle($property_id, $data); + $value = $metadata->{$propertyName}; + $reference->setContext($metadata); + if (is_array($value)) { + $metadata->{$propertyName} = $this->referenceArray($reference, $value); + } + else { + $metadata->{$propertyName} = $reference->reference($value); + } } + return $metadata; } /** * References a dataset property's value, array case. * - * @param string $property_id - * The dataset property id. + * @param \Drupal\metastore\Reference\ReferenceTypeInterface $reference + * The reference information. * @param array $values * The array of values to be referenced. * * @return array * The array of uuid references. */ - private function referenceMultiple(string $property_id, array $values) : array { + private function referenceArray(ReferenceTypeInterface $reference, array $values) : array { $result = []; foreach ($values as $value) { - $data = $this->referenceSingle($property_id, $value); - if (NULL !== $data) { - $result[] = $data; - } - } - return $result; - } - - /** - * References a dataset property's value, string or object case. - * - * @param string $property_id - * The dataset property id. - * @param string|object $value - * The value to be referenced. - * - * @return string|null - * The Uuid reference, or NULL on failure. 
- */ - private function referenceSingle(string $property_id, $value) { - - if ($property_id == 'distribution') { - $value = $this->distributionHandling($value); - } - - $uuid = $this->checkExistingReference($property_id, $value); - if (!$uuid) { - $uuid = $this->createPropertyReference($property_id, $value); - } - if ($uuid) { - return $uuid; - } - else { - $this->log( - 'value_referencer', - 'Neither found an existing nor could create a new reference for property_id: @property_id with value: @value', - [ - '@property_id' => $property_id, - '@value' => var_export($value, TRUE), - ] - ); - return NULL; - } - } - - /** - * Attempt to register this distribution's resource with the resource mapper. - * - * If this distribution has a resource, register it with the resource mapper - * and replace the download URL with a resource ID. - * - * @param object $distribution - * A dataset distribution object. - * - * @return object - * The supplied distribution with an updated resource download URL. - */ - private function distributionHandling($distribution): object { - // Ensure the supplied distribution has a valid resource before attempting - // to register it with the resource mapper. - if (is_object($distribution) && isset($distribution->downloadURL)) { - // Register this distribution's resource with the resource mapper and - // replace the download URL with a unique ID registered in the resource - // mapper. - $distribution->downloadURL = $this->registerWithResourceMapper( - $this->hostify($distribution->downloadURL), $this->getMimeType($distribution)); - } - - return $distribution; - } - - /** - * Register the supplied resource details with the resource mapper. - * - * @param string $downloadUrl - * The download URL for the resource being registered. - * @param string $mimeType - * The mime type for the resource being registered. - * - * @return string - * A unique ID for the resource generated using the supplied details. 
- */ - private function registerWithResourceMapper(string $downloadUrl, string $mimeType): string { - try { - // Create a new resource using the supplied resource details. - $resource = new DataResource($downloadUrl, $mimeType); - - // Attempt to register the url with the resource file mapper. - if ($this->getFileMapper()->register($resource)) { - // Upon successful registration, replace the download URL with a unique - // ID generated by the resource mapper. - $downloadUrl = $resource->getUniqueIdentifier(); - } - } - catch (AlreadyRegistered $e) { - $info = json_decode($e->getMessage()); - - // If resource mapper registration failed due to this resource already - // being registered, generate a new version of the resource and update the - // download URL with the new version ID. - if (isset($info[0]->identifier)) { - /** @var \Drupal\common\DataResource $stored */ - $stored = $this->getFileMapper()->get($info[0]->identifier, DataResource::DEFAULT_SOURCE_PERSPECTIVE); - $downloadUrl = $this->handleExistingResource($info, $stored, $mimeType); - } - } - - return $downloadUrl; - } - - /** - * Private. - */ - private function handleExistingResource($info, $stored, $mimeType) { - if ($info[0]->perspective == DataResource::DEFAULT_SOURCE_PERSPECTIVE && - (ResourceMapper::newRevision() == 1 || $stored->getMimeType() != $mimeType)) { - $new = $stored->createNewVersion(); - // Update the MIME type, since this may be updated by the user. - $new->changeMimeType($mimeType); - - $this->getFileMapper()->registerNewVersion($new); - $downloadUrl = $new->getUniqueIdentifier(); - } - else { - $downloadUrl = $stored->getUniqueIdentifier(); - } - return $downloadUrl; - } - - /** - * Private. - */ - private function getFileMapper(): ResourceMapper { - return \Drupal::service('dkan.metastore.resource_mapper'); - } - - /** - * Substitute the host for local URLs with a custom localhost token. - * - * @param string $resourceUrl - * The URL of the resource being substituted. 
- * - * @return string - * The resource URL with the custom localhost token. - */ - public static function hostify(string $resourceUrl): string { - // Get HTTP server public files URL and extract the host. - $serverPublicFilesUrl = UrlHostTokenResolver::getServerPublicFilesUrl(); - $serverPublicFilesUrl = isset($serverPublicFilesUrl) ? parse_url($serverPublicFilesUrl) : NULL; - $serverHost = $serverPublicFilesUrl['host'] ?? \Drupal::request()->getHost(); - // Determine whether the resource URL has the same host as this server. - $resourceParsedUrl = parse_url($resourceUrl); - if (isset($resourceParsedUrl['host']) && $resourceParsedUrl['host'] == $serverHost) { - // Swap out the host portion of the resource URL with the localhost token. - $resourceParsedUrl['host'] = UrlHostTokenResolver::TOKEN; - $resourceUrl = self::unparseUrl($resourceParsedUrl); - } - return $resourceUrl; - } - - /** - * Private. - */ - private static function unparseUrl($parsedUrl) { - $url = ''; - $urlParts = [ - 'scheme', - 'host', - 'port', - 'user', - 'pass', - 'path', - 'query', - 'fragment', - ]; - - foreach ($urlParts as $part) { - if (!isset($parsedUrl[$part])) { - continue; - } - $url .= ($part == "port") ? ':' : ''; - $url .= ($part == "query") ? '?' : ''; - $url .= ($part == "fragment") ? '#' : ''; - $url .= $parsedUrl[$part]; - $url .= ($part == "scheme") ? '://' : ''; - } - - return $url; - } - - /** - * Determine the mime type of the supplied local file. - * - * @param string $downloadUrl - * Local resource file path. - * - * @return string|null - * The detected mime type or NULL on failure. - */ - private function getLocalMimeType(string $downloadUrl): ?string { - $mime_type = NULL; - - // Retrieve and decode the file name from the supplied download URL's path. - $filename = \Drupal::service('file_system')->basename($downloadUrl); - $filename = urldecode($filename); - - // Attempt to load the file by file name. 
- $files = \Drupal::entityTypeManager() - ->getStorage('file') - ->loadByProperties(['filename' => $filename]); - $file = reset($files); - - // If a valid file was found for the given file name, extract the file's - // mime type... - if ($file !== FALSE) { - $mime_type = $file->getMimeType(); - } - // Otherwise, log an error notifying the user that a file was not found. - else { - $this->log('value_referencer', 'Unable to determine mime type of file with name "@name", because no file was found with that name.', [ - '@name' => $filename, - ]); - } - - return $mime_type; - } - - /** - * Determine the mime type of the supplied remote file. - * - * @param string $downloadUrl - * Remote resource file URL. - * - * @return string|null - * The detected mime type, or NULL on failure. - */ - private function getRemoteMimeType(string $downloadUrl): ?string { - $mime_type = NULL; - - // Perform HTTP Head request against the supplied URL in order to determine - // the content type of the remote resource. - $client = new GuzzleClient(); - $response = $client->head($downloadUrl); - // Extract the full value of the content type header. - $content_type = $response->getHeader('Content-Type'); - // Attempt to extract the mime type from the content type header. - if (isset($content_type[0])) { - $mime_type = $content_type[0]; + $result[] = $reference->reference($value); } - - return $mime_type; - } - - /** - * Determine the mime type of the supplied distribution's resource. - * - * @param object $distribution - * A dataset distribution object. - * - * @return string - * The detected mime type, or DEFAULT_MIME_TYPE on failure. - * - * @todo Update the UI to set mediaType when a format is selected. - */ - private function getMimeType($distribution): string { - $mimeType = "text/plain"; - - // If we have a mediaType set, use that. - if (isset($distribution->mediaType)) { - $mimeType = $distribution->mediaType; - } - // Fall back if we have an importable format set. 
- elseif (isset($distribution->format) && $distribution->format == 'csv') { - $mimeType = 'text/csv'; - } - elseif (isset($distribution->format) && $distribution->format == 'tsv') { - $mimeType = 'text/tab-separated-values'; - } - // Otherwise, determine the proper mime type using the distribution's - // download URL. - elseif (isset($distribution->downloadURL)) { - // Determine whether the supplied distribution has a local or remote - // resource. - $is_local = $distribution->downloadURL !== $this->hostify($distribution->downloadURL); - $mimeType = $is_local ? - $this->getLocalMimeType($distribution->downloadURL) : - $this->getRemoteMimeType($distribution->downloadURL); - } - - return $mimeType ?? self::DEFAULT_MIME_TYPE; - } - - /** - * Checks for an existing value reference for that property id. - * - * @param string $property_id - * The dataset property id. - * @param string|object $data - * The property's value used to find an existing reference. - * - * @return string|null - * The existing reference's uuid, or NULL if not found. - * - * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException - * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException - */ - private function checkExistingReference(string $property_id, $data) { - $storage = $this->storageFactory->getInstance($property_id); - $nodes = $storage->getEntityStorage()->loadByProperties([ - 'field_data_type' => $property_id, - 'title' => Service::metadataHash($data), - ]); - - if ($node = reset($nodes)) { - // @todo if referencing node in draft state, don't publish referenced node - // If an existing referenced node is found but unpublished, publish it. - if ($node->get('moderation_state')->value !== "published") { - $node->set('moderation_state', 'published'); - $node->save(); - } - return $node->uuid(); - } - return NULL; - } - - /** - * Creates a new value reference for that property id in a data node. - * - * @param string $property_id - * The dataset property id. 
- * @param string|object $value - * The property's value. - * - * @return string|null - * The new reference's uuid, or NULL. - * - * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException - * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException - * @throws \Drupal\Core\Entity\EntityStorageException - */ - private function createPropertyReference(string $property_id, $value) { - // Create json metadata for the reference. - $data = new \stdClass(); - $data->identifier = $this->getUuidService()->generate($property_id, $value); - $data->data = $value; - $json = json_encode($data); - - // Create node to store this reference. - $storage = $this->storageFactory->getInstance($property_id); - $entity_uuid = $storage->store($json, $data->identifier); - return $entity_uuid; + return array_values(array_filter($result)); } } diff --git a/modules/metastore/src/Reference/ReferencerInterface.php b/modules/metastore/src/Reference/ReferencerInterface.php new file mode 100644 index 0000000000..dd35f77bd8 --- /dev/null +++ b/modules/metastore/src/Reference/ReferencerInterface.php @@ -0,0 +1,23 @@ +filePathExists($resource->getFilePath()); - $this->store->store(json_encode($resource)); + $this->getStore()->store(json_encode($resource)); $this->dispatchEvent(self::EVENT_REGISTRATION, $resource); return TRUE; @@ -166,12 +181,17 @@ public function remove(DataResource $resource) { } /** - * Private. + * Get the latest revision of a resource from the mapper table. * - * @return mixed - * object || False + * @param string $identifier + * The resource identifier. + * @param string $perspective + * Resource perspective. + * + * @return object|false + * Resource mapper table row, or false if no revision.
*/ - private function getLatestRevision($identifier, $perspective) { + protected function getLatestRevision(string $identifier, string $perspective) { $query = $this->getCommonQuery($identifier, $perspective); $query->sortByDescending('version'); $items = $this->store->query($query); @@ -184,7 +204,7 @@ private function getLatestRevision($identifier, $perspective) { * @return mixed * object || False */ - private function getRevision($identifier, $perspective, $version) { + protected function getRevision($identifier, $perspective, $version) { $query = $this->getCommonQuery($identifier, $perspective); $query->conditionByIsEqualTo('version', $version); $items = $this->store->query($query); @@ -225,7 +245,7 @@ private function getCommonQuery($identifier, $perspective) { * * @todo Refactor this so it's not an exception. */ - public function filePathExists($filePath) { + public function filePathExists(string $filePath): bool { $query = new Query(); $query->conditionByIsEqualTo('filePath', $filePath, TRUE); $results = $this->getStore()->query($query); diff --git a/modules/metastore/src/Storage/Data.php b/modules/metastore/src/Storage/Data.php index 26962a56e3..45c59ede89 100644 --- a/modules/metastore/src/Storage/Data.php +++ b/modules/metastore/src/Storage/Data.php @@ -328,10 +328,10 @@ public function store($data, string $uuid = NULL): string { * @param object $data * JSON data. * - * @return string|null - * The content entity UUID, or null if failed. + * @return string + * The content entity UUID. 
*/ - private function updateExistingEntity(ContentEntityInterface $entity, $data): ?string { + private function updateExistingEntity(ContentEntityInterface $entity, $data): string { $entity->{$this->schemaIdField} = $this->schemaId; $new_data = json_encode($data); $entity->{$this->metadataField} = $new_data; @@ -365,7 +365,7 @@ private function updateExistingEntity(ContentEntityInterface $entity, $data): ?s private function createNewEntity(string $uuid, $data) { $title = ''; if ($this->schemaId === 'dataset') { - $title = isset($data->title) ? $data->title : $data->name; + $title = $data->title ?? $data->name; } else { $title = Service::metadataHash($data->data); diff --git a/modules/metastore/src/Storage/MetastoreStorageInterface.php b/modules/metastore/src/Storage/MetastoreStorageInterface.php index a7ca14ed5b..8ca3535ca6 100644 --- a/modules/metastore/src/Storage/MetastoreStorageInterface.php +++ b/modules/metastore/src/Storage/MetastoreStorageInterface.php @@ -133,12 +133,10 @@ public function store($data, string $id = NULL): string; * * @param string $hash * The hash for the data. - * @param string $schemaId - * The schema ID. * * @return string|null * The uuid of the item with that hash. */ - public function retrieveByHash($hash, $schemaId); + public function retrieveByHash($hash); } diff --git a/modules/metastore/src/Storage/NodeData.php b/modules/metastore/src/Storage/NodeData.php index d223346bc1..1dc460b67c 100644 --- a/modules/metastore/src/Storage/NodeData.php +++ b/modules/metastore/src/Storage/NodeData.php @@ -43,8 +43,6 @@ public function retrieveContains(string $string, bool $caseSensitive = TRUE): ar * * @param string $hash * The hash for the data. - * @param string $schema_id - * The schema ID. * * @return string|null * The uuid of the item with that hash. 
@@ -52,10 +50,10 @@ public function retrieveContains(string $string, bool $caseSensitive = TRUE): ar * @todo This method is not consistent with others in this class, and * may not be needed at all. Fix or remove. */ - public function retrieveByHash($hash, $schema_id) { + public function retrieveByHash($hash) { $nodes = $this->getEntityStorage()->loadByProperties([ $this->labelKey => $hash, - $this->schemaIdField => $schema_id, + $this->schemaIdField => $this->schemaId, ]); if ($node = reset($nodes)) { return $node->uuid(); diff --git a/modules/metastore/tests/src/Functional/MetastoreApiPageCacheTest.php b/modules/metastore/tests/src/Functional/MetastoreApiPageCacheTest.php index df5ecaaa16..64af573da4 100644 --- a/modules/metastore/tests/src/Functional/MetastoreApiPageCacheTest.php +++ b/modules/metastore/tests/src/Functional/MetastoreApiPageCacheTest.php @@ -122,9 +122,9 @@ public function testDatasetApiPageCache() { $response = $client->request('GET', 'api/1/datastore/query/111/0'); $this->assertEquals("MISS", $response->getHeaders()['X-Drupal-Cache'][0]); - // The import endpoints shouldn't be there at all anymore. + // The resource ID import endpoints shouldn't be there at all anymore. 
$response = $client->request('GET', "api/1/datastore/imports/$distributionId"); - $this->assertEquals(404, $response->getStatusCode()); + $this->assertEquals(200, $response->getStatusCode()); $response = $client->request('GET', "api/1/datastore/imports/$resourceId"); $this->assertEquals(404, $response->getStatusCode()); } diff --git a/modules/metastore/tests/src/Unit/Plugin/MetastoreReferenceType/ItemReferenceTest.php b/modules/metastore/tests/src/Unit/Plugin/MetastoreReferenceType/ItemReferenceTest.php new file mode 100644 index 0000000000..b807831d14 --- /dev/null +++ b/modules/metastore/tests/src/Unit/Plugin/MetastoreReferenceType/ItemReferenceTest.php @@ -0,0 +1,181 @@ +new_value = 'new keyword'; + $this->new_identifier = self::genId($this->new_value); + + $this->existing_value = 'existing keyword'; + $this->existing_identifier = self::genId($this->existing_value); + + $this->bad_value = 'bad keyword'; + $this->bad_identifier = self::genId($this->bad_value); + + $this->distro_value = (object) ['downloadUrl' => 'http://whatever']; + $this->distro_identifier = self::genId($this->distro_value, 'distribution'); + + } + + /** + * Setup a container with a reactive metastore storage. + */ + private function getContainer() { + + $retrieveByHash = (new Options()) + // No hash for new reference; it doesn't exist yet in storage. + ->add(Service::metadataHash($this->new_value), new ReturnNull()) + // Simulate the stored hash for an existing reference. + ->add(Service::metadataHash($this->existing_value), $this->existing_identifier) + // For our "bad" reference, we don't find it in storage either. + ->add(Service::metadataHash($this->bad_value), new ReturnNull()) + // Simulate the stored hash for an existing reference. + ->add(Service::metadataHash($this->distro_value), $this->distro_identifier); + + $retrieve = (new Options()) + // Retrieving an existing metastore item should return a JSON string with + // the "wrapped" identifier/data object structure. 
+ ->add([$this->existing_identifier, FALSE], json_encode(self::wrap($this->existing_identifier, $this->existing_value))) + // Retrieving a non-existing identifier should thow an exception from + // the storage. + ->add([$this->bad_identifier, FALSE], new MissingObjectException()); + + $store = (new Options()) + // Successfully storing a new keyword will return an identifier. + ->add( + [ + json_encode(self::wrap($this->new_identifier, $this->new_value)), + $this->new_identifier, + ], $this->new_identifier + ) + // For some reason, storage failed. Throw exception. + ->add( + [ + json_encode(self::wrap($this->bad_identifier, $this->bad_value)), + $this->bad_identifier, + ], new EntityStorageException()) + // Store distribution reference. + ->add( + [ + json_encode(self::wrap($this->distro_identifier, $this->distro_value)), + $this->distro_identifier, + ], $this->distro_identifier + ); + + // Set up returns for the service container. + $services = (new Options()) + ->add('logger.factory', LoggerChannelFactory::class) + ->add('dkan.metastore.storage', DataFactory::class) + ->add('dkan.metastore.resource_mapper', ResourceMapper::class) + ->index(0); + + $container_chain = (new Chain($this)) + ->add(Container::class, 'get', $services) + ->add(DataFactory::class, 'getInstance', NodeData::class) + ->add(NodeData::class, 'retrieveByHash', $retrieveByHash) + ->add(NodeData::class, 'store', $store) + // For existing reference, test unpublished behavior. + ->add(NodeData::class, 'isPublished', FALSE) + ->add(NodeData::class, 'publish', TRUE) + ->add(NodeData::class, 'retrieve', $retrieve) + // For our distirbution test, let's pretend it's making a new revision. + ->add(ResourceMapper::class, 'newRevision', TRUE); + + return $container_chain->getMock(); + } + + /** + * Shortcut to get a real UUID for a sample value. 
+ */ + private static function genId($value, $schema_id = 'keyword'): string { + return (new Uuid5())->generate($schema_id, $value); + } + + /** + * Wrap a value and an identifier in the current object structure. + */ + private static function wrap(string $identifier, $value): object { + return (object) [ + 'identifier' => $identifier, + 'data' => $value, + ]; + } + + public function testKeywordReference() { + $definition = [ + 'id' => 'item', + 'class' => ItemReference::class, + ]; + $config = ['schemaId' => 'keyword', 'property' => 'keyword']; + + // Test for new reference. + $itemReference = ItemReference::create($this->getContainer(), $config, 'item', $definition); + $this->assertEquals($this->new_identifier, $itemReference->reference($this->new_value)); + + // Test for existing reference. + $this->assertEquals($this->existing_identifier, $itemReference->reference($this->existing_value)); + + // Storage failed for some reason. + $this->expectException(EntityStorageException::class); + $itemReference->reference($this->bad_value); + } + + public function testDistroReference() { + $definition = [ + 'id' => 'item', + 'class' => ItemReference::class, + ]; + $config = ['schemaId' => 'distribution', 'property' => 'distribution']; + + $itemReference = ItemReference::create($this->getContainer(), $config, 'item', $definition); + $this->assertEquals($this->distro_identifier, $itemReference->reference($this->distro_value)); + } + + public function testKeyWordDereference() { + $definition = [ + 'id' => 'item', + 'class' => ItemReference::class, + ]; + $config = ['schemaId' => 'keyword', 'property' => 'keyword']; + + $itemReference = ItemReference::create($this->getContainer(), $config, 'item', $definition); + + // Test for normal value. + $this->assertEquals($this->existing_value, $itemReference->dereference($this->existing_identifier)); + + // Test for value w/showId. 
+ $showIdResult = self::wrap($this->existing_identifier, $this->existing_value); + $this->assertEquals($showIdResult, $itemReference->dereference($this->existing_identifier, TRUE)); + + // Test for bad reference. + $this->assertNull($itemReference->dereference($this->bad_identifier)); + } + +} diff --git a/modules/metastore/tests/src/Unit/Plugin/MetastoreReferenceType/MockClient.php b/modules/metastore/tests/src/Unit/Plugin/MetastoreReferenceType/MockClient.php new file mode 100644 index 0000000000..b8dfb4ab4f --- /dev/null +++ b/modules/metastore/tests/src/Unit/Plugin/MetastoreReferenceType/MockClient.php @@ -0,0 +1,18 @@ +definition = [ + 'id' => 'item', + 'class' => ResourceReference::class, + ]; + + $this->config = ['property' => 'downloadURL']; + + // New resource in the system. + $this->new_url = 'http://sample.com/newfile.csv'; + $this->new_identifier = self::genId($this->new_url); + + // Existing resource in system that doesn't trigger revision. + $this->existing_url = 'http://sample.com/existing.csv'; + $this->existing_identifier = self::genId($this->existing_url); + // ID for local perspective of same resource. + $this->existing_local_perspective_identifier = implode('__', [ + self::extract($this->existing_identifier, 'identifier'), + 'local_file', + self::extract($this->existing_identifier, 'version'), + ]); + + // File already located on webserver. + $this->local_url = 'http://mysite.com/local.csv'; + $this->local_resolved_url = 'http://h-o.st/local.csv'; + $this->local_identifier = self::genId($this->local_resolved_url); + + // Existing resource in system that doesn't trigger revision. 
+ $this->tsv_url = 'http://sample.com/data.tsv'; + $this->tsv_identifier = self::genId($this->tsv_url); + + $this->local_bad_url = 'http://mysite.com/bad.csv'; + $this->local_bad_resolved_url = 'http://h-o.st/bad.csv'; + $this->local_bad_identifier = self::genId($this->local_bad_resolved_url); + + // We still have a static method calling \Drupal::service() + $this->setContainer(); + } + + private function setContainer() { + $services = (new Options()) + ->add('stream_wrapper_manager', StreamWrapperManager::class) + ->add('request_stack', RequestStack::class) + ->add('datetime.time', Time::class) + ->index(0); + + $container_chain = (new Chain($this)) + ->add(Container::class, 'get', $services) + ->add(StreamWrapperManager::class, 'getViaUri', StreamWrapperInterface::class) + // Fake stream wrapper to simulate local URL. + ->add(StreamWrapperInterface::class, 'getExternalUrl', 'http://mysite.com') + ->add(RequestStack::class, 'getCurrentRequest', Request::class) + ->add(Request::class, 'getHost', 'host') + ->add(Time::class, 'getCurrentTime', self::TIME); + + \Drupal::setContainer($container_chain->getMock()); + } + + /** + * Setup a container with a reactive metastore storage. + */ + private function getContainer($new_revision = 0, $display = DataResource::DEFAULT_SOURCE_PERSPECTIVE) { + + $filePathExists = (new Options()) + // For the new URL, a filePath does not yet exist. + ->add($this->new_url, FALSE) + // We are also making the local URLs new resources. + ->add($this->local_resolved_url, FALSE) + ->add($this->local_bad_resolved_url, FALSE) + // For the existing one, it does, so we'll expect an exception. + ->add($this->existing_url, new AlreadyRegistered(json_encode([ + (object) [ + "identifier" => $this->existing_identifier, + "perspective" => 'source', + ], + ]))) + // TSV file, let's say its a new one. + ->add($this->tsv_url, FALSE); + + $latestRevision = (new Options()) + // For an existing URL, we simulate a record in the mapper table. 
+ ->add($this->existing_identifier, self::mapperTableRow($this->existing_url, $this->existing_identifier)) + ->add(self::extract($this->existing_identifier, "identifier"), self::mapperTableRow($this->existing_url, $this->existing_identifier)) + ->index(0); + + $store = (new Options()) + // Successfully storing a new keyword will return an identifier. + ->add( + new DataResource($this->new_url, 'text/csv', DataResource::DEFAULT_SOURCE_PERSPECTIVE), + $this->new_identifier + ) + // The local URL is also new and will have to be stored. + ->add( + new DataResource($this->local_resolved_url, 'text/csv', DataResource::DEFAULT_SOURCE_PERSPECTIVE), + $this->local_identifier + ) + // Existing resource when creating new revision. + ->add( + (new DataResource($this->existing_url, 'text/csv', DataResource::DEFAULT_SOURCE_PERSPECTIVE))->createNewVersion(), + $this->local_identifier + ) + // TSV file is new. + ->add( + (new DataResource($this->tsv_url, 'text/tab-separated-values', DataResource::DEFAULT_SOURCE_PERSPECTIVE)), + $this->tsv_identifier + ) + // "Bad" file fails mimetype detection, so returns text/plain. + ->add( + (new DataResource($this->local_bad_resolved_url, 'text/plain', DataResource::DEFAULT_SOURCE_PERSPECTIVE)), + $this->local_bad_identifier + ) + ->index(0); + + $revision = (new Options()) + // Retrieve a db row for the existing resource. 
+ ->add([ + self::extract($this->existing_identifier, 'identifier'), + self::extract($this->existing_identifier, 'perspective'), + self::extract($this->existing_identifier, 'version'), + ], self::mapperTableRow($this->existing_url, $this->existing_identifier)) + ->add([ + self::extract($this->existing_identifier, 'identifier'), + 'local_file', + self::extract($this->existing_identifier, 'version'), + ], self::mapperTableRow($this->existing_url, $this->existing_local_perspective_identifier)) + ->add([ + self::extract($this->local_bad_identifier, 'identifier'), + self::extract($this->local_bad_identifier, 'perspective'), + self::extract($this->local_bad_identifier, 'version'), + ], FALSE); + + // Set up returns for the service container. + $services = (new Options()) + ->add('logger.factory', LoggerChannelFactory::class) + ->add('dkan.metastore.resource_mapper', ResourceMapper::class) + ->add('file_system', FileSystemInterface::class) + ->add('entity_type.manager', EntityTypeManager::class) + ->add('http_client', MockClient::class) + ->index(0); + + // Stub of file object to return. + $file = $this->createStub(File::class); + $file->method('getMimeType')->willReturn('text/csv'); + // In the local mimeType test, loadByProperties loads stub then none. 
+ $loadByProperties = (new Sequence()) + ->add([$file]) + ->add([]); + + $container_chain = (new Chain($this)) + ->add(Container::class, 'get', $services) + ->add(EntityTypeManager::class, 'getStorage', EntityStorageInterface::class) + ->add(EntityStorageInterface::class, 'loadByProperties', $loadByProperties) + ->add(ResourceMapper::class, 'filePathExists', $filePathExists) + ->add(ResourceMapper::class, 'getStore', DatabaseTableInterface::class) + ->add(ResourceMapper::class, 'dispatchEvent', []) + ->add(ResourceMapper::class, 'getLatestRevision', $latestRevision) + ->add(ResourceMapper::class, 'getRevision', $revision) + ->add(ResourceMapper::class, 'newRevision', $new_revision) + ->add(ResourceMapper::class, 'display', $display) + ->add(ResourceMapper::class, 'validateNewVersion', TRUE) + ->add(MockClient::class, 'head', Response::class) + ->add(Response::class, 'getHeader', ['text/csv']) + ->add(DatabaseTableInterface::class, 'store', $store); + + return $container_chain->getMock(); + } + + /** + * Shortcut to get a full resource identifier/version/perspective. + */ + private static function genId($url, $perspective = 'source'): string { + $hash = md5($url); + return DataResource::buildUniqueIdentifier($hash, self::TIME, $perspective); + } + + /** + * Simulate the return of a DB query against file mapper table. + */ + private static function mapperTableRow(string $filepath, string $identifier): object { + return (object) [ + 'identifier' => self::extract($identifier, 'identifier'), + 'version' => (int) self::extract($identifier, 'version'), + 'filePath' => $filepath, + 'perspective' => substr($identifier, 46), + 'mimeType' => 'text/csv', + 'checksum' => NULL, + ]; + } + + /** + * Get a specific substring of a full identifier string. + * + * @param string $full_identifier + * Full identifier string, e.g. 7e174878bc2140d04334d0cedf1f3931__1679494210__source + * @param string $part + * Which part; can be "identifier, "version" or "perspective". 
+ * + * @return string + * The substring requested. + * + * @throws \OutOfBoundsException + */ + private static function extract(string $full_identifier, string $part): string { + switch ($part) { + case 'identifier': + return substr($full_identifier, 0, 32); + + case 'version': + return substr($full_identifier, 34, 10); + + case 'perspective': + return substr($full_identifier, 46); + + default: + throw new \OutOfBoundsException("\"$part\" is not a valid part"); + } + } + + /** + * Wrap a value and an identifier in the current object structure. + */ + private static function distribution(string $url, array $properties = ['mediaType' => 'text/csv']): object { + return (object) (['downloadURL' => $url] + $properties); + } + + public function testReference() { + // Test for new reference. + $resourceReference = ResourceReference::create($this->getContainer(), $this->config, 'resource', $this->definition); + + // This should successfully run the registerWithResourceMapper method. + $resourceReference->setContext(self::distribution($this->new_url)); + $this->assertEquals($this->new_identifier, $resourceReference->reference($this->new_url)); + + // A new resource, except it's from the local domain. + $resourceReference->setContext(self::distribution($this->local_url)); + $this->assertEquals($this->local_identifier, $resourceReference->reference($this->local_url)); + + $resourceReference->setContext(self::distribution($this->existing_url)); + $this->assertEquals($this->existing_identifier, $resourceReference->reference($this->existing_url)); + + // // Storage failed for some reason. + // $this->expectException(EntityStorageException::class); + // $itemReference->reference($this->bad_url); + } + + public function testNewRevisionReference() { + // We expect an identifier for a new revision of existing. 
+ $existing_new_revision = implode('__', [ + self::extract($this->existing_identifier, 'identifier'), + (string ) ((int) self::extract($this->existing_identifier, 'version') + 1), + DataResource::DEFAULT_SOURCE_PERSPECTIVE, + ]); + $resourceReference = ResourceReference::create($this->getContainer(1), $this->config, 'resource', $this->definition); + $resourceReference->setContext(self::distribution($this->existing_url)); + $this->assertEquals($existing_new_revision, $resourceReference->reference($this->existing_url)); + } + + public function testReferenceCsvFormat() { + $resourceReference = ResourceReference::create($this->getContainer(), $this->config, 'resource', $this->definition); + $resourceReference->setContext(self::distribution($this->new_url, ['format' => 'csv'])); + $this->assertEquals($this->new_identifier, $resourceReference->reference($this->new_url)); + } + + public function testReferenceTsvFormat() { + $resourceReference = ResourceReference::create($this->getContainer(), $this->config, 'resource', $this->definition); + $resourceReference->setContext(self::distribution($this->tsv_url, ['format' => 'tsv'])); + $this->assertEquals($this->tsv_identifier, $resourceReference->reference($this->tsv_url)); + + // New let's try conflicting formats. + $resourceReference->setContext(self::distribution($this->tsv_url, [ + 'format' => 'csv', + 'mediaType' => 'text/tab-separated-values', + ])); + // (If mimetype is parsed wrong, this would fail to match the option in the + // DatabaseTable::store() mock.) + $this->assertEquals($this->tsv_identifier, $resourceReference->reference($this->tsv_url)); + } + + public function testRemoteNoFormatOrMimetype() { + // Remote URL detect mimetype. 
+ $resourceReference = ResourceReference::create($this->getContainer(), $this->config, 'resource', $this->definition); + $resourceReference->setContext(self::distribution($this->new_url, [])); + $this->assertEquals($this->new_identifier, $resourceReference->reference($this->new_url)); + } + + public function testLocalNoFormatOrMimetype() { + // Remote URL detect mimetype. + $resourceReference = ResourceReference::create($this->getContainer(), $this->config, 'resource', $this->definition); + $resourceReference->setContext(self::distribution($this->local_url, [])); + $this->assertEquals($this->local_identifier, $resourceReference->reference($this->local_url)); + // For some reason, this URL fails to create local file entity. + // It should get a text/plain mimetype, see building of $store + // return in getContainer(). + $this->assertEquals($this->local_bad_identifier, $resourceReference->reference($this->local_bad_url)); + } + + public function testDereference() { + $definition = [ + 'id' => 'item', + 'class' => ResourceReference::class, + ]; + $config = ['property' => 'downloadURL']; + + $itemReference = ResourceReference::create($this->getContainer(), $config, 'resource', $definition); + // Make sure an existing remote URL comes back correctly. + $this->assertEquals($this->existing_url, $itemReference->dereference($this->existing_identifier)); + + // Test with showID. + $showIdResult[] = (object) [ + 'identifier' => $this->existing_identifier, + 'data' => new DataResource($this->existing_url, 'text/csv', 'source'), + ]; + $this->assertEquals($showIdResult, $itemReference->dereference($this->existing_identifier, TRUE)); + + // Test URL stored instead of identifier + $this->assertEquals($this->existing_url, $itemReference->dereference($this->existing_url)); + + // If a reference cannot be resolved, it's left as-is. 
+ $this->assertEquals($this->local_bad_identifier, $itemReference->dereference($this->local_bad_identifier)); + } + + // If static 'metastore_resource_mapper_display' is set, we retrieve a + // different perspective. + public function testDereferenceWithDisplay() { + $definition = [ + 'id' => 'item', + 'class' => ResourceReference::class, + ]; + $config = ['property' => 'downloadURL']; + + $itemReference = ResourceReference::create($this->getContainer(0, 'local_file'), $config, 'resource', $definition); + // Make sure an existing remote URL comes back correctly. + $this->assertEquals($this->existing_url, $itemReference->dereference($this->existing_identifier)); + } + +} diff --git a/modules/metastore/tests/src/Unit/Reference/DereferencerTest.php b/modules/metastore/tests/src/Unit/Reference/DereferencerTest.php index f5bef3da18..2017d4d343 100644 --- a/modules/metastore/tests/src/Unit/Reference/DereferencerTest.php +++ b/modules/metastore/tests/src/Unit/Reference/DereferencerTest.php @@ -3,14 +3,27 @@ namespace Drupal\Tests\metastore\Unit\Reference; use Drupal\Core\Config\ConfigFactory; +use Drupal\Core\Config\ConfigFactoryInterface; use Drupal\Core\Config\ImmutableConfig; +use Drupal\Core\DependencyInjection\Container; +use Drupal\Core\Entity\EntityTypeManager; +use Drupal\Core\File\FileSystemInterface; +use Drupal\Core\Logger\LoggerChannelFactory; use Drupal\Core\Queue\QueueFactory; use Drupal\metastore\Exception\MissingObjectException; +use Drupal\metastore\Plugin\MetastoreReferenceType\ItemReference; +use Drupal\metastore\Plugin\MetastoreReferenceType\ResourceReference; use Drupal\metastore\Reference\Dereferencer; +use Drupal\metastore\Reference\ReferenceMap; +use Drupal\metastore\Reference\ReferenceTypeManager; +use Drupal\metastore\ResourceMapper; use Drupal\metastore\Service\Uuid5; use Drupal\metastore\Storage\DataFactory; use Drupal\metastore\Storage\NodeData; +use Drupal\Tests\metastore\Unit\Plugin\MetastoreReferenceType\MockClient; use MockChain\Chain; 
+use MockChain\Options; +use MockChain\ReturnNull; use MockChain\Sequence; use PHPUnit\Framework\TestCase; @@ -20,51 +33,104 @@ class DereferencerTest extends TestCase { /** + * List referenceable dataset properties. * + * @var string[] */ - public function testDereference() { - $metadata = '{"data":{"name":"Gerardo","company":"CivicActions"}}'; + const REFERENCEABLE_PROPERTY_LIST = [ + 'keyword' => 0, + 'theme' => 'theme', + 'distribution' => 'distribution', + 'title' => 0, + 'identifier' => 0, + 'description' => 0, + 'accessLevel' => 0, + 'modified' => 0, + ]; - $storageFactory = (new Chain($this)) - ->add(DataFactory::class, 'getInstance', NodeData::class) - ->add(NodeData::class, 'retrieve', $metadata) + /** + * + */ + private function mockDereferencer($config, $value) { + $definitions = [ + ['id' => 'item', 'class' => ItemReference::class], + ['id' => 'resource', 'class' => ResourceReference::class], + ]; + + $refs = [ + 'keyword' => 'keyword', + 'publisher' => 'publisher', + 'title' => NULL, + ]; + + $itemReference = ItemReference::create($this->getContainer($value), $config, 'item', $definitions[0]); + + $config = ['property' => 'downloadURL']; + $resourceReference = ResourceReference::create($this->getContainer($value), $config, 'resource', $definitions[1]); + + $createInstance = (new Options()) + ->add('item', $itemReference) + ->add('resource', $resourceReference) + ->index(0); + + $manager = (new Chain($this)) + ->add(ReferenceTypeManager::class, 'getDefinitions', $definitions) + ->add(ReferenceTypeManager::class, 'createInstance', $createInstance) ->getMock(); - $uuidService = new Uuid5(); - $uuid = $uuidService->generate('dataset', "some value"); - $configService = (new Chain($this)) - ->add(ConfigFactory::class, 'get', ImmutableConfig::class) - ->add(ImmutableConfig::class, 'get', ['publisher']) + ->add(ConfigFactoryInterface::class, 'get', ImmutableConfig::class) + ->add(ImmutableConfig::class, 'get', $refs) ->getMock(); - $queueService = (new 
Chain($this)) - ->add(QueueFactory::class) + return new Dereferencer(new ReferenceMap($manager, $configService)); + } + + private function getContainer($value) { + $options = (new Options()) + ->add('stream_wrapper_manager', StreamWrapperManager::class) + ->add('logger.factory', LoggerChannelFactory::class) + ->add('dkan.metastore.storage', DataFactory::class) + ->add('dkan.metastore.resource_mapper', ResourceMapper::class) + ->add('file_system', FileSystemInterface::class) + ->add('entity_type.manager', EntityTypeManager::class) + ->add('http_client', MockClient::class) + ->index(0); + + return (new Chain($this)) + ->add(Container::class, 'get', $options) + ->add(DataFactory::class, 'getInstance', NodeData::class) + ->add(NodeData::class, 'isPublished', TRUE) + ->add(NodeData::class, 'retrieve', $value) + ->add(ResourceMapper::class, 'register', TRUE) + ->add(ResourceMapper::class, 'filePathExists', TRUE) ->getMock(); + } + + public function testDereferenceBasic() { + $config = ['schemaId' => 'publisher', 'property' => 'publisher']; + $uuidService = new Uuid5(); + $uuid = $uuidService->generate('dataset', "some value"); + $value = '{"data":{"name":"Gerardo","company":"CivicActions"}}'; - $valueReferencer = new Dereferencer($configService, $storageFactory); + $valueReferencer = $this->mockDereferencer($config, $value); $referenced = $valueReferencer->dereference((object) ['publisher' => $uuid]); $this->assertTrue(is_object($referenced)); - $this->assertEquals((object) ['name' => 'Gerardo', 'company' => 'CivicActions'], $referenced->publisher); + $this->assertEquals((object) [ + 'name' => 'Gerardo', + 'company' => 'CivicActions', + ], $referenced->publisher); } public function testDereferenceDeletedReference() { - $storageFactory = (new Chain($this)) - ->add(DataFactory::class, 'getInstance', NodeData::class) - ->add(NodeData::class, 'retrieve', new MissingObjectException("bad")) - ->getMock(); - - $configService = (new Chain($this)) - ->add(ConfigFactory::class, 
'get', ImmutableConfig::class) - ->add(ImmutableConfig::class, 'get', ['distribution']) - ->getMock(); - + $config = ['schemaId' => 'publisher', 'property' => 'publisher']; + $value = NULL; $uuidService = new Uuid5(); $uuid = $uuidService->generate('dataset', "some value"); - $valueReferencer = new Dereferencer($configService, $storageFactory); - $referenced = $valueReferencer->dereference((object) ['distribution' => $uuid]); + $valueReferencer = $this->mockDereferencer($config, $value); + $referenced = $valueReferencer->dereference((object) ['publisher' => $uuid]); $this->assertEmpty((array) $referenced); } @@ -73,30 +139,11 @@ public function testDereferenceDeletedReference() { * */ public function testDereferenceMultiple() { - $keyword1 = '{"data":"Gerardo"}'; - $keyword2 = '{"data":"CivicActions"}'; - - $keywords = (new Sequence()) - ->add($keyword1) - ->add($keyword2); - - $storageFactory = (new Chain($this)) - ->add(DataFactory::class, 'getInstance', NodeData::class) - ->add(NodeData::class, 'retrieve', $keywords) - ->getMock(); - - $uuidService = new Uuid5(); - - $configService = (new Chain($this)) - ->add(ConfigFactory::class, 'get', ImmutableConfig::class) - ->add(ImmutableConfig::class, 'get', ['keyword']) - ->getMock(); - - $queueService = (new Chain($this)) - ->add(QueueFactory::class) - ->getMock(); - - $valueReferencer = new Dereferencer($configService, $storageFactory); + $config = ['schemaId' => 'keyword', 'property' => 'keyword']; + $value = (new Sequence()) + ->add('{"data":"Gerardo"}') + ->add('{"data":"CivicActions"}'); + $valueReferencer = $this->mockDereferencer($config, $value); $referenced = $valueReferencer->dereference((object) ['keyword' => ['123456789', '987654321']]); $this->assertTrue(is_object($referenced)); diff --git a/modules/metastore/tests/src/Unit/Reference/ReferencerTest.php b/modules/metastore/tests/src/Unit/Reference/ReferencerTest.php index ffeb868e73..8c819d9396 100644 --- 
a/modules/metastore/tests/src/Unit/Reference/ReferencerTest.php +++ b/modules/metastore/tests/src/Unit/Reference/ReferencerTest.php @@ -4,26 +4,20 @@ use Drupal\Core\Config\ConfigFactoryInterface; use Drupal\Core\Config\ImmutableConfig; -use Drupal\Core\Entity\EntityStorageInterface; use Drupal\Core\Entity\EntityTypeManager; -use Drupal\Core\Field\FieldItemListInterface; -use Drupal\Core\File\FileSystem; use Drupal\Core\Logger\LoggerChannelFactory; -use Drupal\Core\StreamWrapper\PublicStream; use Drupal\Core\StreamWrapper\StreamWrapperManager; - -use Drupal\common\UrlHostTokenResolver; -use Drupal\common\DataResource; -use Drupal\Component\EventDispatcher\ContainerAwareEventDispatcher; +use Drupal\Core\File\FileSystemInterface; +use Drupal\Core\StreamWrapper\StreamWrapperInterface; +use Drupal\metastore\Plugin\MetastoreReferenceType\ItemReference; +use Drupal\metastore\Plugin\MetastoreReferenceType\ResourceReference; +use Drupal\metastore\Reference\ReferenceMap; use Drupal\metastore\Reference\Referencer; +use Drupal\metastore\Reference\ReferenceTypeManager; use Drupal\metastore\ResourceMapper; use Drupal\metastore\Storage\DataFactory; use Drupal\metastore\Storage\NodeData; -use Drupal\metastore\Storage\ResourceMapperDatabaseTable; -use Drupal\node\Entity\Node; -use Drupal\node\NodeStorage; - -use GuzzleHttp\Exception\ConnectException; +use Drupal\Tests\metastore\Unit\Plugin\MetastoreReferenceType\MockClient; use MockChain\Chain; use MockChain\Options; use PHPUnit\Framework\TestCase; @@ -56,6 +50,7 @@ class ReferencerTest extends TestCase { */ public const REFERENCEABLE_PROPERTY_LIST = [ 'keyword' => 0, + 'theme' => 'theme', 'distribution' => 'distribution', 'title' => 0, 'identifier' => 0, @@ -64,232 +59,85 @@ class ReferencerTest extends TestCase { 'modified' => 0, ]; - private function mockReferencer($existing = TRUE) { - if ($existing) { - $node = (new Chain($this)) - ->add(Node::class, 'get', FieldItemListInterface::class) - ->addd('uuid', 
'0398f054-d712-4e20-ad1e-a03193d6ab33') - ->add(FieldItemListInterface::class, 'getString', 'orphaned') - ->add(Node::class, 'set') - ->add(Node::class, 'save') - ->getMock(); - } - else { - $node = (new Chain($this)) - ->add(Node::class, 'get', FieldItemListInterface::class) - ->addd('uuid', null) - ->add(FieldItemListInterface::class, 'getString', 'orphaned') - ->add(Node::class, 'set') - ->add(Node::class, 'save') - ->add(Node::class, 'setRevisionLogMessage') - ->getMock(); - } + protected function setUp(): void { + // We still have a static method calling \Drupal::service() + $this->setContainer(); + } - $storageFactory = (new Chain($this)) - ->add(DataFactory::class, 'getInstance', NodeData::class) - ->add(NodeData::class, 'getEntityStorage', NodeStorage::class) - ->add(NodeStorage::class, 'loadByProperties', [$node]) - ->add(NodeData::class, 'getEntityIdFromUuid', "1") - ->add(NodeData::class, 'getEntityLatestRevision', NULL) - ->add(NodeData::class, 'store', "abc") - ->getMock(); + private function mockReferencer() { + $definitions = [ + ['id' => 'item', 'class' => ItemReference::class], + ['id' => 'resource', 'class' => ResourceReference::class], + ]; - $immutableConfig = (new Chain($this)) - ->add(ImmutableConfig::class, 'get', self::REFERENCEABLE_PROPERTY_LIST) + $config = ['schemaId' => 'distribution', 'property' => 'distribution']; + $itemReference = ItemReference::create($this->getContainer(), $config, 'item', $definitions[0]); + + $config = ['property' => 'downloadURL']; + $resourceReference = ResourceReference::create($this->getContainer(), $config, 'resource', $definitions[1]); + + $createInstance = (new Options()) + ->add('item', $itemReference) + ->add('resource', $resourceReference) + ->index(0); + + $manager = (new Chain($this)) + ->add(ReferenceTypeManager::class, 'getDefinitions', $definitions) + ->add(ReferenceTypeManager::class, 'createInstance', $createInstance) ->getMock(); $configService = (new Chain($this)) - 
->add(ConfigFactoryInterface::class, 'get', $immutableConfig) + ->add(ConfigFactoryInterface::class, 'get', ImmutableConfig::class) + ->add(ImmutableConfig::class, 'get', self::REFERENCEABLE_PROPERTY_LIST) ->getMock(); - $referencer = new Referencer($configService, $storageFactory); - return $referencer; + return new Referencer(new ReferenceMap($manager, $configService)); } - private function getContainer() { - $options = (new Options()) + private function setContainer() { + $services = (new Options()) ->add('stream_wrapper_manager', StreamWrapperManager::class) - ->add('logger.factory', LoggerChannelFactory::class) ->add('request_stack', RequestStack::class) - ->add('dkan.metastore.resource_mapper', ResourceMapper::class) - ->add('file_system', FileSystem::class) + ->add('datetime.time', Time::class) ->index(0); $container_chain = (new Chain($this)) - ->add(Container::class, 'get', $options) + ->add(Container::class, 'get', $services) + ->add(StreamWrapperManager::class, 'getViaUri', StreamWrapperInterface::class) + // Fake stream wrapper to simulate local URL. + ->add(StreamWrapperInterface::class, 'getExternalUrl', 'http://mysite.com') ->add(RequestStack::class, 'getCurrentRequest', Request::class) - ->add(Request::class, 'getHost', 'test.test') - ->add(ResourceMapper::class, 'register', TRUE, 'resource') - ->add(FileSystem::class, 'getTempDirectory', '/tmp'); - - return $container_chain; - } - - /** - * Test file mime type. 
- * - * @var string - */ - public function testNoMediaType() { - $container_chain = $this->getContainer(); - $container = $container_chain->getMock(); - \Drupal::setContainer($container); - $referencer = $this->mockReferencer(); - - $downloadUrl = 'https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'; - $json = ' - { - "title": "Test Dataset No Media Type", - "description": "Hi", - "identifier": "12345", - "accessLevel": "public", - "modified": "06-04-2020", - "keyword": ["hello"], - "distribution": [ - { - "title": "blah", - "downloadURL": "' . $downloadUrl . '" - } - ] - }'; - $data = json_decode($json); - $referencer->reference($data); - $this->assertEquals('text/csv', $container_chain->getStoredInput('resource')[0]->getMimeType()); - } - - /** - * Test that CSV format translates to correct mediatype if mediatype not supplied - */ - public function testWithMediaTypeConflictingFormat() { - $container_chain = $this->getContainer(); - $container = $container_chain->getMock(); - \Drupal::setContainer($container); - $referencer = $this->mockReferencer(); - - $downloadUrl = 'https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'; - $json = ' - { - "title": "Test Dataset No Media Type", - "description": "Hi", - "identifier": "12345", - "accessLevel": "public", - "modified": "06-04-2020", - "keyword": ["hello"], - "distribution": [ - { - "title": "blah", - "downloadURL": "' . $downloadUrl . 
'", - "format": "csv", - "mediaType": "text/tab-separated-values" - } - ] - }'; - $data = json_decode($json); - $referencer->reference($data); - $this->assertEquals('text/tab-separated-values', $container_chain->getStoredInput('resource')[0]->getMimeType()); - } - - /** - * Test that CSV format translates to correct mediatype if mediatype not supplied - */ - public function testNoMediaTypeWitCsvFormat() { - $container_chain = $this->getContainer(); - $container = $container_chain->getMock(); - \Drupal::setContainer($container); - $referencer = $this->mockReferencer(); - - $downloadUrl = 'https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'; - $json = ' - { - "title": "Test Dataset No Media Type", - "description": "Hi", - "identifier": "12345", - "accessLevel": "public", - "modified": "06-04-2020", - "keyword": ["hello"], - "distribution": [ - { - "title": "blah", - "downloadURL": "' . $downloadUrl . '", - "format": "csv" - } - ] - }'; - $data = json_decode($json); - $referencer->reference($data); - $this->assertEquals('text/csv', $container_chain->getStoredInput('resource')[0]->getMimeType()); + ->add(Request::class, 'getHost', 'host'); + \Drupal::setContainer($container_chain->getMock()); } - /** - * Test that CSV format translates to correct mediatype if mediatype not supplied - */ - public function testChangeMediaType() { + private function getContainer() { $options = (new Options()) ->add('stream_wrapper_manager', StreamWrapperManager::class) ->add('logger.factory', LoggerChannelFactory::class) - ->add('request_stack', RequestStack::class) + ->add('dkan.metastore.storage', DataFactory::class) ->add('dkan.metastore.resource_mapper', ResourceMapper::class) - ->add('dkan.metastore.resource_mapper_database_table', ResourceMapperDatabaseTable::class) - ->add('event_dispatcher', ContainerAwareEventDispatcher::class) - ->add('file_system', FileSystem::class) + ->add('file_system', FileSystemInterface::class) + 
->add('entity_type.manager', EntityTypeManager::class) + ->add('http_client', MockClient::class) ->index(0); - $downloadUrl = 'https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'; - $resource = new DataResource($downloadUrl, 'application/octet-stream'); - - $container_chain = (new Chain($this)) + return (new Chain($this)) ->add(Container::class, 'get', $options) - ->add(RequestStack::class, 'getCurrentRequest', Request::class) - ->add(Request::class, 'getHost', 'test.test') - ->add(ResourceMapper::class, 'getStore', ResourceMapperDatabaseTable::class) - ->add(ResourceMapper::class, 'validateNewVersion', TRUE) - ->add(ResourceMapper::class, 'get', $resource) - ->add(ResourceMapperDatabaseTable::class, 'query', [ - [ - 'identifier' => '123', - 'perspective' => DataResource::DEFAULT_SOURCE_PERSPECTIVE, - ], - ]) - ->add(ResourceMapperDatabaseTable::class, 'store', '123', 'resource') - ->add(FileSystem::class, 'getTempDirectory', '/tmp'); - - $container = $container_chain->getMock(); - \Drupal::setContainer($container); - $referencer = $this->mockReferencer(); - - $json = ' - { - "title": "Test Dataset No Format", - "description": "Hi", - "identifier": "12345", - "accessLevel": "public", - "modified": "06-04-2020", - "keyword": ["hello"], - "distribution": [ - { - "title": "blah", - "downloadURL": "' . $downloadUrl . 
'", - "format": "csv" - } - ] - }'; - $data = json_decode($json); - $referencer->reference($data); - $storedResource = DataResource::hydrate($container_chain->getStoredInput('resource')[0]); - // A new resource should have been stored, with the mimetype set to text/csv - $this->assertEquals('text/csv', $storedResource->getMimeType()); + ->add(DataFactory::class, 'getInstance', NodeData::class) + ->add(NodeData::class, 'retrieveByHash', 'abc') + ->add(NodeData::class, 'isPublished', TRUE) + ->add(ResourceMapper::class, 'register', TRUE) + ->add(ResourceMapper::class, 'filePathExists', TRUE) + ->add(ResourceMapper::class, 'newRevision', FALSE) + ->getMock(); } - - /** - * Test that TSV format translates to correct mediatype if mediatype not supplied + * Test that a new reference is created when needed. */ - public function testNoMediaTypeWithTsvFormat() { - $container_chain = $this->getContainer(); - $container = $container_chain->getMock(); - \Drupal::setContainer($container); - $referencer = $this->mockReferencer(); + public function testKeywordDistirbutionReference() { + $referencer = $this->mockReferencer(FALSE); $downloadUrl = 'https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'; $json = ' @@ -310,150 +158,27 @@ public function testNoMediaTypeWithTsvFormat() { }'; $data = json_decode($json); $referencer->reference($data); - $this->assertEquals('text/tab-separated-values', $container_chain->getStoredInput('resource')[0]->getMimeType()); + $this->assertEquals('abc', $data->distribution[0]); + $this->assertEquals('hello', $data->keyword[0]); } /** * Test that a new reference is created when needed. 
*/ - public function testNewReference() { - $container_chain = $this->getContainer(); - $container = $container_chain->getMock(); - \Drupal::setContainer($container); + public function testDownloadUrlReference() { $referencer = $this->mockReferencer(FALSE); $downloadUrl = 'https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'; $json = ' { - "title": "Test Dataset No Media Type", - "description": "Hi", - "identifier": "12345", - "accessLevel": "public", - "modified": "06-04-2020", - "keyword": ["hello"], - "distribution": [ - { - "title": "blah", - "downloadURL": "' . $downloadUrl . '", - "format": "tsv" - } - ] + "title": "blah", + "downloadURL": "' . $downloadUrl . '", + "format": "tsv" }'; $data = json_decode($json); - $referencer->reference($data); - $this->assertEquals('abc', $data->distribution[0]); - } - - /** - * Create a test dataset using the supplied download URL. - */ - private function getData(string $downloadUrl, string $mediaType = NULL): object { - return (object) [ - 'title' => 'Test Dataset No Media Type', - 'description' => 'Hi', - 'identifier'=> '12345', - 'accessLevel'=> 'public', - 'modified'=> '06-04-2020', - 'keyword'=> ['hello'], - 'distribution'=> [ - (object) array_filter([ - 'title'=> 'blah', - 'mediaType' => $mediaType, - 'downloadURL'=> $downloadUrl, - ]), - ], - ]; - } - - /** - * Test the `Referencer::hostify()` method. - */ - public function testHostify(): void { - // Initialize `\Drupal::container`. - $options = (new Options()) - ->add('stream_wrapper_manager', StreamWrapperManager::class) - ->index(0); - $container_chain = (new Chain($this)) - ->add(Container::class, 'get', $options) - ->add(PublicStream::class, 'getExternalUrl', self::HOST) - ->add(StreamWrapperManager::class, 'getViaUri', PublicStream::class); - \Drupal::setContainer($container_chain->getMock()); - // Ensure the hostify method is properly resolving the supplied URL. - $this->assertEquals( - 'http://' . 
UrlHostTokenResolver::TOKEN . '/' . self::FILE_PATH, - Referencer::hostify(self::HOST . '/' . self::FILE_PATH)); - } - - /** - * Test the remote/local file mime type detection logic. - */ - public function testMimeTypeDetection(): void { - // Initialize mock node class. - $node = (new Chain($this)) - ->add(Node::class, 'get', FieldItemListInterface::class) - ->addd('uuid', '0398f054-d712-4e20-ad1e-a03193d6ab33') - ->add(FieldItemListInterface::class, 'getString', 'orphaned') - ->add(Node::class, 'set') - ->add(Node::class, 'save') - ->getMock(); - - // Create a mock file storage class. - $storage = new class { - public function loadByProperties() { - return [ - new class { - public function getMimeType() { return ReferencerTest::MIME_TYPE; } - } - ]; - } - }; - - // Initialize `\Drupal::container`. - $options = (new Options()) - ->add('dkan.metastore.resource_mapper', ResourceMapper::class) - ->add('entity_type.manager', EntityTypeManager::class) - ->add('file_system', FileSystem::class) - ->add('request_stack', RequestStack::class) - ->add('stream_wrapper_manager', StreamWrapperManager::class) - ->add('logger.factory', LoggerChannelFactory::class) - ->index(0); - $container_chain = (new Chain($this)) - ->add(Container::class, 'get', $options) - ->add(EntityTypeManager::class, 'getStorage', $storage) - ->add(PublicStream::class, 'getExternalUrl', self::HOST) - ->add(ResourceMapper::class, 'register', TRUE, 'resource') - ->add(StreamWrapperManager::class, 'getViaUri', PublicStream::class); - \Drupal::setContainer($container_chain->getMock()); - - // Initialize mock referencer service. 
- $entity = (new Chain($this)) - ->add(EntityStorageInterface::class, 'loadByProperties', [$node]) - ->getMock(); - $configService = (new Chain($this)) - ->add(ConfigFactoryInterface::class, 'get', new class { public function get() { return ReferencerTest::REFERENCEABLE_PROPERTY_LIST; } }) - ->getMock(); - $storageFactory = (new Chain($this)) - ->add(DataFactory::class, 'getInstance', NodeData::class) - ->add(NodeData::class, 'getEntityStorage', $entity) - ->getMock(); - $referencer = new Referencer($configService, $storageFactory); - - // Test Mime Type detection using the resource `mediaType` property. - $data = $this->getData(self::HOST . '/' . self::FILE_PATH, self::MIME_TYPE); - $referencer->reference($data); - $this->assertEquals(self::MIME_TYPE, $container_chain->getStoredInput('resource')[0]->getMimeType(), 'Unable to fetch MIME type from `mediaType` property'); - // Test Mime Type detection on a local file. - $data = $this->getData(self::HOST . '/' . self::FILE_PATH); - $referencer->reference($data); - $this->assertEquals(self::MIME_TYPE, $container_chain->getStoredInput('resource')[0]->getMimeType(), 'Unable to fetch MIME type for local file'); - // Test Mime Type detection on a remote file. - $data = $this->getData('https://dkan-default-content-files.s3.amazonaws.com/phpunit/district_centerpoints_small.csv'); - $referencer->reference($data); - $this->assertEquals(self::MIME_TYPE, $container_chain->getStoredInput('resource')[0]->getMimeType(), 'Unable to fetch MIME type for remote file'); - // Test Mime Type detection on a invalid remote file path. - $data = $this->getData('http://invalid'); - $this->expectException(ConnectException::class); - $referencer->reference($data); + $referencer->reference($data, 'distribution'); + $identifier = md5($downloadUrl) . '__' . time() . '__' . 
'source'; + $this->assertEquals($identifier, $data->downloadURL); } } diff --git a/phpunit.xml b/phpunit.xml index 3ec3332fd3..0c50a73d00 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -26,18 +26,14 @@ . - tests/src/Unit modules/common/tests/src/Unit modules/metastore/tests/src/Unit - modules/metastore/modules/metastore/tests/src/Unit modules/metastore/modules/metastore_search/tests/src/Unit - modules/metastore/modules/metastore_admin/tests/src/Unit modules/datastore/tests/src/Unit modules/datastore/modules/datastore_mysql_import/tests/src/Unit modules/frontend/tests/src/Unit modules/dkan_js_frontend/tests/src modules/harvest/tests/src/Unit - modules/harvest/modules/harvest_dashboard/tests/src/Unit modules/json_form_widget/tests/src/Unit