Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MET-6211 Remove deprecations in metis indexing and cleanup #698

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.authentication.rest.controller;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.metis.authentication.service.AuthenticationService;
import eu.europeana.metis.authentication.user.AccountRole;
Expand Down Expand Up @@ -181,8 +181,7 @@ public void deleteUser(@RequestHeader("Authorization") String authorization,
}
authenticationService.deleteUser(emailParameter.getEmail());
if (LOGGER.isInfoEnabled()) {
LOGGER.info("User with email: {} deleted",
CRLF_PATTERN.matcher(emailParameter.getEmail()).replaceAll(""));
LOGGER.info("User with email: {} deleted", sanitizeCRLF(emailParameter.getEmail()));
}
}

Expand All @@ -208,15 +207,13 @@ public void updateUserToMakeAdmin(@RequestHeader("Authorization") String authori
if (emailParameter == null || StringUtils.isBlank(emailParameter.getEmail())) {
throw new BadContentException("userEmailToMakeAdmin is empty");
}
String accessToken = authenticationService
.validateAuthorizationHeaderWithAccessToken(authorization);
String accessToken = authenticationService.validateAuthorizationHeaderWithAccessToken(authorization);
jeortizquan marked this conversation as resolved.
Show resolved Hide resolved
if (!authenticationService.isUserAdmin(accessToken)) {
throw new UserUnauthorizedException(ACTION_NOT_ALLOWED_FOR_USER);
}
authenticationService.updateUserMakeAdmin(emailParameter.getEmail());
if (LOGGER.isInfoEnabled()) {
LOGGER.info("User with email: {} made admin",
CRLF_PATTERN.matcher(emailParameter.getEmail()).replaceAll(""));
LOGGER.info("User with email: {} made admin", sanitizeCRLF(emailParameter.getEmail()));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,19 @@ public final class CommonStringValues {

public static final String REPLACEABLE_CRLF_CHARACTERS_REGEX = "[\r\n\t]";

public static final Pattern CRLF_PATTERN = Pattern.compile(CommonStringValues.REPLACEABLE_CRLF_CHARACTERS_REGEX);
public static final Pattern CRLF_PATTERN = Pattern.compile(REPLACEABLE_CRLF_CHARACTERS_REGEX);

private CommonStringValues() {
}

/**
* Sanitizes the input value against log injection attacks (javasecurity:S5145).
* <p>Removes all CR, LF and tab characters from the input.</p>
jeortizquan marked this conversation as resolved.
Show resolved Hide resolved
*
* @param input the input
* @return the sanitized input, safe for logging
*/
public static String sanitizeCRLF(String input) {
jeortizquan marked this conversation as resolved.
Show resolved Hide resolved
return input == null ? null : CRLF_PATTERN.matcher(input).replaceAll("");
jeortizquan marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,39 @@

/**
* Enum for depublication reason.
* <p>Note: The enum value {@link #UNKNOWN} is to be used for historical depublication workflows (before the reason was
* implemented).
* In other words, the historical workflows will be populated once by a script with the {@link #UNKNOWN} reason, and this value
* should never be used during depublication since the release of the depublication reason. Therefore its url suffix is an
* empty string (its url is only the base url) and it is not meant to be used for populating records in the database
* (e.g. tombstoning).</p>
*/
public enum DepublicationReason {

BROKEN_MEDIA_LINKS("Broken media links", "http://data.europeana.eu/vocabulary/depublicationReason/contentTier0"),
GDPR("GDPR", "http://data.europeana.eu/vocabulary/depublicationReason/gdpr"),
PERMISSION_ISSUES("Permission issues", "http://data.europeana.eu/vocabulary/depublicationReason/noPermission"),
SENSITIVE_CONTENT("Sensitive content", "http://data.europeana.eu/vocabulary/depublicationReason/sensitiveContent"),
REMOVED_DATA_AT_SOURCE("Removed data at source", "http://data.europeana.eu/vocabulary/depublicationReason/sourceRemoval"),
GENERIC("Generic", "http://data.europeana.eu/vocabulary/depublicationReason/generic"),
UNKNOWN("Unknown", "http://data.europeana.eu/vocabulary/depublicationReason/unknown");
BROKEN_MEDIA_LINKS("Broken media links", "contentTier0"),
GDPR("GDPR", "gdpr"),
PERMISSION_ISSUES("Permission issues", "noPermission"),
SENSITIVE_CONTENT("Sensitive content", "sensitiveContent"),
REMOVED_DATA_AT_SOURCE("Removed data at source", "sourceRemoval"),
GENERIC("Generic", "generic"),
UNKNOWN("Unknown", "");
jeortizquan marked this conversation as resolved.
Show resolved Hide resolved

private final String valueAsString;
private static final String BASE_URL = "http://data.europeana.eu/vocabulary/depublicationReason/";

private final String title;
private final String url;

DepublicationReason(String valueAsString, String url) {
this.valueAsString = valueAsString;
this.url = url;
DepublicationReason(String title, String urlSuffix) {
this.title = title;
this.url = BASE_URL + urlSuffix;
}

@Override
public String toString(){
return valueAsString;
public String toString() {
return title;
}

public String getTitle() {
return title;
}

public String getUrl() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package eu.europeana.metis.utils;

import static eu.europeana.metis.utils.CommonStringValues.BATCH_OF_DATASETS_RETURNED;
import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT_FOR_REQUEST_PARAM;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT_FOR_SCHEDULING;
import static eu.europeana.metis.utils.CommonStringValues.DATE_FORMAT_Z;
import static eu.europeana.metis.utils.CommonStringValues.EUROPEANA_ID_CREATOR_INITIALIZATION_FAILED;
import static eu.europeana.metis.utils.CommonStringValues.NEXT_PAGE_CANNOT_BE_NEGATIVE;
import static eu.europeana.metis.utils.CommonStringValues.PAGE_COUNT_CANNOT_BE_ZERO_OR_NEGATIVE;
import static eu.europeana.metis.utils.CommonStringValues.PLUGIN_EXECUTION_NOT_ALLOWED;
import static eu.europeana.metis.utils.CommonStringValues.REPLACEABLE_CRLF_CHARACTERS_REGEX;
import static eu.europeana.metis.utils.CommonStringValues.S_DATA_PROVIDERS_S_DATA_SETS_S_TEMPLATE;
import static eu.europeana.metis.utils.CommonStringValues.UNAUTHORIZED;
import static eu.europeana.metis.utils.CommonStringValues.WRONG_ACCESS_TOKEN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.util.regex.Pattern;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for the constants and the {@code sanitizeCRLF} helper exposed by {@link CommonStringValues}.
 */
class CommonStringValuesTest {

  /**
   * References every statically imported constant and checks that each one is non-null.
   */
  @Test
  void testFieldsAreUsed() {
    assertNotNull(WRONG_ACCESS_TOKEN);
    assertNotNull(BATCH_OF_DATASETS_RETURNED);
    assertNotNull(NEXT_PAGE_CANNOT_BE_NEGATIVE);
    assertNotNull(PAGE_COUNT_CANNOT_BE_ZERO_OR_NEGATIVE);
    assertNotNull(PLUGIN_EXECUTION_NOT_ALLOWED);
    assertNotNull(UNAUTHORIZED);
    assertNotNull(EUROPEANA_ID_CREATOR_INITIALIZATION_FAILED);
    assertNotNull(DATE_FORMAT);
    assertNotNull(DATE_FORMAT_Z);
    assertNotNull(DATE_FORMAT_FOR_SCHEDULING);
    assertNotNull(DATE_FORMAT_FOR_REQUEST_PARAM);
    assertNotNull(S_DATA_PROVIDERS_S_DATA_SETS_S_TEMPLATE);
    assertNotNull(REPLACEABLE_CRLF_CHARACTERS_REGEX);
    assertNotNull(CRLF_PATTERN);
  }

  /**
   * The shared pattern must be built from the carriage return / line feed / tab character class.
   */
  @Test
  void testPattern() {
    Pattern expectedPattern = Pattern.compile("[\r\n\t]");
    assertEquals(expectedPattern.pattern(), CRLF_PATTERN.pattern());
  }

  /**
   * A {@code null} input is passed through as {@code null}.
   */
  @Test
  void testSanitizeCRLF_NullInput() {
    assertNull(sanitizeCRLF(null));
  }

  /**
   * An empty input stays empty.
   */
  @Test
  void testSanitizeCRLF_EmptyString() {
    String input = "";
    assertEquals("", sanitizeCRLF(input));
  }

  /**
   * Input without any of the replaceable characters is returned unchanged.
   */
  @Test
  void testSanitizeCRLF_NoSpecialCharacters() {
    String input = "This is a test.";
    assertEquals("This is a test.", sanitizeCRLF(input));
  }

  /**
   * Line feed, carriage return and tab characters inside the text are removed without a substitute.
   */
  @Test
  void testSanitizeCRLF_WithCRLFCharacters() {
    String input = "This is a test.\nThis is a new line.\rThis is a carriage return.\tThis is a tab.";
    String expected = "This is a test.This is a new line.This is a carriage return.This is a tab.";
    assertEquals(expected, sanitizeCRLF(input));
  }

  /**
   * Replaceable characters at the start and at the end of the text are removed as well.
   */
  @Test
  void testSanitizeCRLF_MixedInput() {
    String input = "\r\n\tThis string has special characters at the start.\r\n";
    String expected = "This string has special characters at the start.";
    assertEquals(expected, sanitizeCRLF(input));
  }

  /**
   * A second plain sentence without replaceable characters is returned unchanged.
   */
  @Test
  void testSanitizeCRLF_NoCRLFCharacters() {
    String input = "Regular string without CRLF.";
    assertEquals("Regular string without CRLF.", sanitizeCRLF(input));
  }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package eu.europeana.metis.utils;

import static eu.europeana.metis.utils.DepublicationReason.BROKEN_MEDIA_LINKS;
import static eu.europeana.metis.utils.DepublicationReason.GDPR;
import static eu.europeana.metis.utils.DepublicationReason.GENERIC;
import static eu.europeana.metis.utils.DepublicationReason.PERMISSION_ISSUES;
import static eu.europeana.metis.utils.DepublicationReason.REMOVED_DATA_AT_SOURCE;
import static eu.europeana.metis.utils.DepublicationReason.SENSITIVE_CONTENT;
import static eu.europeana.metis.utils.DepublicationReason.UNKNOWN;
import static eu.europeana.metis.utils.DepublicationReason.values;
import static java.util.Arrays.asList;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for the {@link DepublicationReason} enum.
 */
class DepublicationReasonTest {

  /**
   * Every enum constant must expose a non-null title and a non-null url.
   */
  @Test
  void testValues() {
    Arrays.stream(values()).forEach(DepublicationReasonTest::assertTitleAndUrlAreSet);
  }

  /**
   * {@code toString()} must return the human readable title of each constant.
   */
  @Test
  void testToStringMethod() {
    assertEquals("Broken media links", BROKEN_MEDIA_LINKS.toString());
    assertEquals("GDPR", GDPR.toString());
    assertEquals("Permission issues", PERMISSION_ISSUES.toString());
    assertEquals("Sensitive content", SENSITIVE_CONTENT.toString());
    assertEquals("Removed data at source", REMOVED_DATA_AT_SOURCE.toString());
    assertEquals("Generic", GENERIC.toString());
    assertEquals("Unknown", UNKNOWN.toString());
  }

  /**
   * The enum must consist of exactly the seven known reasons.
   */
  @Test
  void testEnumValuePresence() {
    final List<DepublicationReason> allReasons = asList(values());
    assertEquals(7, allReasons.size());

    final List<DepublicationReason> expectedReasons = asList(
        BROKEN_MEDIA_LINKS,
        GDPR,
        PERMISSION_ISSUES,
        SENSITIVE_CONTENT,
        REMOVED_DATA_AT_SOURCE,
        GENERIC,
        UNKNOWN);
    assertTrue(allReasons.containsAll(expectedReasons));
  }

  /**
   * Asserts that the given reason has both its title and its url populated.
   *
   * @param reason the enum constant to check
   */
  private static void assertTitleAndUrlAreSet(DepublicationReason reason) {
    assertNotNull(reason.getTitle());
    assertNotNull(reason.getUrl());
  }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.dereference;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.metis.exception.BadContentException;
import java.io.ByteArrayInputStream;
Expand Down Expand Up @@ -105,8 +105,7 @@ private Optional<String> getValidatedXml(String resourceId, String xml) throws B
if (isEmptyXml(xml)) {
xmlResponse = Optional.empty();
if (LOGGER.isInfoEnabled()) {
LOGGER.info("Transformed entity {} results to an empty XML.",
CRLF_PATTERN.matcher(resourceId).replaceAll(""));
LOGGER.info("Transformed entity {} results to an empty XML.", sanitizeCRLF(resourceId));
}
} else {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.dereference.service;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.enrichment.api.external.DereferenceResultStatus;
import eu.europeana.enrichment.api.external.model.Concept;
Expand Down Expand Up @@ -284,7 +284,7 @@ private OriginalEntity retrieveOriginalEntity(String resourceId, Set<String> pot

// Evaluate and return the result.
if (originalEntity == null && LOGGER.isInfoEnabled()) {
LOGGER.info("No entity XML for uri {}", CRLF_PATTERN.matcher(resourceId).replaceAll(""));
LOGGER.info("No entity XML for uri {}", sanitizeCRLF(resourceId));
}
final DereferenceResultStatus dereferenceResultStatus = originalEntity == null ?
DereferenceResultStatus.NO_ENTITY_FOR_VOCABULARY : DereferenceResultStatus.SUCCESS;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package eu.europeana.metis.dereference.service.utils;

import static eu.europeana.metis.utils.CommonStringValues.CRLF_PATTERN;
import static eu.europeana.metis.utils.CommonStringValues.sanitizeCRLF;

import eu.europeana.metis.dereference.Vocabulary;
import java.net.URI;
Expand Down Expand Up @@ -71,10 +71,10 @@ public static VocabularyCandidates findVocabulariesForUrl(String resourceId,

// Log and done.
if (candidates.isEmpty() && (LOGGER.isInfoEnabled())) {
LOGGER.info("No vocabularies found for uri {}", CRLF_PATTERN.matcher(resourceId).replaceAll(""));
LOGGER.info("No vocabularies found for uri {}", sanitizeCRLF(resourceId));
}
if (candidates.size() > 1 && LOGGER.isWarnEnabled()) {
LOGGER.warn("Multiple vocabularies found for uri {}: {}", CRLF_PATTERN.matcher(resourceId).replaceAll(""),
LOGGER.warn("Multiple vocabularies found for uri {}: {}", sanitizeCRLF(resourceId),
candidates.stream().map(Vocabulary::getName).collect(Collectors.joining(", ")));
}
return new VocabularyCandidates(candidates);
Expand Down
30 changes: 9 additions & 21 deletions metis-indexing/src/main/java/eu/europeana/indexing/Indexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@ public interface Indexer extends Closeable {

/**
* <p>
* This method indexes a single record, publishing it to the provided data stores.
* This method indexes a single RDF record, publishing it to the provided data stores.
* </p>
* <p>
* <b>NOTE:</b> this operation should not coincide with a remove operation as this operation is
* not done within a transaction.
* </p>
*
* @param record The record to index.
* @param rdf The RDF record to index.
* @param indexingProperties The properties of this indexing operation.
* @throws IndexingException In case a problem occurred during indexing.
*/
void indexRdf(RDF record, IndexingProperties indexingProperties) throws IndexingException;
void indexRdf(RDF rdf, IndexingProperties indexingProperties) throws IndexingException;

/**
* <p>
Expand All @@ -60,18 +60,18 @@ public interface Indexer extends Closeable {

/**
* <p>
* This method indexes a single record, publishing it to the provided data stores.
* This method indexes a single record given as an RDF string, publishing it to the provided data stores.
* </p>
* <p>
* <b>NOTE:</b> this operation should not coincide with a remove operation as this operation is
* not done within a transaction.
* </p>
*
* @param record The record to index (can be parsed to RDF).
* @param rdfString The record to index, as a string (can be parsed to RDF).
* @param indexingProperties The properties of this indexing operation.
* @throws IndexingException In case a problem occurred during indexing.
*/
void index(String record, IndexingProperties indexingProperties) throws IndexingException;
void index(String rdfString, IndexingProperties indexingProperties) throws IndexingException;

/**
* <p>
Expand Down Expand Up @@ -110,18 +110,18 @@ void index(String stringRdfRecord, IndexingProperties indexingProperties,

/**
* <p>
* This method indexes a single record, publishing it to the provided data stores.
* This method indexes a single record given as an RDF input stream, publishing it to the provided data stores.
* </p>
* <p>
* <b>NOTE:</b> this operation should not coincide with a remove operation as this operation is
* not done within a transaction.
* </p>
*
* @param record The record to index (can be parsed to RDF).
* @param rdfInputStream The record to index, as an input stream (can be parsed to RDF).
* @param indexingProperties The properties of this indexing operation.
* @throws IndexingException In case a problem occurred during indexing.
*/
void index(InputStream record, IndexingProperties indexingProperties) throws IndexingException;
void index(InputStream rdfInputStream, IndexingProperties indexingProperties) throws IndexingException;

/**
* <p>
Expand Down Expand Up @@ -188,18 +188,6 @@ TierResults indexAndGetTierCalculations(InputStream recordContent,
*/
FullBeanImpl getTombstone(String rdfAbout);

/**
* Creates and indexes a tombstone record.
*
* @param rdfAbout the id of the record
* @return whether a record was tombstoned
* @throws IndexingException in case something went wrong.
* @deprecated Use {@link #indexTombstone(String, DepublicationReason)}.
*/
//TODO: 2024-09-24 - Remove once ecloud has updated the code for tombstoning
@Deprecated(since = "13-SNAPSHOT", forRemoval = true)
boolean indexTombstone(String rdfAbout) throws IndexingException;

/**
* Creates and indexes a tombstone record.
*
Expand Down
Loading