Skip to content

Commit

Permalink
MET-6053: Add support in again for caching (was removed in previous commit).
Browse files Browse the repository at this point in the history
  • Loading branch information
jochen-vermeulen committed Jul 24, 2024
1 parent aae2345 commit c1db018
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import dev.morphia.annotations.Index;
import dev.morphia.annotations.IndexOptions;
import dev.morphia.annotations.Indexes;
import eu.europeana.enrichment.api.external.DereferenceResultStatus;
import eu.europeana.metis.mongo.utils.ObjectIdSerializer;
import jakarta.xml.bind.annotation.XmlElement;
import jakarta.xml.bind.annotation.XmlRootElement;
Expand All @@ -27,12 +28,12 @@ public class ProcessedEntity {
private ObjectId id;

/**
* The resourceId (URI) of the resource
* The resourceId (URI) of the resource.
**/
private String resourceId;

/**
* A xml representation of the mapped resource in one of the contextual resources
* An XML representation of the contextual resource (transformed from the original entity).
**/
private String xml;

Expand All @@ -41,6 +42,11 @@ public class ProcessedEntity {
**/
private String vocabularyId;

/**
* The status of the dereference operation.
*/
private DereferenceResultStatus resultStatus;

@XmlElement
public ObjectId getId() {
return id;
Expand Down Expand Up @@ -76,4 +82,13 @@ public String getVocabularyId() {
/**
 * Stores the given vocabulary ID on this entity.
 *
 * @param vocabularyId the vocabulary ID to store; passed through unchanged
 */
public void setVocabularyId(String vocabularyId) {
  this.vocabularyId = vocabularyId;
}

/**
 * Returns the status of the dereference operation as stored on this entity.
 *
 * @return the stored result status, exactly as it was last set
 */
@XmlElement
public DereferenceResultStatus getResultStatus() {
  return this.resultStatus;
}

/**
 * Stores the status of the dereference operation on this entity.
 *
 * @param resultStatus the result status to store; passed through unchanged
 */
public void setResultStatus(DereferenceResultStatus resultStatus) {
  this.resultStatus = resultStatus;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import eu.europeana.enrichment.utils.EnrichmentBaseConverter;
import eu.europeana.metis.dereference.DereferenceResult;
import eu.europeana.metis.dereference.IncomingRecordToEdmTransformer;
import eu.europeana.metis.dereference.ProcessedEntity;
import eu.europeana.metis.dereference.RdfRetriever;
import eu.europeana.metis.dereference.Vocabulary;
import eu.europeana.metis.dereference.service.dao.ProcessedEntityDao;
Expand All @@ -33,8 +34,10 @@
import jakarta.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -151,15 +154,6 @@ private void extractBroaderResources(Pair<EnrichmentBase, DereferenceResultStatu
resourceIdStream.filter(Objects::nonNull).forEach(destination::add);
}

// TODO this code is used to determine whether the vocabulary has changed. Don't forget this.
/* Vocabulary cachedVocabulary = null;
boolean cachedVocabularyChanged = false;
if (cachedEntity != null && StringUtils.isNotBlank(cachedEntity.getVocabularyId())) {
cachedVocabulary = vocabularyDao.get(cachedEntity.getVocabularyId());
cachedVocabularyChanged = cachedVocabulary == null;
}
*/

private TransformedEntity dereferenceSingleResource(String resourceId) {

// Check for URI validity.
Expand All @@ -170,6 +164,20 @@ private TransformedEntity dereferenceSingleResource(String resourceId) {
return new TransformedEntity(null, null, DereferenceResultStatus.INVALID_URL);
}

// Check if a cached item exists for this resource ID.
final TransformedEntity cachedEntity = getFromCache(resourceId);
if (cachedEntity != null) {
return cachedEntity;
}

// So no cached item exists. Perform the actual algorithm and save the result to cache.
final TransformedEntity result = performDereferenceAlgorithmForSingleResource(resourceId);
saveToCache(resourceId, result);
return result;
}

private TransformedEntity performDereferenceAlgorithmForSingleResource(String resourceId) {

// Find matching vocabularies, report if there are none.
final VocabularyCandidates vocabularyCandidates;
try {
Expand Down Expand Up @@ -212,20 +220,6 @@ private TransformedEntity dereferenceSingleResource(String resourceId) {
return new TransformedEntity(null, null, status);
}

/* private void saveEntity(String resourceId, DereferenceResultWrapper transformedEntityAndVocabularyPair) {
final String entityXml = transformedEntityAndVocabularyPair.getEntity();
final Vocabulary vocabulary = transformedEntityAndVocabularyPair.getVocabulary();
final String vocabularyIdString = Optional.ofNullable(vocabulary).map(Vocabulary::getId)
.map(ObjectId::toString).orElse(null);
//Save entity
ProcessedEntity entityToCache = new ProcessedEntity();
entityToCache.setResourceId(resourceId);
entityToCache.setXml(entityXml);
entityToCache.setVocabularyId(vocabularyIdString);
processedEntityDao.save(entityToCache);
}*/

private TransformedEntity transformEntity(Vocabulary vocabulary, final String originalEntity,
final String resourceId) {
try {
Expand Down Expand Up @@ -275,4 +269,38 @@ private OriginalEntity retrieveOriginalEntity(String resourceId, Set<String> pot
DereferenceResultStatus.NO_ENTITY_FOR_VOCABULARY : DereferenceResultStatus.SUCCESS;
return new OriginalEntity(originalEntity, dereferenceResultStatus);
}

/**
 * Looks up a previously stored dereference outcome for the given resource.
 *
 * <p>A stored item is only used when it is complete and still valid. It is ignored (and the
 * caller is expected to process the resource again) when it has no result status or when it
 * refers to a vocabulary that can no longer be found.
 *
 * @param resourceId the resource ID that was used as key when the item was stored
 * @return the cached outcome, or {@code null} if there is no usable cached item
 */
private TransformedEntity getFromCache(String resourceId) {

  // Try to find a cached entity. If there is none, we are done.
  final ProcessedEntity cachedEntity = processedEntityDao.getByResourceId(resourceId);
  if (cachedEntity == null) {
    return null;
  }

  // An item without a status cannot be turned into a meaningful result (presumably it was
  // stored before the status was persisted - TODO confirm). Handle it like a cache miss so
  // that the entity is processed again instead of propagating a null status to the caller.
  if (cachedEntity.getResultStatus() == null) {
    return null;
  }

  // Check the vocabulary. If it no longer exists, we need to process the entity again.
  final String vocabularyId = cachedEntity.getVocabularyId();
  Vocabulary vocabulary = null;
  if (StringUtils.isNotBlank(vocabularyId)) {
    vocabulary = vocabularyDao.get(vocabularyId);
    if (vocabulary == null) {
      return null;
    }
  }

  // Convert to a transformed entity and return.
  return new TransformedEntity(vocabulary, cachedEntity.getXml(),
      cachedEntity.getResultStatus());
}

/**
 * Stores the outcome of a dereference attempt so that it can be found again by resource ID.
 *
 * @param resourceId the resource ID under which the outcome is stored
 * @param transformedEntity the outcome to store: its transformed XML, its vocabulary (the ID
 * is stored, or {@code null} if there is no vocabulary or the vocabulary has no ID) and its
 * result status
 */
private void saveToCache(String resourceId, TransformedEntity transformedEntity) {
  final ProcessedEntity cacheItem = new ProcessedEntity();
  cacheItem.setResourceId(resourceId);
  cacheItem.setXml(transformedEntity.getTransformedEntity());

  // Resolve the vocabulary ID step by step; any missing link results in a null ID.
  final Vocabulary sourceVocabulary = transformedEntity.getVocabulary();
  final ObjectId sourceVocabularyId = sourceVocabulary == null ? null : sourceVocabulary.getId();
  cacheItem.setVocabularyId(sourceVocabularyId == null ? null : sourceVocabularyId.toString());

  cacheItem.setResultStatus(transformedEntity.getResultStatus());
  processedEntityDao.save(cacheItem);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.clearInvocations;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
Expand Down Expand Up @@ -104,25 +105,26 @@ void resetMocks() throws IOException, URISyntaxException {
/**
 * Dereferences {@code PLACE_ID} twice. The first call must consult the vocabulary DAO and the
 * retriever once each and leave an item with the same status in {@code CACHE}. The second call
 * must yield the same successful result without consulting either of them again.
 */
@Test
void testDereference_Success() throws IOException, URISyntaxException {

  // First time: no cached item available
  final DereferenceResult result0 = dereferenceService.dereference(PLACE_ID);
  assertNotNull(result0);
  assertEquals(1, result0.getEnrichmentBasesAsList().size());
  assertEquals(PLACE_ID, result0.getEnrichmentBasesAsList().get(0).getAbout());
  assertEquals(DereferenceResultStatus.SUCCESS, result0.getDereferenceStatus());
  verify(vocabularyDao, times(1)).getByUriSearch(anyString());
  verify(retriever, times(1)).retrieve(eq(PLACE_ID), anyString());
  assertTrue(CACHE.containsKey(PLACE_ID));
  assertEquals(result0.getDereferenceStatus(), CACHE.get(PLACE_ID).getResultStatus());

  // Second time: use cache, no second retrieval. The recorded invocations of the first call
  // are cleared so that never() only looks at what happens from here on.
  clearInvocations(vocabularyDao, retriever);
  final DereferenceResult result1 = dereferenceService.dereference(PLACE_ID);
  assertNotNull(result1);
  assertEquals(1, result1.getEnrichmentBasesAsList().size());
  assertEquals(PLACE_ID, result1.getEnrichmentBasesAsList().get(0).getAbout());
  assertEquals(DereferenceResultStatus.SUCCESS, result1.getDereferenceStatus());
  verify(vocabularyDao, never()).getByUriSearch(anyString());
  verify(retriever, never()).retrieve(eq(PLACE_ID), anyString());
}

@Test
Expand All @@ -134,34 +136,50 @@ void testDereference_IllegalArgument() {

/**
 * Dereferences a URI for which no vocabulary matches, twice. The first call must search the
 * vocabularies once, never call the retriever, and leave an item with the same status in
 * {@code CACHE}. The second call must report the same status without a new vocabulary search.
 */
@Test
void testDereference_NoVocabularyMatching() throws IOException, URISyntaxException {

  // First time: no cached item available
  final String nonExistingVocabularyEntity = "http://XXX.YYYYYYY.org/3020251/";
  final DereferenceResult result0 = dereferenceService.dereference(nonExistingVocabularyEntity);
  assertNotNull(result0);
  assertTrue(result0.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.NO_VOCABULARY_MATCHING, result0.getDereferenceStatus());
  verify(vocabularyDao, times(1)).getByUriSearch(anyString());
  verify(retriever, never()).retrieve(anyString(), anyString());
  assertTrue(CACHE.containsKey(nonExistingVocabularyEntity));
  assertEquals(result0.getDereferenceStatus(),
      CACHE.get(nonExistingVocabularyEntity).getResultStatus());

  // Second time: use cache, no second retrieval. The recorded invocations of the first call
  // are cleared so that never() only looks at what happens from here on.
  clearInvocations(vocabularyDao, retriever);
  final DereferenceResult result1 = dereferenceService.dereference(nonExistingVocabularyEntity);
  assertNotNull(result1);
  assertTrue(result1.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.NO_VOCABULARY_MATCHING, result1.getDereferenceStatus());
  verify(vocabularyDao, never()).getByUriSearch(anyString());
  verify(retriever, never()).retrieve(eq(nonExistingVocabularyEntity), anyString());
}

/**
 * Dereferences an ID under {@code GEONAMES_URI} for which no entity is found, twice. The first
 * call must consult the vocabulary DAO and the retriever once each and leave an item with the
 * same status in {@code CACHE}. The second call must report the same status without consulting
 * either of them again.
 */
@Test
void testDereference_NoEntityForVocabulary() throws IOException, URISyntaxException {

  // First time: no cached item available
  final String nonExistingId = GEONAMES_URI + "XXXXXX";
  final DereferenceResult result0 = dereferenceService.dereference(nonExistingId);
  assertNotNull(result0);
  assertTrue(result0.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.NO_ENTITY_FOR_VOCABULARY, result0.getDereferenceStatus());
  verify(vocabularyDao, times(1)).getByUriSearch(anyString());
  verify(retriever, times(1)).retrieve(eq(nonExistingId), anyString());
  assertTrue(CACHE.containsKey(nonExistingId));
  assertEquals(result0.getDereferenceStatus(), CACHE.get(nonExistingId).getResultStatus());

  // Second time: use cache, no second retrieval. The recorded invocations of the first call
  // are cleared so that never() only looks at what happens from here on.
  clearInvocations(vocabularyDao, retriever);
  final DereferenceResult result1 = dereferenceService.dereference(nonExistingId);
  assertNotNull(result1);
  assertTrue(result1.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.NO_ENTITY_FOR_VOCABULARY, result1.getDereferenceStatus());
  verify(vocabularyDao, never()).getByUriSearch(anyString());
  verify(retriever, never()).retrieve(eq(nonExistingId), anyString());
}

@Test
Expand All @@ -182,31 +200,49 @@ void testDereference_InvalidUrl() throws IOException, URISyntaxException {

/**
 * Makes the retriever return content that is not XML and dereferences {@code PLACE_ID} twice.
 * The first call must consult the vocabulary DAO and the retriever once each, report
 * {@code ENTITY_FOUND_XML_XSLT_ERROR} and leave an item with that status in {@code CACHE}. The
 * second call must report the same status without consulting either of them again.
 */
@Test
void testDereference_XmlXsltError() throws URISyntaxException, IOException {

  // First time: no cached item available
  doReturn("THIS WILL BE AN ERROR").when(retriever).retrieve(eq(PLACE_ID), anyString());
  final DereferenceResult result0 = dereferenceService.dereference(PLACE_ID);
  assertNotNull(result0);
  assertTrue(result0.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.ENTITY_FOUND_XML_XSLT_ERROR,
      result0.getDereferenceStatus());
  verify(vocabularyDao, times(1)).getByUriSearch(anyString());
  verify(retriever, times(1)).retrieve(eq(PLACE_ID), anyString());
  assertTrue(CACHE.containsKey(PLACE_ID));
  assertEquals(result0.getDereferenceStatus(), CACHE.get(PLACE_ID).getResultStatus());

  // Second time: use cache, no second retrieval. The recorded invocations of the first call
  // are cleared so that never() only looks at what happens from here on.
  clearInvocations(vocabularyDao, retriever);
  final DereferenceResult result1 = dereferenceService.dereference(PLACE_ID);
  assertNotNull(result1);
  assertTrue(result1.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.ENTITY_FOUND_XML_XSLT_ERROR,
      result1.getDereferenceStatus());
  verify(vocabularyDao, never()).getByUriSearch(anyString());
  verify(retriever, never()).retrieve(eq(PLACE_ID), anyString());
}

/**
 * Makes the retriever return a well-formed but empty XML document and dereferences
 * {@code PLACE_ID} twice. The first call must consult the vocabulary DAO and the retriever once
 * each, report {@code ENTITY_FOUND_XML_XSLT_PRODUCE_NO_CONTEXTUAL_CLASS} and leave an item with
 * that status in {@code CACHE}. The second call must report the same status without consulting
 * either of them again.
 */
@Test
void testDereference_XmlXsltProduceNoContextualClass() throws URISyntaxException, IOException {

  // First time: no cached item available
  doReturn("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><empty/>").when(retriever).retrieve(eq(PLACE_ID), anyString());
  final DereferenceResult result0 = dereferenceService.dereference(PLACE_ID);
  assertNotNull(result0);
  assertTrue(result0.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.ENTITY_FOUND_XML_XSLT_PRODUCE_NO_CONTEXTUAL_CLASS,
      result0.getDereferenceStatus());
  verify(vocabularyDao, times(1)).getByUriSearch(anyString());
  verify(retriever, times(1)).retrieve(eq(PLACE_ID), anyString());
  assertTrue(CACHE.containsKey(PLACE_ID));
  assertEquals(result0.getDereferenceStatus(), CACHE.get(PLACE_ID).getResultStatus());

  // Second time: use cache, no second retrieval. The recorded invocations of the first call
  // are cleared so that never() only looks at what happens from here on.
  clearInvocations(vocabularyDao, retriever);
  final DereferenceResult result1 = dereferenceService.dereference(PLACE_ID);
  assertNotNull(result1);
  assertTrue(result1.getEnrichmentBasesAsList().isEmpty());
  assertEquals(DereferenceResultStatus.ENTITY_FOUND_XML_XSLT_PRODUCE_NO_CONTEXTUAL_CLASS,
      result1.getDereferenceStatus());
  verify(vocabularyDao, never()).getByUriSearch(anyString());
  verify(retriever, never()).retrieve(eq(PLACE_ID), anyString());
}
}

0 comments on commit c1db018

Please sign in to comment.