From ba64bb1caff1807c8df75c18644c9c655ea10fc7 Mon Sep 17 00:00:00 2001 From: srishti Date: Tue, 2 Apr 2024 15:39:45 +0200 Subject: [PATCH 1/4] EA-3681 added migration code, added as a seperate command line runner application --- pom.xml | 1 + record-api-migration/pom.xml | 52 ++ .../record/migration/MigrationHandler.java | 273 +++++++++ .../record/migration/MigrationSettings.java | 54 ++ .../record/migration/RecordDomProcessor.java | 382 ++++++++++++ .../record/migration/RecordJenaProcessor.java | 543 ++++++++++++++++++ .../api/record/migration/RunMigration.java | 166 ++++++ .../api/record/migration/ViewComparator.java | 33 ++ .../src/main/resources/migration.properties | 3 + .../db/repository/RecordRepository.java | 5 +- 10 files changed, 1511 insertions(+), 1 deletion(-) create mode 100644 record-api-migration/pom.xml create mode 100755 record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java create mode 100644 record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java create mode 100755 record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordDomProcessor.java create mode 100755 record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordJenaProcessor.java create mode 100755 record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java create mode 100755 record-api-migration/src/main/java/eu/europeana/api/record/migration/ViewComparator.java create mode 100644 record-api-migration/src/main/resources/migration.properties diff --git a/pom.xml b/pom.xml index 8163968..6489d5d 100644 --- a/pom.xml +++ b/pom.xml @@ -7,6 +7,7 @@ record-api-common record-api-model record-api-jena + record-api-migration diff --git a/record-api-migration/pom.xml b/record-api-migration/pom.xml new file mode 100644 index 0000000..4fb4ccc --- /dev/null +++ b/record-api-migration/pom.xml @@ -0,0 +1,52 @@ + + + + record-api + eu.europeana.api + 1.0-SNAPSHOT + + 4.0.0 + + record-api-migration + This is a temporary module to migrate all the data from the DB + + 17 + 17 + eu.europeana.api.record.migration.RunMigration + + + + + org.springframework.boot + spring-boot-starter-web + + + + org.springframework.boot + spring-boot-starter-logging + + + + + + + + org.apache.commons + commons-lang3 + ${apache.commomLang3.version} + + + eu.europeana.api + record-api-model + 1.0-SNAPSHOT + + + eu.europeana.api + record-api-mongo + 1.0-SNAPSHOT + + + + \ No newline at end of file diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java new file mode 100755 index 0000000..0a71faf --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java @@ -0,0 +1,273 @@ +package eu.europeana.api.record.migration; + +import dev.morphia.query.filters.Filters; +import eu.europeana.api.edm.EDM; +import eu.europeana.api.format.RdfFormat; +import eu.europeana.api.record.db.repository.RecordRepository; +import eu.europeana.api.record.io.FormatHandlerRegistry; +import eu.europeana.api.record.migration.RecordDomProcessor.Result; +import eu.europeana.api.record.model.*; +import eu.europeana.api.record.model.internal.ProxyComparator; +import eu.europeana.jena.encoder.JenaObjectDecoder; +import eu.europeana.jena.encoder.JenaObjectEncoder; +import eu.europeana.jena.encoder.library.TemplateLibrary; +import eu.europeana.jena.encoder.utils.JenaUtils; +import org.apache.commons.io.IOUtils; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.ResIterator; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdfxml.xmlinput.DOM2Model; +import org.apache.jena.vocabulary.RDF; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.w3c.dom.Document; +import org.xml.sax.SAXParseException; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Collections; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import static org.apache.jena.rdf.model.ResourceFactory.createResource; + +/** + * @author Hugo + * @since 3 Nov 2023 + */ +@Service +public class MigrationHandler { + + private static final int DEFAULT_THREADS = 20; + private static final Resource ProvidedCHO = createResource(EDM.NS + EDM.ProvidedCHO); + private static ThreadLocal recordId = new ThreadLocal(); + private RdfFormat format = RdfFormat.JSONLD; + private boolean validate = true; + protected boolean validateDB = false; + protected boolean saveCopy = false; + private File logDir = null; + private int threads = DEFAULT_THREADS; + private ExecutorService executor = null; + + private final MigrationSettings settings; + + private RecordJenaProcessor jenaProcessor; + + private RecordDomProcessor domProcessor; + + private final FormatHandlerRegistry registry; + + private final RecordRepository migrationRepository; + + private final TemplateLibrary library; + + @Autowired + public MigrationHandler(MigrationSettings settings, FormatHandlerRegistry registry, RecordRepository migrationRepository, TemplateLibrary library) { + this.settings = settings; + this.registry = registry; + this.migrationRepository = migrationRepository; + this.library = library; + this.jenaProcessor = new RecordJenaProcessor(this.settings.getMediaTypes()); + this.domProcessor = new RecordDomProcessor(); + JenaUtils.disableRiotValidation(); + executor = null; + } + + public void setLoggingDir(File logDir) { + this.logDir = logDir; + } + + public void setThreads(int threads) { + this.threads = threads; + } + + public void setValidate(boolean validate) { + this.validate = validate; + } + + public void setSaveCopy(boolean saveCopy) { + this.saveCopy = saveCopy; + } + + public void setValidateDB(boolean validate) { + this.validateDB = validate; + } + + + public void start() { + executor = getExecutor(); + } + + public void runTask(Document doc) { + if (executor != null) { + executor.submit(new MigrationTask(doc)); + } + } + + public void end() { + if (executor == null) { + return; + } + + executor.shutdown(); + try { + executor.awaitTermination(1, TimeUnit.DAYS); + } catch (InterruptedException ie) { + } + } + + public class MigrationTask implements Runnable { + + private Document document; + private Resource cho; + + public MigrationTask(Document document) { + this.document = document; + } + + @Override + public void run() { + String recordId = domProcessor.getRecordId(document); + if (recordId == null) { + return; + } + + MigrationHandler.recordId.set(recordId); + try { + storeInDB(parse(domProcessor.process(document))); + } catch (Throwable t) { + synchronized (System.err) { + logErr("Exception in cho: " + recordId); + t.printStackTrace(System.err); + } + } finally { + MigrationHandler.recordId.remove(); + } + } + + private ProvidedCHO parse(Result res) throws IOException, SAXParseException { + + Model m = ModelFactory.createDefaultModel(); + DOM2Model dom2Model = DOM2Model.createD2M(res.uri, m); + dom2Model.setProperty(JenaUtils.allowBadURIs, "true"); + dom2Model.load(res.doc); + + ResIterator iter = m.listResourcesWithProperty(RDF.type, ProvidedCHO); + if (!iter.hasNext()) { + return null; + } + + Resource cho = jenaProcessor.upgrade(iter.next()); + if (cho == null) { + return null; + } + + this.cho = cho; + ProvidedCHO pcho = (ProvidedCHO) new JenaObjectDecoder(library, RecordModelFactoryImpl.INSTANCE).decode(cho); + + //Sort proxies from Europeana to Provider Proxy + Collections.sort(pcho.getProxies(), ProxyComparator.INSTANCE); + + //Sort views based on the original order in the XML + Aggregation aggr = pcho.getProviderProxy().getProxyIn(); + if (aggr != null && aggr.hasViews()) { + ViewComparator.sort(aggr.getViews(), res.views); + } + + if (validate) { + validate(m, pcho); + } + return pcho; + } + + private ProvidedCHO validate(Model m1, ProvidedCHO cho) throws IOException { + String uri = cho.getID(); + Model m2 = ModelFactory.createDefaultModel(); + new JenaObjectEncoder(library).encode(cho, m2, uri); + Model diff = m1.difference(m2); + long differences = diff.size(); + if (differences > 0) { + logErr("Jena differences <" + uri + ">" + differences + "\n" + + diff.toString()); + System.err.flush(); + } + + if (logDir != null && (saveCopy || differences > 0)) { + storeInFile(cho, newFile(uri)); + } + return cho; + } + + private void storeInDB(ProvidedCHO cho) throws IOException { + if (migrationRepository == null) { + return; + } + + migrationRepository.save(cho); + + if (!validateDB) { + return; + } + + String uri = cho.getID(); + EDMClass o2 = migrationRepository.getDatastore().find(cho.getClass()) + .filter(Filters.eq(ModelConstants.id, uri)) + .first(); + + Model m2 = ModelFactory.createDefaultModel(); + new JenaObjectEncoder(library).encode(o2, m2, uri); + + Model diff = this.cho.getModel().difference(m2); + long differences = diff.size(); + if (differences > 0) { + logErr("db differences " + this.cho.getModel().size() + " - " + m2.size() + " = " + differences + "\n" + + diff.toString()); + System.err.flush(); + } + + if (logDir != null && (saveCopy || differences > 0)) { + storeInFile(cho, newFile(uri)); + } + } + + private void storeInFile(ProvidedCHO cho, File out) throws IOException { + FileOutputStream fos = new FileOutputStream(out); + try { + registry.get(format).write(cho, fos); + fos.flush(); + } finally { + IOUtils.closeQuietly(fos); + } + } + } + + private File newFile(String uri) { + String name = uri.replace(ModelConstants.dataItemUri, "") + + "." + format.getExtension(); + File file = new File(logDir, name); + File dir = file.getParentFile(); + if (!dir.exists()) { + dir.mkdirs(); + } + return file; + } + + private ExecutorService getExecutor() { + return new ThreadPoolExecutor( + threads, threads, 0L, TimeUnit.MILLISECONDS + , new ArrayBlockingQueue(threads * 10) + , new ThreadPoolExecutor.CallerRunsPolicy()); + } + + public static void log(String str) { + System.out.println(recordId.get() + " :> " + str); + } + + public static void logErr(String str) { + System.err.println(recordId.get() + " :> " + str); + } +} diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java new file mode 100644 index 0000000..45bd675 --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java @@ -0,0 +1,54 @@ +package eu.europeana.api.record.migration; + +import eu.europeana.api.config.AppConfigConstants; +import eu.europeana.api.config.MediaTypeConfig; +import eu.europeana.api.edm.Namespaces; +import eu.europeana.api.model.MediaTypes; +import eu.europeana.api.record.db.config.DataSourceConfig; +import eu.europeana.api.record.io.FormatHandlerRegistry; +import eu.europeana.api.record.io.JenaBasedFormatWriter; +import eu.europeana.api.record.io.RecordIOConfig; +import eu.europeana.api.record.io.jena.RecordApiTemplateLibrary; +import eu.europeana.api.record.io.json.JsonLdWriter; +import eu.europeana.api.record.io.xml.XmlRecordWriter; +import eu.europeana.jena.encoder.codec.CodecRegistry; +import eu.europeana.jena.encoder.library.DefaultUriNormalizer; +import eu.europeana.jena.encoder.library.TemplateLibrary; +import jakarta.annotation.Resource; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.PropertySource; + +@Configuration +@PropertySource( + value = {"classpath:migration.properties", "classpath:migration.user.properties"}, + ignoreResourceNotFound = true) +@Import({MediaTypeConfig.class, DataSourceConfig.class}) +public class MigrationSettings { + + @Resource(name = AppConfigConstants.BEAN_MEDIA_TYPES) + private MediaTypes mediaTypes; + + + @Bean + public FormatHandlerRegistry getFormatHandlerRegistry() { + return new FormatHandlerRegistry( + new JsonLdWriter(), + new XmlRecordWriter(), + new JenaBasedFormatWriter("TURTLE"), + new JenaBasedFormatWriter("N3"), + new JenaBasedFormatWriter("NT")); + } + + @Bean + public TemplateLibrary getTemplateLibrary() { + return new RecordApiTemplateLibrary(new CodecRegistry(), new Namespaces(), new DefaultUriNormalizer()); + } + + public MediaTypes getMediaTypes() { + return mediaTypes; + } + + +} diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordDomProcessor.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordDomProcessor.java new file mode 100755 index 0000000..9c3dc4d --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordDomProcessor.java @@ -0,0 +1,382 @@ +package eu.europeana.api.record.migration; + +import eu.europeana.api.edm.*; +import org.apache.commons.lang3.StringUtils; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdfxml.xmlinput.DOM2Model; +import org.w3c.dom.*; + +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMResult; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; +import java.io.File; +import java.util.*; + +import static eu.europeana.api.record.migration.MigrationHandler.log; + +/** + * @author Hugo + * @since 1 Nov 2023 + */ +public class RecordDomProcessor { + + private static String PREFIX_ITEM = "http://data.europeana.eu/item"; + private static String PREFIX_AGGREGATION_EUROPEANA = "http://data.europeana.eu/aggregation/europeana"; + private static String PREFIX_PROXY_EUROPEANA = "http://data.europeana.eu/proxy/europeana"; + + public String getRecordId(Document doc) { + String uri = (doc.hasChildNodes() + ? getURI(doc.getDocumentElement() + .getElementsByTagNameNS(EDM.NS, EDM.ProvidedCHO)) + : null); + return (uri != null ? uri.replace(PREFIX_ITEM, "") : null); + } + + + public Result process(Document doc) { + Element root = doc.getDocumentElement(); + if (root == null) { + return null; + } + + + List entities = getEntities(root); + List proxies = append(root.getElementsByTagNameNS(ORE.NS, ORE.Proxy) + , new ArrayList(3)); + + Collection entityIds = getIds(entities, new TreeSet()); + + //re-establish references + processMetadata(root.getElementsByTagNameNS(ORE.NS, ORE.Proxy) + , entityIds); + + //check for URIs that must be changed to relative + Map newIds = improveURIs(entityIds); + if (!newIds.isEmpty()) { + applyNewIdsToResources(entities, newIds); + applyNewIdsToProperties(proxies, newIds); + } + + processAggregations(root.getElementsByTagNameNS(ORE.NS, ORE.Aggregation)); + processAggregations(root.getElementsByTagNameNS(EDM.NS, EDM.EuropeanaAggregation)); + + fixIdForResource(root.getElementsByTagNameNS(EDM.NS, EDM.EuropeanaAggregation) + , PREFIX_AGGREGATION_EUROPEANA); + + + List views = getHasViews(root.getElementsByTagNameNS(ORE.NS, ORE.Aggregation) + , new ArrayList()); + + String uri = getURI(root.getElementsByTagNameNS(EDM.NS, EDM.ProvidedCHO)); + if (uri == null) { + return null; + } + + return new Result(doc, uri, views); + } + + private List getEntities(Element root) { + NodeList agents = root.getElementsByTagNameNS(EDM.NS, EDM.Agent); + NodeList times = root.getElementsByTagNameNS(EDM.NS, EDM.TimeSpan); + NodeList places = root.getElementsByTagNameNS(EDM.NS, EDM.Place); + NodeList concepts = root.getElementsByTagNameNS(SKOS.NS, SKOS.Concept); + + List list = new ArrayList(agents.getLength() + + times.getLength() + + places.getLength() + + concepts.getLength()); + return append(concepts, append(places, append(times, append(agents, list)))); + } + + private List append(NodeList nodeList, List list) { + for (int i = 0; i < nodeList.getLength(); i++) { + list.add((Element) nodeList.item(i)); + } + return list; + } + + private Collection getIds(List list + , Collection ids) { + for (Element e : list) { + String uri = getId(e); + if (uri != null) { + ids.add(uri); + } + } + return ids; + } + + private void processMetadata(NodeList list, Collection ids) { + for (int i = 0; i < list.getLength(); i++) { + Element elem = (Element) list.item(i); + NodeList props = elem.getChildNodes(); + for (int e = 0; e < props.getLength(); e++) { + Element prop = (Element) props.item(e); + if (isReference(prop) || !canBecomeReference(prop)) { + continue; + } + + String str = prop.getTextContent(); + if (StringUtils.isBlank(str)) { + continue; + } + + if (ids.contains(str)) { + upgradeToReference(prop); + } + } + } + } + + private void fixIdForResource(NodeList list, String uriPrefix) { + if (list == null) { + return; + } + + for (int i = 0; i < list.getLength(); i++) { + Element elem = (Element) list.item(i); + String uri = getId(elem); + if (uri != null && uri.startsWith(uriPrefix)) { + continue; + } + + String recordId = getRecordID(uri); + if (recordId == null) { + continue; + } + + uri = uriPrefix + recordId; + setId(elem, uri); + log("Fixed URI to: " + uri); + } + } + + private Map improveURIs(Collection ids) { + Map ret = new HashMap(); + for (String id : ids) { + if (isRelativeURI(id) || isFullURI(id)) { + continue; + } + + String newId = newRelativeURI(id); + if (ids.contains(newId)) { + log("Conflicting id: " + newId); + continue; + } + ret.put(id, newId); + } + return ret; + } + + private String newRelativeURI(String id) { + return (id.contains("/") || id.contains("#") ? "/" : "#") + id; + } + + private boolean isFullURI(String uri) { + return uri.contains("://"); + } + + private boolean isRelativeURI(String uri) { + char c = uri.charAt(0); + switch (c) { + case '.': + case '/': + case '#': + return true; + } + return false; + } + + private void applyNewIdsToResources(List list + , Map newIds) { + for (Element elem : list) { + String id = getId(elem); + if (id == null) { + continue; + } + + String newId = newIds.get(id); + if (newId == null) { + continue; + } + + log("Changed declared id: " + id + " => " + newId); + setId(elem, newId); + } + } + + private void applyNewIdsToProperties(List list + , Map newIds) { + for (Element elem : list) { + NodeList props = elem.getChildNodes(); + for (int e = 0; e < props.getLength(); e++) { + Element prop = (Element) props.item(e); + + String ref = getReference(prop); + if (ref == null) { + continue; + } + + String newId = newIds.get(ref); + if (newId == null) { + continue; + } + + log("Changed reference: " + ref + " => " + newId); + setReference(prop, newId); + } + } + } + + + private void processAggregations(NodeList list) { + if (list == null) { + return; + } + + for (int i = 0; i < list.getLength(); i++) { + Element aggr = (Element) list.item(i); + fixWebResourceURLs(aggr.getElementsByTagNameNS(EDM.NS, EDM.isShownAt)); + fixWebResourceURLs(aggr.getElementsByTagNameNS(EDM.NS, EDM.object)); + fixWebResourceURLs(aggr.getElementsByTagNameNS(EDM.NS, EDM.isShownBy)); + fixWebResourceURLs(aggr.getElementsByTagNameNS(EDM.NS, EDM.hasView)); + fixWebResourceURLs(aggr.getElementsByTagNameNS(EDM.NS, EDM.preview)); + } + } + + private void fixWebResourceURLs(NodeList list) { + for (int i = 0; i < list.getLength(); i++) { + Element prop = (Element) list.item(i); + String attr = getReference(prop); + if (attr == null) { + continue; + } + + if (!attr.startsWith(PREFIX_ITEM)) { + continue; + } + + log("Fixed webresource reference: " + attr); + setReference(prop, attr.replace(PREFIX_ITEM, "")); + } + } + + private List getHasViews(NodeList list, List views) { + if (list == null) { + return views; + } + + for (int i = 0; i < list.getLength(); i++) { + Element aggr = (Element) list.item(i); + NodeList hasViews = aggr.getElementsByTagNameNS(EDM.NS, EDM.hasView); + for (int e = 0; e < hasViews.getLength(); e++) { + Element hasView = (Element) hasViews.item(e); + + String attr = getReference(hasView); + if (attr != null) { + views.add(attr); + } + } + } + return views; + } + + private String getURI(NodeList list) { + if (list == null) { + return null; + } + + for (int i = 0; i < list.getLength(); i++) { + Element cho = (Element) list.item(i); + + return getId(cho); + } + return null; + } + + private String getId(Element elem) { + String attr = elem.getAttributeNS(RDF.NS, RDF.about); + return (StringUtils.isBlank(attr) ? null : attr); + } + + private void setId(Element elem, String uri) { + elem.setAttributeNS(RDF.NS, RDF.PREFIX + ":" + RDF.about, uri); + } + + + private boolean isReference(Element prop) { + return prop.hasAttributeNS(RDF.NS, RDF.resource); + } + + private String getReference(Element prop) { + String attr = prop.getAttributeNS(RDF.NS, RDF.resource); + return (StringUtils.isBlank(attr) ? null : attr); + } + + private void setReference(Element prop, String uri) { + prop.setAttributeNS(RDF.NS, RDF.PREFIX + ":" + RDF.resource, uri); + } + + private boolean canBecomeReference(Element prop) { + return (!prop.getLocalName().equals(DC.identifier)); + } + + private void upgradeToReference(Element prop) { + String str = prop.getTextContent(); + log("Upgrading reference: " + str); + + while (prop.hasChildNodes()) { + prop.removeChild(prop.getFirstChild()); + } + NamedNodeMap map = prop.getAttributes(); + while (map.getLength() > 0) { + Node attr = map.item(0); + map.removeNamedItem(attr.getNodeName()); + } + + setReference(prop, str); + } + + private String getRecordID(String uri) { + int i = uri.lastIndexOf('/'); + if (i <= 0) { + return null; + } + i = uri.lastIndexOf('/', i - 1); + return (i < 0 ? null : uri.substring(i)); + } + + public static class Result { + public Document doc; + public String uri; + public List views; + + public Result(Document doc, String uri, List views) { + this.doc = doc; + this.uri = uri; + this.views = views; + } + } + + public static final void main(String[] args) throws Throwable { + File src = new File("C:\\Work\\incoming\\Record v3\\source\\urn_imss_indepth_100177.xml"); + Transformer t = TransformerFactory.newInstance().newTransformer(); + DOMResult result = new DOMResult(); + t.transform(new StreamSource(src), result); + Result res = new RecordDomProcessor().process((Document) result.getNode()); + t.transform(new DOMSource(result.getNode()), new StreamResult(System.out)); + + Model m = ModelFactory.createDefaultModel(); + + DOM2Model dom2Model = DOM2Model.createD2M(res.uri, m); + dom2Model.setProperty("allowBadURIs", "true"); + dom2Model.load(result.getNode()); + + m.write(System.out, "RDF/XML"); + + } +} diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordJenaProcessor.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordJenaProcessor.java new file mode 100755 index 0000000..6746b5d --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RecordJenaProcessor.java @@ -0,0 +1,543 @@ +package eu.europeana.api.record.migration; + +import static org.apache.jena.rdf.model.ResourceFactory.*; + +import java.util.*; + +import eu.europeana.api.config.AppConfigConstants; +import eu.europeana.api.model.MediaType; +import eu.europeana.api.model.MediaTypes; +import org.apache.jena.rdf.model.Literal; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.Property; +import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.ResIterator; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.rdf.model.StmtIterator; +import org.apache.jena.vocabulary.RDF; + +import eu.europeana.api.edm.CC; +import eu.europeana.api.edm.DC; +import eu.europeana.api.edm.DCTerms; +import eu.europeana.api.edm.EBUCORE; +import eu.europeana.api.edm.EDM; +import eu.europeana.api.edm.FOAF; +import eu.europeana.api.edm.ORE; +import eu.europeana.api.edm.RDAGR2; +import eu.europeana.api.edm.SKOS; +import eu.europeana.api.edm.SVCS; +import eu.europeana.api.edm.XSD; +import eu.europeana.api.record.io.jena.RecordApiTemplateLibrary; + +import static eu.europeana.api.record.migration.MigrationHandler.log; + +/** + * @author Hugo + * @since 25 Oct 2023 + */ +public class RecordJenaProcessor { + + private static final Resource ProvidedCHO = createResource(EDM.NS + EDM.ProvidedCHO); + private static final Resource Proxy = createResource(ORE.NS + ORE.Proxy); + private static final Resource WebResource = createResource(EDM.NS + EDM.WebResource); + private static final Resource Aggregation = createResource(ORE.NS + ORE.Aggregation); + private static final Resource EuropeanaAggregation = createResource(EDM.NS + EDM.EuropeanaAggregation); + private static final Resource FullTextResource = createResource(EDM.NS + EDM.FullTextResource); + private static final Resource Service = createResource(SVCS.NS + SVCS.Service); + private static final Resource License = createResource(CC.NS + CC.License); + + //entities + private static final Resource Agent = createResource(EDM.NS + EDM.Agent); + private static final Resource Place = createResource(EDM.NS + EDM.Place); + private static final Resource Concept = createResource(SKOS.NS + SKOS.Concept); + private static final Resource TimeSpan = createResource(EDM.NS + EDM.TimeSpan); + private static final Resource Organization = createResource(FOAF.NS + FOAF.Organization); + + private static final Property aggregates = createProperty(ORE.NS + ORE.aggregates); + private static final Property proxyFor = createProperty(ORE.NS + ORE.proxyFor); + private static final Property proxyIn = createProperty(ORE.NS + ORE.proxyIn); + private static final Property aggregatedCHO = createProperty(EDM.NS + EDM.aggregatedCHO); + private static final Property lineage = createProperty(ORE.NS + ORE.lineage); + private static final Property created = createProperty(DCTerms.NS + DCTerms.created); + private static final Property modified = createProperty(DCTerms.NS + DCTerms.modified); + private static final Property creator = createProperty(DC.NS + DC.creator); + private static final Property europeanaProxy = createProperty(EDM.NS + EDM.europeanaProxy); + private static final Property placeOfBirth = createProperty(RDAGR2.NS + RDAGR2.placeOfBirth); + private static final Property placeOfDeath = createProperty(RDAGR2.NS + RDAGR2.placeOfDeath); + private static final Property isPartOf = createProperty(DCTerms.NS + DCTerms.isPartOf); + private static final Property hasPart = createProperty(DCTerms.NS + DCTerms.hasPart); + private static final Property bioInfo = createProperty(RDAGR2.NS + RDAGR2.biographicalInformation); + + + private static final Property spatial = createProperty(DCTerms.NS + DCTerms.spatial); + + private static final Property hasMimeType = createProperty(EBUCORE.NS + EBUCORE.hasMimeType); + private static final Property componentColor = createProperty(EDM.NS + EDM.componentColor); + private static final Property spatialResolution = createProperty(EDM.NS + EDM.spatialResolution); + private static final Property duration = createProperty(EBUCORE.NS + EBUCORE.duration); + + private static final Property preview = createProperty(EDM.NS + EDM.preview); + private static final Property isShownBy = createProperty(EDM.NS + EDM.isShownBy); + private static final Property isShownAt = createProperty(EDM.NS + EDM.isShownAt); + private static final Property object = createProperty(EDM.NS + EDM.object); + private static final Property hasView = createProperty(EDM.NS + EDM.hasView); + + private static List ENTITIES + = Arrays.asList(Agent, Place, Concept, TimeSpan, Organization); + + private static Collection CORE_CLASSES + = Arrays.asList(Proxy, Aggregation, WebResource, EuropeanaAggregation, ProvidedCHO); + + private MediaTypes mediaTypes; + + public RecordJenaProcessor(MediaTypes mediaTypes) { + this.mediaTypes = mediaTypes; + } + + public Resource upgrade(Resource cho) { + + Model m = cho.getModel(); + List proxies = getAsList(m.listResourcesWithProperty(proxyFor, cho)); + Collections.sort(proxies, new ProxyComparator()); + + if ( hasLineage(proxies) ) { + for ( Resource proxy : proxies ) { + proxy.removeAll(lineage); + } + } + addLineage(proxies); + + //if ( !hasLineage(proxies) ) { addLineage(proxies); } + + Resource eaggr = getAggregation(proxies.get(proxies.size() - 1)); + removeCreatorAndAggregates(eaggr); + fixTimestamps(eaggr); + addTimestamps(eaggr, proxies); + removeEuropeanaProxy(proxies); + + List webResources = getAsList(m.listResourcesWithProperty(RDF.type, WebResource)); + removeLooseResources(webResources); + cleanPartRelations(webResources); + cleanTechMetadata(webResources); + fixWebResourcerReference(getObjects(m.listStatements(null, object, (RDFNode)null))); + fixWebResourcerReference(getObjects(m.listStatements(null, preview, (RDFNode)null))); + fixWebResourcerReference(getObjects(m.listStatements(null, isShownBy, (RDFNode)null))); + fixWebResourcerReference(getObjects(m.listStatements(null, isShownAt, (RDFNode)null))); + fixWebResourcerReference(getObjects(m.listStatements(null, hasView, (RDFNode)null))); + + removeLooseResources(getAsList(m.listResourcesWithProperty(RDF.type, Service))); + removeLooseResources(getAsList(m.listResourcesWithProperty(RDF.type, License))); + + fixNonNegative(getAsList(m.listStatements(null, spatialResolution, (RDFNode)null))); + fixNonNegative(getAsList(m.listStatements(null, duration, (RDFNode)null))); + fixComponentColor(getAsList(m.listStatements(null, componentColor, (RDFNode)null))); + + List entities = getEntities(m); + removeLooseResourcesByRecursion(entities); + cleanAgents(entities); + cleanMultiTypedEntities(entities); + cleanPartRelations(entities); + + return cho; + } + + private void fixNonNegative(List stmts) { + for ( Statement stmt : stmts ) { + RDFNode obj = stmt.getObject(); + if ( !obj.isLiteral() ) { continue; } + + String str = obj.asLiteral().getString(); + if ( !str.contains("-") ) { continue; } + log("Negative " + stmt.getPredicate().getLocalName() + ": " + str); + + stmt.getModel().remove(stmt); + } + } + + private void fixComponentColor(List stmts) { + for ( Statement stmt : stmts ) { + RDFNode obj = stmt.getObject(); + if ( !obj.isLiteral() ) { continue; } + + String str = obj.asLiteral().getString(); + if ( !str.contains("#") ) { continue; } + + Model model = stmt.getModel(); + str = str.replaceAll("#", ""); + obj = model.createTypedLiteral(str, XSD.NS + XSD.hexBinary); + stmt.getSubject().addProperty(componentColor, obj); + model.remove(stmt); + } + } + + private void cleanTechMetadata(List resources) + { + for ( Resource r : resources ) { + Statement stmt = r.getProperty(hasMimeType); + if ( stmt == null ) { continue; } + + Optional mediaType = mediaTypes.getMediaType(stmt.getString()); + if ( !mediaType.isEmpty() ) { continue; } + + r.getModel().remove(r, RDF.type, FullTextResource); + } + } + + private void fixWebResourcerReference(List resources) + { + for ( Resource r : resources ) { + if ( r.hasProperty(RDF.type) ) { continue; } + + if ( r.getURI().startsWith("http://data.europeana.eu/item") ) { + continue; + } + r.addProperty(RDF.type, WebResource); + } + } + + private void removeLooseResources(Collection entities) { + boolean removed = true; + while ( removed ) { + removed = false; + Iterator iter = entities.iterator(); + while ( iter.hasNext() ) { + Resource entity = iter.next(); + Model m = entity.getModel(); + boolean contains = existsBesidesSelfReference( + m.listStatements(null, null, entity)); + if ( contains ) { continue; } + log("Removing loose resource: " + entity.getURI()); + entity.removeProperties(); + iter.remove(); + removed = true; + } + } + } + + private void removeLooseResourcesByRecursion(Collection entities) { + Iterator iter = entities.iterator(); + Stack stack = new Stack(); + while ( iter.hasNext() ) { + Resource entity = iter.next(); + stack.push(entity); + boolean connected = isConnectedTo(stack); + stack.clear(); + + if ( connected ) { continue; } + + log("Removing loose resource: " + entity.getURI()); + entity.removeProperties(); + iter.remove(); + } + } + + private boolean isConnectedTo(Stack stack) { + Resource entity = stack.peek(); + StmtIterator iter = entity.getModel().listStatements(null, null, entity); + while ( iter.hasNext() ) { + Resource subject = iter.next().getSubject(); + if ( hasType(subject, CORE_CLASSES) ) { return true; } + if ( stack.contains(subject) ) { continue; } + + stack.push(subject); + if ( isConnectedTo(stack) ) { return true; } + stack.pop(); + } + return false; + } + + + + + private void cleanMultiTypedEntities(Collection entities) { + for ( Resource entity : entities ) { + int count = count(entity.listProperties(RDF.type)); + if ( count <= 1 ) { continue; } + + String uri = entity.getURI(); + log("Entity with multiple types: " + uri); + if ( uri.startsWith("http://vocab.getty.edu/tgn/") ) { + retainType(entity, Place); + continue; + } + if ( uri.startsWith("http://viaf.org/viaf/") ) { + retainType(entity, Agent); + continue; + } + if ( isPlace(entity) ) { + retainType(entity, Place); + continue; + } + + entity.getModel().remove(entity, RDF.type, Concept); + log("Removed type Concept for: " + entity.getURI()); + } + } + + private void cleanPartRelations(Collection entities) { + for ( Resource entity : entities ) { + retainOnlyReferences(entity, isPartOf); + retainOnlyReferences(entity, hasPart); + } + } + + private void retainOnlyReferences(Collection resources + , Property property) { + for ( Resource res : resources ) { retainOnlyReferences(res, property); } + } + + private void retainOnlyReferences(Resource res + , Property property) { + Model model = res.getModel(); + for ( Statement stmt : getAsList(res.listProperties(property)) ) { + if ( !stmt.getObject().isLiteral() ) { continue; } + model.remove(stmt); + log("Removed literal from " + property.getLocalName() + ": " + + stmt.getObject().asLiteral().getString()); + } + } + + private void retainOnlyLiterals(Collection resources + , Property property) { + for ( Resource res : resources ) { retainOnlyLiterals(res, property); } + } + + private void retainOnlyLiterals(Resource res, Property property) { + Model model = res.getModel(); + for ( Statement stmt : getAsList(res.listProperties(property)) ) { + if ( stmt.getObject().isLiteral() ) { continue; } + model.remove(stmt); + log("Removed reference from " + property.getLocalName() + ": " + + stmt.getObject().asResource().getURI()); + } + } + + private boolean isPlace(Resource entity) { + Model m = entity.getModel(); + if ( exists(m.listStatements(null, spatial, entity)) ) { return true; } + return false; + } + + private void retainType(Resource entity, Resource type) { + Model m = entity.getModel(); + for ( Statement stmt : getAsList(entity.listProperties(RDF.type)) ) { + if ( stmt.getObject().isResource() && + stmt.getObject().asResource().equals(type) ) { continue; } + + m.remove(stmt); + log("Removed type " + stmt.getObject().asResource().getLocalName() + + " for: " + stmt.getSubject().getURI()); + } + } + + private List getEntities(Model m) { + List list = new ArrayList(); + StmtIterator iter = m.listStatements(null, RDF.type, (Resource)null); + while (iter.hasNext()) { + Statement stmt = iter.next(); + Resource r = stmt.getObject().asResource(); + if ( ENTITIES.contains(r) ) { list.add(stmt.getSubject()); } + } + return list; + } + + private void cleanAgents(List entities) { + int count = 0; + for ( Resource entity : entities ) { + if ( !entity.hasProperty(RDF.type, Agent) ) { continue; } + + Model m = entity.getModel(); + count = count(entity.listProperties(placeOfBirth)); + if ( count > 1 ) { + retainFirst(getAsList(entity.listProperties(placeOfBirth))); + } + + count = count(entity.listProperties(placeOfDeath)); + if ( count > 1 ) { + retainFirst(getAsList(entity.listProperties(placeOfDeath))); + } + + retainOnlyLiterals(entity, bioInfo); + } + } + + private void removeEuropeanaProxy(List proxies) { + for ( Resource proxy : proxies ) { proxy.removeAll(europeanaProxy); } + } + + private boolean hasLineage(List proxies) { + boolean ret = false; + for ( Resource proxy : proxies ) { + ret = ret | proxy.hasProperty(lineage); + } + return ret; + } + + private void addLineage(List proxies) { + Resource last = null; + for ( Resource proxy : proxies ) { + if ( last != null ) { proxy.addProperty(lineage, last); } + last = proxy; + } + } + + + private void removeCreatorAndAggregates(Resource eaggr) { + eaggr.removeAll(creator); + eaggr.removeAll(aggregates); + } + + private void fixTimestamps(Resource eaggr) { + Model model = eaggr.getModel(); + Literal literal, newLiteral; + + literal = getTimestamp(eaggr, created); + newLiteral = fixTimestamp(literal); + if ( newLiteral != null ) { + model.remove(eaggr, created, literal); + model.add(eaggr, created, newLiteral); + } + + literal = getTimestamp(eaggr, modified); + newLiteral = fixTimestamp(literal); + if ( newLiteral != null ) { + model.remove(eaggr, modified, literal); + model.add(eaggr, modified, newLiteral); + } + } + + private Literal fixTimestamp(Literal literal) { + if ( literal == null ) { return null; } + + String str = literal.getString(); + if ( str.endsWith(".000Z") ) { + return literal.getModel().createTypedLiteral(str.replace(".000Z","Z") + , literal.getDatatypeURI()); + } + return null; + } + + + private void addTimestamps(Resource eaggr, List proxies) { + //Literal lcreated = getTimestamp(eaggr, created); + Literal lmodified = getTimestamp(eaggr, modified); + + Iterator iter = proxies.iterator(); + while (iter.hasNext()) { + Resource aggr = getAggregation(iter.next()); + if ( aggr == null || !iter.hasNext() ) { continue; } + + aggr.addLiteral(modified, lmodified); + } + } + + private Resource getAggregation(Resource proxy) { + Statement stmt = proxy.getProperty(proxyIn); + return ( stmt == null ? null : stmt.getObject().asResource() ); + } + + private Literal getTimestamp(Resource aggr, Property prop) { + Statement stmt = aggr.getProperty(prop); + return ( stmt == null || !stmt.getObject().isLiteral() + ? null : stmt.getObject().asLiteral() ); + } + + private void retainFirst(List stmts) { + Iterator iter = stmts.iterator(); + Model model = iter.next().getModel(); + while ( iter.hasNext() ) { + Statement stmt = iter.next(); + log("Removed duplicate " + stmt.getPredicate().getLocalName() + + " from " + stmt.getSubject().getURI()); + model.remove(stmt); + } + } + + private int count(StmtIterator iter) { + int count = 0; + try { + while ( iter.hasNext() ) { iter.next(); count++; } + return count; + } + finally { iter.close(); } + } + + private boolean existsBesidesSelfReference(StmtIterator iter) { + try { + while ( iter.hasNext() ) { + Statement stmt = iter.next(); + Resource subj = stmt.getSubject(); + Resource obj = (Resource)stmt.getObject(); + + //self or cycle reference + if ( subj.equals(obj) /*|| obj.hasProperty(null, subj)*/ ) { continue; } + + return true; + } + } + finally { iter.close(); } + return false; + } + + private boolean hasType(Resource r, Collection types) { + StmtIterator iter = r.listProperties(RDF.type); + while ( iter.hasNext() ) { + if ( types.contains(iter.next().getObject()) ) { return true; } + } + return false; + } + + private boolean exists(StmtIterator iter) { + try { + return ( iter.hasNext() ); + } + finally { iter.close(); } + } + + private List getObjects(StmtIterator iter) { + List ret = new ArrayList(); + try { + while ( iter.hasNext() ) { + RDFNode node = iter.next().getObject(); + if ( node.isResource() ) { ret.add(node.asResource()); } + } + return ret; + } + finally { iter.close(); } + } + + private List getAsList(StmtIterator iter) { + List ret = new ArrayList(); + try { + while ( iter.hasNext() ) { ret.add(iter.next()); } + return ret; + } + finally { iter.close(); } + } + + private List getAsList(ResIterator iter) { + List ret = new ArrayList(); + try { + while ( iter.hasNext() ) { ret.add(iter.next()); } + return ret; + } + finally { iter.close(); } + } + + private static class ProxyComparator implements Comparator { + + @Override + public int compare(Resource r1, Resource r2) + { + return (getOrder(r1) - getOrder(r2)); + } + + private int getOrder(Resource r) { + String uri = r.getURI(); + if ( uri.contains("/proxy/europeana/") ) { return 10; } + if ( uri.contains("/proxy/provider/" ) ) { return 1; } + return 2; + } + } +} diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java new file mode 100755 index 0000000..d660817 --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java @@ -0,0 +1,166 @@ +package eu.europeana.api.record.migration; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; +import org.springframework.context.ConfigurableApplicationContext; +import org.w3c.dom.Document; + +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMResult; +import javax.xml.transform.stream.StreamSource; +import java.io.*; +import java.util.Collection; +import java.util.HashSet; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + + +/** + * @author Hugo + * @since 17 Oct 2023 + */ +@SpringBootApplication( + scanBasePackages = {"eu.europeana.api.record.migration"}, + exclude = { + // Remove these exclusions to re-enable security + SecurityAutoConfiguration.class, + // DataSources are manually configured (for EM and batch DBs) + DataSourceAutoConfiguration.class + }) +public class RunMigration implements CommandLineRunner { + + private PrintStream progressLog; + private Transformer transformer = null; + private Collection processed = new HashSet(); + + @Autowired + private MigrationHandler handler; + + public static void main(String[] args) { + ConfigurableApplicationContext context = SpringApplication.run(RunMigration.class, args); + System.exit(SpringApplication.exit(context)); + } + + @Override + public void run(String... args) throws Exception { + File src = new File("C:\\Work\\incoming\\Record v3\\source\\"); + File logDir = new File("C:\\Work\\incoming\\Record v3\\target"); + PrintStream out = new PrintStream(new File(logDir, "run.log")); + PrintStream error = new PrintStream(new File(logDir, "error.log")); + try { + // handler settings + handler.setLoggingDir(logDir); + handler.setThreads(20); + handler.setSaveCopy(true); + handler.setValidateDB(true); + + // file and logger setting + File progressFile = new File(logDir, "progress.log"); + loadProcessed(progressFile); + progressLog = new PrintStream(new FileOutputStream(progressFile, true)); + transformer = TransformerFactory.newInstance().newTransformer(); + + // process + process(src); + } catch (Throwable throwable) { + throwable.printStackTrace(); + } finally { + out.flush(); + error.flush(); + IOUtils.closeQuietly(out); + IOUtils.closeQuietly(error); + } + } + + private void process(File dir) { + try { + handler.start(); + processInt(dir); + } finally { + handler.end(); + } + } + + private void processInt(File dir) { + for (File file : dir.listFiles()) { + if (file.isDirectory()) { + processInt(file); + continue; + } + + if (processed.contains(file.getPath())) { + continue; + } + + String name = file.getName(); + if (name.endsWith(".xml")) { + processFile(file); + } else if (name.endsWith(".zip")) { + processZipFile(file); + } + } + } + + private void loadProcessed(File file) throws IOException { + if (!file.exists()) { + return; + } + + try (BufferedReader reader = new BufferedReader(new FileReader(file))) { + while (reader.ready()) { + String path = reader.readLine().trim(); + if (!StringUtils.isEmpty(path)) { + processed.add(path); + } + } + } + } + + private void processFile(File file) { + try (InputStream is = new FileInputStream(file)) { + handle(is); + progressLog.println(file.getPath()); + } catch (IOException e) { + e.printStackTrace(System.err); + } + } + + private void processZipFile(File file) { + try (ZipInputStream zis = new ZipInputStream(new FileInputStream(file))) { + ZipEntry ze; + InputStream is = new FilterInputStream(zis) { + @Override + public void close() { + } + }; + while ((ze = zis.getNextEntry()) != null) { + String name = ze.getName(); + if (name.endsWith(".xml")) { + handle(is); + } + zis.closeEntry(); + } + progressLog.println(file.getPath()); + } catch (IOException e) { + e.printStackTrace(System.err); + } + } + + private void handle(InputStream is) { + try { + DOMResult domResult = new DOMResult(); + transformer.transform(new StreamSource(is), domResult); + handler.runTask((Document) domResult.getNode()); + } catch (TransformerException e) { + } + } + +} diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/ViewComparator.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/ViewComparator.java new file mode 100755 index 0000000..1619d11 --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/ViewComparator.java @@ -0,0 +1,33 @@ +package eu.europeana.api.record.migration; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import eu.europeana.api.record.model.media.WebResource; + +/** + * @author Hugo + * @since 2 Nov 2023 + */ +public class ViewComparator implements Comparator { + + public static void sort(List list, List sorted) { + if ( list.size() == 1 ) { return; } + + Collections.sort(list, new ViewComparator(sorted)); + } + + private List sortedIDs; + + public ViewComparator(List sortedIDs) { + this.sortedIDs = sortedIDs; + } + + @Override + public int compare(WebResource wr1, WebResource wr2) { + return ( wr1.equals(wr2) ? 0 : (getOrder(wr1) - getOrder(wr2)) ); + } + + private int getOrder(WebResource o1) { return sortedIDs.indexOf(o1.getID()); } +} diff --git a/record-api-migration/src/main/resources/migration.properties b/record-api-migration/src/main/resources/migration.properties new file mode 100644 index 0000000..bb4d00d --- /dev/null +++ b/record-api-migration/src/main/resources/migration.properties @@ -0,0 +1,3 @@ +# mongo url +mongo.connectionUrl= +mongo.record.database= diff --git a/record-api-mongo/src/main/java/eu/europeana/api/record/db/repository/RecordRepository.java b/record-api-mongo/src/main/java/eu/europeana/api/record/db/repository/RecordRepository.java index 84c7750..41431f1 100644 --- a/record-api-mongo/src/main/java/eu/europeana/api/record/db/repository/RecordRepository.java +++ b/record-api-mongo/src/main/java/eu/europeana/api/record/db/repository/RecordRepository.java @@ -86,8 +86,11 @@ public MorphiaCursor findByRecordIds(List recordIds) { .iterator(new FindOptions()); } + public Datastore getDatastore() { + return datastore; + } -// public void save(EDMClass o) + // public void save(EDMClass o) // { // if ( o != null ) { // long count = datastore.find(o.getClass()).filter(Filters.eq("id", o.getID())).count(); From 1bfef94bc9f31af8332c40e17e38c75ac0137725 Mon Sep 17 00:00:00 2001 From: srishti Date: Tue, 2 Apr 2024 15:43:11 +0200 Subject: [PATCH 2/4] Ea-3681 add a repo for spring injections --- .../europeana/api/record/migration/MigrationHandler.java | 4 ++-- .../api/record/migration/MigrationRepository.java | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationRepository.java diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java index 0a71faf..0b61603 100755 --- a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java @@ -61,12 +61,12 @@ public class MigrationHandler { private final FormatHandlerRegistry registry; - private final RecordRepository migrationRepository; + private final MigrationRepository migrationRepository; private final TemplateLibrary library; @Autowired - public MigrationHandler(MigrationSettings settings, FormatHandlerRegistry registry, RecordRepository migrationRepository, TemplateLibrary library) { + public MigrationHandler(MigrationSettings settings, FormatHandlerRegistry registry, MigrationRepository migrationRepository, TemplateLibrary library) { this.settings = settings; this.registry = registry; this.migrationRepository = migrationRepository; diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationRepository.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationRepository.java new file mode 100644 index 0000000..99662e1 --- /dev/null +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationRepository.java @@ -0,0 +1,9 @@ +package eu.europeana.api.record.migration; + +import eu.europeana.api.record.db.repository.RecordRepository; +import org.springframework.stereotype.Repository; + +@Repository +public class MigrationRepository extends RecordRepository { + +} From f851710043a5dcf6a3709179116cb22dbfeadc02 Mon Sep 17 00:00:00 2001 From: srishti Date: Tue, 2 Apr 2024 15:51:26 +0200 Subject: [PATCH 3/4] EA-3681 add the source and tragte directory in properties file --- .../api/record/migration/MigrationSettings.java | 2 +- .../api/record/migration/RunMigration.java | 16 ++++++++++++++-- .../src/main/resources/migration.properties | 5 +++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java index 45bd675..948f2c8 100644 --- a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationSettings.java @@ -15,6 +15,7 @@ import eu.europeana.jena.encoder.library.DefaultUriNormalizer; import eu.europeana.jena.encoder.library.TemplateLibrary; import jakarta.annotation.Resource; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Import; @@ -30,7 +31,6 @@ public class MigrationSettings { @Resource(name = AppConfigConstants.BEAN_MEDIA_TYPES) private MediaTypes mediaTypes; - @Bean public FormatHandlerRegistry getFormatHandlerRegistry() { return new FormatHandlerRegistry( diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java index d660817..f3c7604 100755 --- a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java @@ -3,12 +3,14 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.CommandLineRunner; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; import org.springframework.context.ConfigurableApplicationContext; +import org.springframework.context.annotation.PropertySource; import org.w3c.dom.Document; import javax.xml.transform.Transformer; @@ -35,8 +37,17 @@ // DataSources are manually configured (for EM and batch DBs) DataSourceAutoConfiguration.class }) +@PropertySource( + value = {"classpath:migration.properties", "classpath:migration.user.properties"}, + ignoreResourceNotFound = true) public class RunMigration implements CommandLineRunner { + @Value("${source.file}") + private String sourceDirectory; + + @Value("${target.file}") + private String targetDirectory; + private PrintStream progressLog; private Transformer transformer = null; private Collection processed = new HashSet(); @@ -44,6 +55,7 @@ public class RunMigration implements CommandLineRunner { @Autowired private MigrationHandler handler; + public static void main(String[] args) { ConfigurableApplicationContext context = SpringApplication.run(RunMigration.class, args); System.exit(SpringApplication.exit(context)); @@ -51,8 +63,8 @@ public static void main(String[] args) { @Override public void run(String... args) throws Exception { - File src = new File("C:\\Work\\incoming\\Record v3\\source\\"); - File logDir = new File("C:\\Work\\incoming\\Record v3\\target"); + File src = new File(sourceDirectory); + File logDir = new File(targetDirectory); PrintStream out = new PrintStream(new File(logDir, "run.log")); PrintStream error = new PrintStream(new File(logDir, "error.log")); try { diff --git a/record-api-migration/src/main/resources/migration.properties b/record-api-migration/src/main/resources/migration.properties index bb4d00d..90645e8 100644 --- a/record-api-migration/src/main/resources/migration.properties +++ b/record-api-migration/src/main/resources/migration.properties @@ -1,3 +1,8 @@ # mongo url mongo.connectionUrl= mongo.record.database= + + +## directories +source.file= +target.file= From 3f9d4e238ddb7515a618e827f12d41da47b16734 Mon Sep 17 00:00:00 2001 From: srishti Date: Tue, 2 Apr 2024 15:52:01 +0200 Subject: [PATCH 4/4] EA-3681 sonarq --- .../java/eu/europeana/api/record/migration/MigrationHandler.java | 1 - .../java/eu/europeana/api/record/migration/RunMigration.java | 1 - 2 files changed, 2 deletions(-) diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java index 0b61603..e86ca73 100755 --- a/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/MigrationHandler.java @@ -3,7 +3,6 @@ import dev.morphia.query.filters.Filters; import eu.europeana.api.edm.EDM; import eu.europeana.api.format.RdfFormat; -import eu.europeana.api.record.db.repository.RecordRepository; import eu.europeana.api.record.io.FormatHandlerRegistry; import eu.europeana.api.record.migration.RecordDomProcessor.Result; import eu.europeana.api.record.model.*; diff --git a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java index f3c7604..d29d8cb 100755 --- a/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java +++ b/record-api-migration/src/main/java/eu/europeana/api/record/migration/RunMigration.java @@ -55,7 +55,6 @@ public class RunMigration implements CommandLineRunner { @Autowired private MigrationHandler handler; - public static void main(String[] args) { ConfigurableApplicationContext context = SpringApplication.run(RunMigration.class, args); System.exit(SpringApplication.exit(context));