Skip to content

Commit

Permalink
Merge pull request #53 from ruegamer/datacite-xml-export
Browse files Browse the repository at this point in the history
Added datacite xml export in qucosa manager
  • Loading branch information
claussni authored Dec 14, 2017
2 parents 69846cb + e01ca96 commit fb24ab7
Show file tree
Hide file tree
Showing 8 changed files with 652 additions and 27 deletions.
100 changes: 73 additions & 27 deletions Classes/Controller/GetFileController.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@
* 3. METS from Kitodo.Publication (this extension)
* http://localhost/api/3/preview/
*
* 4. DataCite from Kitodo.Publication (this extension)
*
* @author Alexander Bigga <[email protected]>
* @author Ralf Claussnitzer <[email protected]>
* @author Florian Rügamer <[email protected]>
*/

use TYPO3\CMS\Core\Utility\GeneralUtility;
Expand All @@ -63,7 +65,6 @@ class GetFileController extends \EWW\Dpf\Controller\AbstractController
*/
protected $clientConfigurationManager;


public function attachmentAction()
{

Expand All @@ -82,38 +83,15 @@ public function attachmentAction()

if ($document) {

// Build METS-Data
$exporter = new \EWW\Dpf\Services\MetsExporter();

$fileData = $document->getCurrentFileData();

$exporter->setFileData($fileData);

$exporter->setMods($document->getXmlData());

$exporter->setSlubInfo($document->getSlubInfoData());

if (empty($document->getObjectIdentifier())) {

$exporter->setObjId($document->getUid());

} else {

$exporter->setObjId($document->getObjectIdentifier());

}

$exporter->buildMets();

$metsXml = $exporter->getMetsData();

$metsXml = $this->buildMetsXml($document);
$this->response->setHeader('Content-Type', 'text/xml; charset=UTF-8');

return $metsXml;

} else {

$this->response->setStatus(404);
return 'No such document';

}

case 'attachment':
Expand Down Expand Up @@ -155,6 +133,37 @@ public function attachmentAction()

break;

case 'dataCite':

$qid = $piVars['qid'];
$source = explode(':', $qid);
if($source[0] == 'qucosa') {

$path = rtrim('http://' . $fedoraHost,"/").'/fedora/objects/'.$piVars['qid'].'/methods/qucosa:SDef/getMETSDissemination?supplement=yes';
$metsXml = str_replace('&', '&amp;', file_get_contents($path));
$dataCiteXml = \EWW\Dpf\Helper\DataCiteXml::convertFromMetsXml($metsXml);

} elseif($document = $this->documentRepository->findByUid($piVars['qid'])) {

$metsXml = str_replace('&', '&amp;', $this->buildMetsXml($document));
$dataCiteXml = \EWW\Dpf\Helper\DataCiteXml::convertFromMetsXml($metsXml);

} else {

$this->response->setStatus(404);
return 'No such document';

}
$dom = new \DOMDocument('1.0', 'UTF-8');
$dom->loadXML($dataCiteXml);
$title = $dom->getElementsByTagName('title')[0];

$this->response->setHeader('Content-Disposition', 'attachment; filename="' . self::sanitizeFilename($title->nodeValue) . '.DataCite.xml"');
$this->response->setHeader('Content-Type', 'text/xml; charset=UTF-8');
return $dataCiteXml;

break;

default:

$this->response->setStatus(404);
Expand Down Expand Up @@ -223,5 +232,42 @@ public function attachmentAction()

}

private static function sanitizeFilename($filename)
{
// remove anything which isn't a word, whitespace, number or any of the following caracters -_~,;[]().
$filename = mb_ereg_replace("([^\w\s\d\-_~,;\[\]\(\).])", '', $filename);
// turn diacritical characters to ASCII
setlocale(LC_ALL, 'en_US.utf8');
$filename = iconv('utf-8', 'us-ascii//TRANSLIT', trim($filename));
// replace whitespaces with underscore
$filename = preg_replace('/\s+/', '_', $filename);

return $filename;
}

private function buildMetsXml($document)
{

$exporter = new \EWW\Dpf\Services\MetsExporter();
$fileData = $document->getCurrentFileData();
$exporter->setFileData($fileData);
$exporter->setMods($document->getXmlData());
$exporter->setSlubInfo($document->getSlubInfoData());

if (empty($document->getObjectIdentifier())) {

$exporter->setObjId($document->getUid());

} else {

$exporter->setObjId($document->getObjectIdentifier());

}

$exporter->buildMets();
$metsXml = $exporter->getMetsData();

return $metsXml;
}
}

138 changes: 138 additions & 0 deletions Classes/Helper/DataCiteXml.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
<?php
namespace EWW\Dpf\Helper;

/*
* This file is part of the TYPO3 CMS project.
*
* It is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, either version 2
* of the License, or any later version.
*
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/

use TYPO3\CMS\Core\Utility\GeneralUtility;

class DataCiteXml
{

/**
* Generates DataCite.xml from a given METS.xml
*
* @param string $metsXml
* @return string $dataCiteXml
*/
public static function convertFromMetsXml($metsXml)
{

$metsXml = simplexml_load_string($metsXml, NULL, NULL, "http://www.w3.org/2001/XMLSchema-instance");
$metsXml->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3');
$metsXml->registerXPathNamespace('slub', 'http://slub-dresden.de/');

// doi
$metsDoi = $metsXml->xpath("//mods:identifier[@type='qucosa:doi']");
if(!empty($metsDoi)) {
$dataCiteDoi = $metsDoi[0];
} else {
$dataCiteDoi = '10.1000/1'; // http://www.doi.org/index.html as default
}

// creators
$metsCreator = $metsXml->xpath("//mods:name[@type='personal']");
$dataCiteCreator = array();
foreach($metsCreator as $creator)
{
$creator->registerXPathNamespace('mods', 'http://www.loc.gov/mods/v3');
$names = array();
$givenName = $creator->xpath(".//mods:namePart[@type='given']");
$familyName = $creator->xpath(".//mods:namePart[@type='family']");
$creatorName = $creator->xpath(".//mods:namePart[@type='displayForm']");
if(empty($creatorName)) {
if(!empty($givenName) && !empty($familyName)) {
$creatorName = "<creatorName>{$familyName[0]}, {$givenName[0]}</creatorName>";
} else {
$creatorName = "";
}
} else {
$creatorName = "<creatorName>{$creatorName[0]}</creatorName>";
}
$givenName = (!empty($givenName)) ? "<givenName>{$givenName[0]}</givenName>" : "";
$familyName = (!empty($familyName)) ? "<familyName>{$familyName[0]}</familyName>" : "";
array_push($names, $creatorName, $givenName, $familyName);
$names = implode("", $names);
array_push($dataCiteCreator, "<creator>{$names}</creator>");
};
$dataCiteCreator = implode('', array_unique($dataCiteCreator));

// title
$metsTitle = $metsXml->xpath("//mods:titleInfo[@usage='primary']/mods:title");
$dataCiteTitle = (!empty($metsTitle)) ? "<title>{$metsTitle[0]}</title>" : "";

// subtitles
$metsSubTitles = $metsXml->xpath("//mods:titleInfo[@usage='primary']/mods:subTitle");
foreach($metsSubTitles as $title) {
$dataCiteTitle .= (!empty($title)) ? "<title titleType=\"Subtitle\">{$title}</title>" : "";
}

// publisher
$metsPublisher = $metsXml->xpath("//mods:name[@type='corporate'][@displayLabel='mapping-hack-other']/mods:namePart");
$dataCitePublisher = (!empty($metsPublisher)) ? $metsPublisher[0] : "";

// publication year
$metsPublicationYear = $metsXml->xpath("//mods:originInfo[@eventType='publication']/mods:dateIssued");
if(!empty($metsPublicationYear)) {
$dataCitePublicationYear = $metsPublicationYear[0];
} else {
$metsPublicationYear = $metsXml->xpath("//mods:originInfo/mods:dateIssued");
$dataCitePublicationYear = (!empty($metsPublicationYear)) ? $metsPublicationYear[0] : "";
}
if(strlen($dataCitePublicationYear) != 4) {
$dataCitePublicationYear = substr($dataCitePublicationYear, 0, 4);
}
$dataCitePublicationYear = (preg_match('/(19|20)\d{2}/', $dataCitePublicationYear)) ? $dataCitePublicationYear : "";

// subjects
$metsSubjects = $metsXml->xpath("//mods:classification[@authority='z']");
$dataCiteSubjects = '';
foreach(GeneralUtility::trimExplode(',', $metsSubjects[0]) as $subject) {
$dataCiteSubjects .= "<subject>{$subject}</subject>";
}

// language
$metsLanguage = $metsXml->xpath("//mods:language/mods:languageTerm[@authority='iso639-2b'][@type='code']");
$dataCiteLanguage = \EWW\Dpf\Helper\LanguageCode::convertFrom6392Bto6391($metsLanguage[0]);

/* // description
$metsDescription = $metsXml->xpath("//mods:abstract[@type='summary']");
$dataCiteDescription = (!empty($metsDescription)) ? "<description descriptionType=\"Abstract\">{$metsDescription[0]}</description>" : ""; */

// resource type
$slubResourceType = $metsXml->xpath("//slub:documentType");
$dataCiteResourceType = (!empty($slubResourceType)) ? $slubResourceType[0] : "";

$xml = simplexml_load_string(<<< XML
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
<identifier identifierType="DOI">{$dataCiteDoi}</identifier>
<creators>{$dataCiteCreator}</creators>
<titles>{$dataCiteTitle}</titles>
<publisher>{$dataCitePublisher}</publisher>
<publicationYear>{$dataCitePublicationYear}</publicationYear>
<subjects>{$dataCiteSubjects}</subjects>
<language>{$dataCiteLanguage}</language>
<resourceType resourceTypeGeneral="Text">{$dataCiteResourceType}</resourceType>
</resource>
XML
);

$dataCiteXml = new \DOMDocument('1.0', 'UTF-8');
$dataCiteXml->preserveWhiteSpace = false;
$dataCiteXml->formatOutput = true;
$dataCiteXml->loadXML($xml->asXML());

return($dataCiteXml->saveXML());
}
}
Loading

0 comments on commit fb24ab7

Please sign in to comment.