Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Feature/ted4 173 #1

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions ted_sws/alignment_oracle/adapters/limes_alignment_engine.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import subprocess
import tempfile
Expand All @@ -23,10 +24,12 @@ def execute(self, limes_config_params: LimesConfigParams):
:return:
"""
limes_xml_config = generate_xml_config_from_limes_config(limes_config_params=limes_config_params)
temp_file = tempfile.NamedTemporaryFile()
temp_file.write(limes_xml_config.encode(encoding="utf-8"))
temp_file_name = None
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(limes_xml_config.encode(encoding="utf-8"))
temp_file_name = temp_file.name
self.execute_from_file_config(config_file_path=pathlib.Path(temp_file.name))
temp_file.close()
os.unlink(temp_file_name)

def execute_from_file_config(self, config_file_path: pathlib.Path):
"""
Expand Down
10 changes: 6 additions & 4 deletions ted_sws/alignment_oracle/services/generate_alignment_links.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import tempfile

Expand Down Expand Up @@ -57,9 +58,10 @@ def generate_alignment_links_for_notice(notice: Notice, sparql_endpoint: str,
:return:
"""
notice_rdf_manifestation = notice.distilled_rdf_manifestation.object_data
notice_rdf_file = tempfile.NamedTemporaryFile(suffix=".ttl")
notice_rdf_file.write(notice_rdf_manifestation.encode(encoding="utf-8"))
notice_rdf_file_path = notice_rdf_file.name
notice_rdf_file_path = None
with tempfile.NamedTemporaryFile(suffix=".ttl", delete=False) as notice_rdf_file:
notice_rdf_file.write(notice_rdf_manifestation.encode(encoding="utf-8"))
notice_rdf_file_path = notice_rdf_file.name
with tempfile.TemporaryDirectory() as tmp_result_dir_path:
limes_config_params = limes_config_generator(source_sparql_endpoint=notice_rdf_file_path,
target_sparql_endpoint=sparql_endpoint,
Expand All @@ -71,5 +73,5 @@ def generate_alignment_links_for_notice(notice: Notice, sparql_endpoint: str,
delta=delta,
use_caching=use_caching
)
notice_rdf_file.close()
os.unlink(notice_rdf_file_path)
return result_alignment_links
41 changes: 23 additions & 18 deletions ted_sws/data_sampler/services/notice_xml_indexer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import re
import tempfile
Expand Down Expand Up @@ -42,21 +43,23 @@ def index_notice_xslt(notice: Notice, xslt_transformer=None) -> Notice:
:return:
"""

with tempfile.NamedTemporaryFile() as fp:
xml_path = None
with tempfile.NamedTemporaryFile(delete=False) as fp:
fp.write(notice.xml_manifestation.object_data.encode("utf-8"))
xml_path = pathlib.Path(fp.name)
xslt_path = XSLT_FILES_PATH
xslt_path /= UNIQUE_XPATHS_XSLT_FILE_PATH

if xslt_transformer is None:
xslt_transformer = XMLPreprocessor()
result = xslt_transformer.transform_with_xslt_to_string(xml_path=xml_path,
xslt_path=xslt_path)
xpaths = result[len(XSLT_PREFIX_RESULT):].split(",")
xml_metadata = XMLMetadata()
xml_metadata.unique_xpaths = xpaths
xslt_path = XSLT_FILES_PATH
xslt_path /= UNIQUE_XPATHS_XSLT_FILE_PATH

if xslt_transformer is None:
xslt_transformer = XMLPreprocessor()
result = xslt_transformer.transform_with_xslt_to_string(xml_path=xml_path,
xslt_path=xslt_path)
os.unlink(xml_path)
xpaths = result[len(XSLT_PREFIX_RESULT):].split(",")
xml_metadata = XMLMetadata()
xml_metadata.unique_xpaths = xpaths

notice.set_xml_metadata(xml_metadata=xml_metadata)
notice.set_xml_metadata(xml_metadata=xml_metadata)

return notice

Expand Down Expand Up @@ -131,13 +134,15 @@ def _xpath_generator(xml_file):
else:
path.pop()

with tempfile.NamedTemporaryFile() as fp:
xml_temp_file = None
with tempfile.NamedTemporaryFile(delete=False) as fp:
fp.write(notice.xml_manifestation.object_data.encode("utf-8"))

xpaths = list(set(_xpath_generator(fp.name)))
xml_metadata = XMLMetadata()
xml_metadata.unique_xpaths = xpaths
notice.set_xml_metadata(xml_metadata=xml_metadata)
xml_temp_file = fp.name
xpaths = list(set(_xpath_generator(xml_temp_file)))
os.unlink(xml_temp_file)
xml_metadata = XMLMetadata()
xml_metadata.unique_xpaths = xpaths
notice.set_xml_metadata(xml_metadata=xml_metadata)

return notice

Expand Down
10 changes: 6 additions & 4 deletions ted_sws/master_data_registry/services/entity_deduplication.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import tempfile
from io import StringIO
Expand Down Expand Up @@ -34,9 +35,10 @@ def generate_mdr_alignment_links(merged_rdf_fragments: rdflib.Graph, cet_uri: st
:param mdr_sparql_endpoint:
:return:
"""
tmp_rdf_file = tempfile.NamedTemporaryFile(suffix=".ttl")
tmp_rdf_file.write(str(merged_rdf_fragments.serialize(format="turtle")).encode(encoding="utf-8"))
tmp_rdf_file_path = tmp_rdf_file.name
tmp_rdf_file_path = None
with tempfile.NamedTemporaryFile(suffix=".ttl", delete=False) as tmp_rdf_file:
tmp_rdf_file.write(str(merged_rdf_fragments.serialize(format="turtle")).encode(encoding="utf-8"))
tmp_rdf_file_path = tmp_rdf_file.name
limes_config_generator = get_limes_config_generator_by_cet_uri(cet_uri=cet_uri)
with tempfile.TemporaryDirectory() as tmp_result_dir_path:
target_sparql_endpoint = mdr_sparql_endpoint if mdr_sparql_endpoint else tmp_rdf_file_path
Expand All @@ -49,7 +51,7 @@ def generate_mdr_alignment_links(merged_rdf_fragments: rdflib.Graph, cet_uri: st
limes_config_params.target.data_type = TURTLE_SOURCE_DATA_TYPE
alignment_links = generate_alignment_links(limes_config_params=limes_config_params, threshold=0.95,
use_caching=False)
tmp_rdf_file.close()
os.unlink(tmp_rdf_file_path)
alignment_graph = rdflib.Graph()
alignment_graph.parse(StringIO(alignment_links), format="nt")
return alignment_graph
Expand Down
11 changes: 8 additions & 3 deletions ted_sws/notice_publisher/services/notice_publisher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import base64
import os
import pathlib
import tempfile

Expand Down Expand Up @@ -31,16 +32,20 @@ def publish_notice(notice: Notice, publisher: SFTPPublisherABC = None,

package_content = base64.b64decode(bytes(mets_manifestation.object_data, encoding='utf-8'), validate=True)
remote_notice_path = f"{remote_folder_path}/{package_name}"
source_file = tempfile.NamedTemporaryFile()
source_file.write(package_content)
source_file_path = None
with tempfile.NamedTemporaryFile(delete=False) as source_file:
source_file.write(package_content)
source_file_path = source_file.name
try:
publisher.connect()
if publisher.publish(source_path=str(pathlib.Path(source_file.name)),
if publisher.publish(source_path=str(pathlib.Path(source_file_path)),
remote_path=remote_notice_path):
notice.update_status_to(NoticeStatus.PUBLISHED)
publisher.disconnect()
except Exception as e:
raise Exception(f"Notice {notice.ted_id} could not be published: " + str(e))
finally:
os.unlink(source_file_path)

return notice.status == NoticeStatus.PUBLISHED

Expand Down
14 changes: 9 additions & 5 deletions tests/e2e/notice_publisher/adapters/test_notice_publisher.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import tempfile

import pytest
Expand All @@ -17,8 +18,10 @@ def test_sftp_notice_publisher():
sftp_publisher.port = config.SFTP_PUBLISH_PORT
sftp_publisher.connect()

source_file = tempfile.NamedTemporaryFile()
source_file.write(bytes("NOTICE", encoding='utf-8'))
source_file_path = None
with tempfile.NamedTemporaryFile(delete=False) as source_file:
source_file.write(bytes("NOTICE", encoding='utf-8'))
source_file_path = source_file.name

invalid_remote_path = "/upload"
remote_path = "/upload/sftp_notice.zip"
Expand All @@ -27,18 +30,19 @@ def test_sftp_notice_publisher():
sftp_publisher.remove(remote_path)

with pytest.raises(Exception):
sftp_publisher.publish(source_file.name + "invalid", invalid_remote_path)
sftp_publisher.publish(source_file_path + "invalid", invalid_remote_path)

with pytest.raises(Exception):
sftp_publisher.publish(source_file.name, None)
sftp_publisher.publish(source_file_path, None)

assert not sftp_publisher.exists(remote_path)
published = sftp_publisher.publish(source_file.name, remote_path)
published = sftp_publisher.publish(source_file_path, remote_path)
assert published
assert sftp_publisher.exists(remote_path)
sftp_publisher.remove(remote_path)
assert not sftp_publisher.exists(remote_path)

os.unlink(source_file_path)
sftp_publisher.disconnect()


Expand Down
Loading