Skip to content

Commit

Permalink
Reorganize IO operations for remaining write-and-read cases
Browse files (browse the repository at this point in the history)
  • Loading branch information
gkostkowski committed Jul 29, 2024
1 parent caeebe6 commit 0f43571
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 32 deletions.
9 changes: 6 additions & 3 deletions ted_sws/alignment_oracle/adapters/limes_alignment_engine.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import subprocess
import tempfile
Expand All @@ -23,10 +24,12 @@ def execute(self, limes_config_params: LimesConfigParams):
:return:
"""
limes_xml_config = generate_xml_config_from_limes_config(limes_config_params=limes_config_params)
temp_file = tempfile.NamedTemporaryFile()
temp_file.write(limes_xml_config.encode(encoding="utf-8"))
temp_file_name = None
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(limes_xml_config.encode(encoding="utf-8"))
temp_file_name = temp_file.name
self.execute_from_file_config(config_file_path=pathlib.Path(temp_file.name))
temp_file.close()
os.unlink(temp_file_name)

def execute_from_file_config(self, config_file_path: pathlib.Path):
"""
Expand Down
10 changes: 6 additions & 4 deletions ted_sws/alignment_oracle/services/generate_alignment_links.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import tempfile

Expand Down Expand Up @@ -57,9 +58,10 @@ def generate_alignment_links_for_notice(notice: Notice, sparql_endpoint: str,
:return:
"""
notice_rdf_manifestation = notice.distilled_rdf_manifestation.object_data
notice_rdf_file = tempfile.NamedTemporaryFile(suffix=".ttl")
notice_rdf_file.write(notice_rdf_manifestation.encode(encoding="utf-8"))
notice_rdf_file_path = notice_rdf_file.name
notice_rdf_file_path = None
with tempfile.NamedTemporaryFile(suffix=".ttl", delete=False) as notice_rdf_file:
notice_rdf_file.write(notice_rdf_manifestation.encode(encoding="utf-8"))
notice_rdf_file_path = notice_rdf_file.name
with tempfile.TemporaryDirectory() as tmp_result_dir_path:
limes_config_params = limes_config_generator(source_sparql_endpoint=notice_rdf_file_path,
target_sparql_endpoint=sparql_endpoint,
Expand All @@ -71,5 +73,5 @@ def generate_alignment_links_for_notice(notice: Notice, sparql_endpoint: str,
delta=delta,
use_caching=use_caching
)
notice_rdf_file.close()
os.unlink(notice_rdf_file_path)
return result_alignment_links
28 changes: 15 additions & 13 deletions ted_sws/data_sampler/services/notice_xml_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,23 @@ def index_notice_xslt(notice: Notice, xslt_transformer=None) -> Notice:
:return:
"""

with tempfile.NamedTemporaryFile() as fp:
xml_path = None
with tempfile.NamedTemporaryFile(delete=False) as fp:
fp.write(notice.xml_manifestation.object_data.encode("utf-8"))
xml_path = pathlib.Path(fp.name)
xslt_path = XSLT_FILES_PATH
xslt_path /= UNIQUE_XPATHS_XSLT_FILE_PATH

if xslt_transformer is None:
xslt_transformer = XMLPreprocessor()
result = xslt_transformer.transform_with_xslt_to_string(xml_path=xml_path,
xslt_path=xslt_path)
xpaths = result[len(XSLT_PREFIX_RESULT):].split(",")
xml_metadata = XMLMetadata()
xml_metadata.unique_xpaths = xpaths

notice.set_xml_metadata(xml_metadata=xml_metadata)
xslt_path = XSLT_FILES_PATH
xslt_path /= UNIQUE_XPATHS_XSLT_FILE_PATH

if xslt_transformer is None:
xslt_transformer = XMLPreprocessor()
result = xslt_transformer.transform_with_xslt_to_string(xml_path=xml_path,
xslt_path=xslt_path)
os.unlink(xml_path)
xpaths = result[len(XSLT_PREFIX_RESULT):].split(",")
xml_metadata = XMLMetadata()
xml_metadata.unique_xpaths = xpaths

notice.set_xml_metadata(xml_metadata=xml_metadata)

return notice

Expand Down
10 changes: 6 additions & 4 deletions ted_sws/master_data_registry/services/entity_deduplication.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import pathlib
import tempfile
from io import StringIO
Expand Down Expand Up @@ -34,9 +35,10 @@ def generate_mdr_alignment_links(merged_rdf_fragments: rdflib.Graph, cet_uri: st
:param mdr_sparql_endpoint:
:return:
"""
tmp_rdf_file = tempfile.NamedTemporaryFile(suffix=".ttl")
tmp_rdf_file.write(str(merged_rdf_fragments.serialize(format="turtle")).encode(encoding="utf-8"))
tmp_rdf_file_path = tmp_rdf_file.name
tmp_rdf_file_path = None
with tempfile.NamedTemporaryFile(suffix=".ttl", delete=False) as tmp_rdf_file:
tmp_rdf_file.write(str(merged_rdf_fragments.serialize(format="turtle")).encode(encoding="utf-8"))
tmp_rdf_file_path = tmp_rdf_file.name
limes_config_generator = get_limes_config_generator_by_cet_uri(cet_uri=cet_uri)
with tempfile.TemporaryDirectory() as tmp_result_dir_path:
target_sparql_endpoint = mdr_sparql_endpoint if mdr_sparql_endpoint else tmp_rdf_file_path
Expand All @@ -49,7 +51,7 @@ def generate_mdr_alignment_links(merged_rdf_fragments: rdflib.Graph, cet_uri: st
limes_config_params.target.data_type = TURTLE_SOURCE_DATA_TYPE
alignment_links = generate_alignment_links(limes_config_params=limes_config_params, threshold=0.95,
use_caching=False)
tmp_rdf_file.close()
os.unlink(tmp_rdf_file_path)
alignment_graph = rdflib.Graph()
alignment_graph.parse(StringIO(alignment_links), format="nt")
return alignment_graph
Expand Down
11 changes: 8 additions & 3 deletions ted_sws/notice_publisher/services/notice_publisher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import base64
import os
import pathlib
import tempfile

Expand Down Expand Up @@ -31,16 +32,20 @@ def publish_notice(notice: Notice, publisher: SFTPPublisherABC = None,

package_content = base64.b64decode(bytes(mets_manifestation.object_data, encoding='utf-8'), validate=True)
remote_notice_path = f"{remote_folder_path}/{package_name}"
source_file = tempfile.NamedTemporaryFile()
source_file.write(package_content)
source_file_path = None
with tempfile.NamedTemporaryFile(delete=False) as source_file:
source_file.write(package_content)
source_file_path = source_file.name
try:
publisher.connect()
if publisher.publish(source_path=str(pathlib.Path(source_file.name)),
if publisher.publish(source_path=str(pathlib.Path(source_file_path)),
remote_path=remote_notice_path):
notice.update_status_to(NoticeStatus.PUBLISHED)
publisher.disconnect()
except Exception as e:
raise Exception(f"Notice {notice.ted_id} could not be published: " + str(e))
finally:
os.unlink(source_file_path)

return notice.status == NoticeStatus.PUBLISHED

Expand Down
14 changes: 9 additions & 5 deletions tests/e2e/notice_publisher/adapters/test_notice_publisher.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import tempfile

import pytest
Expand All @@ -17,8 +18,10 @@ def test_sftp_notice_publisher():
sftp_publisher.port = config.SFTP_PUBLISH_PORT
sftp_publisher.connect()

source_file = tempfile.NamedTemporaryFile()
source_file.write(bytes("NOTICE", encoding='utf-8'))
source_file_path = None
with tempfile.NamedTemporaryFile(delete=False) as source_file:
source_file.write(bytes("NOTICE", encoding='utf-8'))
source_file_path = source_file.name

invalid_remote_path = "/upload"
remote_path = "/upload/sftp_notice.zip"
Expand All @@ -27,18 +30,19 @@ def test_sftp_notice_publisher():
sftp_publisher.remove(remote_path)

with pytest.raises(Exception):
sftp_publisher.publish(source_file.name + "invalid", invalid_remote_path)
sftp_publisher.publish(source_file_path + "invalid", invalid_remote_path)

with pytest.raises(Exception):
sftp_publisher.publish(source_file.name, None)
sftp_publisher.publish(source_file_path, None)

assert not sftp_publisher.exists(remote_path)
published = sftp_publisher.publish(source_file.name, remote_path)
published = sftp_publisher.publish(source_file_path, remote_path)
assert published
assert sftp_publisher.exists(remote_path)
sftp_publisher.remove(remote_path)
assert not sftp_publisher.exists(remote_path)

os.unlink(source_file_path)
sftp_publisher.disconnect()


Expand Down

0 comments on commit 0f43571

Please sign in to comment.