Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Publish collection membership data to purl #823

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 77 additions & 3 deletions app/services/publish_metadata_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def publish
release_tags = ReleaseTags.for(item: item)

transfer_metadata(release_tags)
bookkeep_collections
publish_notify_on_success
end

Expand All @@ -37,8 +38,61 @@ def transfer_metadata(release_tags)
transfer_to_document_store(PublicDescMetadataService.new(item).to_xml, 'mods')
end

# Maintain bidirectional symlinks from:
# - an item to the collections it belongs to
# - a collection to the items within
#
# Creates (or refreshes) one symlink in each direction for every collection
# in item.collections, then removes the links in both directions for any
# collection that is present in the item's collection directory but is no
# longer asserted by the item.
def bookkeep_collections
  FileUtils.mkdir_p(item_collections_dir)
  # Snapshot taken before any links are written, so it reflects prior state only
  existing_collections = Dir.children(item_collections_dir)
  item_id = local_part(item.pid)

  # Write bidirectional symlinks for collection membership
  item.collections.each do |coll|
    Rails.logger.debug("[Publish][#{item.pid}] Adding collection association with #{coll.pid}")
    collection_items_dir = collection_member_dir(coll.pid)
    FileUtils.mkdir_p(collection_items_dir)

    # Remove any existing link before re-creating it. FileUtils.ln_s(..., force: true)
    # is not safe here: when the destination is already a symlink to a directory
    # (which is the case on every re-publish) it treats the destination as a
    # directory and creates the new link *inside* the link's target.
    {
      File.join(collection_items_dir, item_id) => item.content_dir,
      File.join(item_collections_dir, local_part(coll.pid)) => coll.content_dir
    }.each do |link, target|
      FileUtils.rm_f(link)
      File.symlink(target, link)
    end
  end

  # Remove bidirectional collection membership for collections no longer asserted
  asserted_collections = item.collections.map { |coll| local_part(coll.pid) }
  (existing_collections - asserted_collections).each do |coll_pid|
    Rails.logger.debug("[Publish][#{item.pid}] Removing collection association with #{coll_pid}")
    FileUtils.rm_f(File.join(item_collections_dir, coll_pid))

    collection_items_dir = collection_member_dir(coll_pid)
    next unless Dir.exist?(collection_items_dir)

    FileUtils.rm_f(File.join(collection_items_dir, item_id))
  end
end

# Remove all collection membership symlinks that point at this item:
# - the link to this item held by each collection listed in the item's
#   collection directory
# - the link to this item (acting as a collection) held by each member
#   listed in its members directory
#
# Missing directories and already-missing links are ignored, so the method
# can be called repeatedly and never aborts an unpublish part-way through.
def unbookkeep_collections
  item_id = local_part(item.pid)

  # Item side: drop the back-link each of this item's collections holds.
  if Dir.exist?(item_collections_dir)
    Dir.children(item_collections_dir).each do |coll_pid|
      collection_items_dir = collection_member_dir(coll_pid)
      next unless Dir.exist?(collection_items_dir)

      FileUtils.rm_f(File.join(collection_items_dir, item_id))
    end
  end

  # Collection side: handled independently of the item side, so a collection
  # that is not itself a member of anything still has its members cleaned up.
  members_dir = collection_member_dir(item.pid)
  return unless Dir.exist?(members_dir)

  Dir.children(members_dir).each do |item_pid|
    item_dir = item_collections_dir(item_pid)
    next unless Dir.exist?(item_dir)

    FileUtils.rm_f(File.join(item_dir, item_id))
  end
end

# Clear out the document cache for this item
#
# Runs three steps in order:
# 1. unbookkeep_collections - removes the collection membership symlinks that reference this item
# 2. PruneService#prune! on this item's purl_druid
# 3. publish_delete_on_success
def unpublish
# NOTE(review): presumably this has to run before pruning, because it reads
# directories that live under the item's content_dir - confirm what PruneService removes
unbookkeep_collections
PruneService.new(druid: purl_druid).prune!
publish_delete_on_success
end
Expand All @@ -62,6 +116,24 @@ def purl_druid
@purl_druid ||= DruidTools::PurlDruid.new item.pid, Settings.stacks.local_document_cache_root
end

# Path of the directory that holds an item's links to its collections.
# With no argument (or nil) the path is derived from this service's own
# purl_druid; otherwise from a PurlDruid built for the given pid under the
# local document cache root.
def item_collections_dir(item_pid = nil)
  druid = item_pid ? DruidTools::PurlDruid.new(item_pid, Settings.stacks.local_document_cache_root) : purl_druid
  File.join(druid.content_dir, 'is_member_of_collection')
end

# Path of the directory that holds a collection's links to its member items,
# located under the content_dir of a PurlDruid built for collection_pid.
def collection_member_dir(collection_pid)
  cache_root = Settings.stacks.local_document_cache_root
  File.join(DruidTools::PurlDruid.new(collection_pid, cache_root).content_dir, 'has_member_of_collection')
end


##
# When publishing a PURL, we notify purl-fetcher of changes.
#
Expand All @@ -77,10 +149,12 @@ def publish_delete_on_success
end

# Build the purl-fetcher URL for this item:
# "<Settings.purl_services_url>/purls/<local_part of the item's pid>".
#
# @return [String]
# @raise [RuntimeError] when Settings.purl_services_url is not configured
def purl_services_url
  raise 'You have not configured perl-fetcher (Settings.purl_services_url).' unless Settings.purl_services_url

  "#{Settings.purl_services_url}/purls/#{local_part(item.pid)}"
end

# Shared shorthand used when building cache paths and the purl-fetcher URL.
# Delegates to Dor::PidUtils.remove_druid_prefix and returns its result.
def local_part(pid)
Dor::PidUtils.remove_druid_prefix(pid)
end
end
59 changes: 54 additions & 5 deletions spec/services/publish_metadata_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,44 @@
druid1 = DruidTools::Druid.new item.pid, purl_root
druid1.mkdir
File.open(File.join(druid1.path, 'tmpfile'), 'w') { |f| f.write 'junk' }
expect(service).to receive(:unbookkeep_collections)

service.publish
expect(File).not_to exist(druid1.path) # it should now be gone
expect(WebMock).to have_requested(:delete, 'example.com/purl/purls/ab123cd4567')
end
end

# Release tags fixture; the 'a public item' context stubs ReleaseTags.for to return it
let(:release_tags) do
{ 'Searchworks' => { 'release' => true }, 'Some_special_place' => { 'release' => true } }
end

# the individual steps are tested below
context 'a public item' do
before do
# Make the release-tag lookup return the release_tags fixture
allow(ReleaseTags).to receive(:for).and_return(release_tags)
# World-discoverable rights
item.rightsMetadata.content = "<rightsMetadata><access type='discover'><machine><world/></machine></access></rightsMetadata>"
end

it 'calls the appropriate subfunctions' do
# Stub every step so this example only checks that publish delegates to each of them
allow(service).to receive(:transfer_metadata)
allow(service).to receive(:bookkeep_collections)
allow(service).to receive(:publish_notify_on_success)

service.publish

expect(service).to have_received(:transfer_metadata).with(release_tags)
expect(service).to have_received(:bookkeep_collections)
expect(service).to have_received(:publish_notify_on_success)
end
end
end

describe '#transfer_metadata' do
before do
allow(OpenURI).to receive(:open_uri).with('https://purl-test.stanford.edu/ab123cd4567.xml').and_return('<xml/>')
end

context 'copies to the document cache' do
let(:mods) do
<<-EOXML
Expand Down Expand Up @@ -102,7 +134,6 @@
expect_any_instance_of(described_class).to receive(:transfer_to_document_store).with(/<oai_dc:dc/, 'dc')
expect_any_instance_of(described_class).to receive(:transfer_to_document_store).with(/<publicObject/, 'public')
expect_any_instance_of(described_class).to receive(:transfer_to_document_store).with(/<mods:mods/, 'mods')
expect_any_instance_of(described_class).to receive(:publish_notify_on_success).with(no_args)
end

let(:release_tags) do
Expand All @@ -111,7 +142,7 @@

it 'identityMetadta, contentMetadata, rightsMetadata, generated dublin core, and public xml' do
item.rightsMetadata.content = "<rightsMetadata><access type='discover'><machine><world/></machine></access></rightsMetadata>"
service.publish
service.send(:transfer_metadata, release_tags)
expect(DublinCoreService).to have_received(:new).with(item)
expect(PublicXmlService).to have_received(:new).with(item, released_for: release_tags)
expect(PublicDescMetadataService).to have_received(:new).with(item)
Expand All @@ -120,7 +151,7 @@
it 'even when rightsMetadata uses xml namespaces' do
item.rightsMetadata.content = %q(<rightsMetadata xmlns="http://hydra-collab.stanford.edu/schemas/rightsMetadata/v1">
<access type='discover'><machine><world/></machine></access></rightsMetadata>)
service.publish
service.send(:transfer_metadata, release_tags)
end
end

Expand All @@ -140,16 +171,34 @@
expect_any_instance_of(described_class).to receive(:transfer_to_document_store).with(/<oai_dc:dc/, 'dc')
expect_any_instance_of(described_class).to receive(:transfer_to_document_store).with(/<publicObject/, 'public')
expect_any_instance_of(described_class).to receive(:transfer_to_document_store).with(/<mods:mods/, 'mods')
expect_any_instance_of(described_class).to receive(:publish_notify_on_success).with(no_args)
end

it 'ignores missing data' do
service.publish
service.send(:transfer_metadata, {})
end
end
end
end

describe '#bookkeep_collections' do
  # Not implemented yet. Declared without a block so RSpec reports these
  # examples as pending instead of letting empty bodies pass vacuously.
  it 'adds a link from the item to the collection'

  it 'adds a link from the collection to the item'

  it 'cleans up links that are not expressed in the item'
end

describe '#unbookkeep_collections' do
  # Not implemented yet. Declared without a block so RSpec reports these
  # examples as pending instead of letting empty bodies pass vacuously.
  it 'cleans up links from collections to this item'

  it 'cleans up links from items to this collection'
end

describe '#publish_notify_on_success' do
subject(:notify) { service.send(:publish_notify_on_success) }

Expand Down