Skip to content

Commit

Permalink
Merge pull request #130 from mlibrary/ARC-13-Fix-display-of-titles-th…
Browse files Browse the repository at this point in the history
…at-have-unitdate-before-unittitle-content

Arc 13 fix display of titles that have unitdate before unittitle content
  • Loading branch information
gkostin1966 authored Jun 5, 2024
2 parents 9e2e2e4 + 56e105d commit 5e9fc54
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 32 deletions.
14 changes: 10 additions & 4 deletions lib/dul_arclight/normalized_title.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ module Arclight
class NormalizedTitle
# @param [String] `title` from the `unittitle`
# @param [String] `date` from the `unitdate`
def initialize(title, date = nil)
@title = title.gsub(/\s*,\s*$/, '').strip if title.present?
# @param [Boolean] `append` when true the date is joined onto the title with ", ";
#   when false the title is used alone and the date only serves as a fallback
def initialize(title, date = nil, append = true)
  @append = append
  # Only surrounding whitespace is trimmed now; the earlier trailing-comma
  # removal is kept below for reference.
  # @title = title.gsub(/\s*,\s*$/, '').strip if title.present?
  @title = title.strip unless title.blank?
  @date = date.strip unless date.blank?
end

# @return [String] the normalized title/date
Expand All @@ -19,10 +21,14 @@ def to_s

private

attr_reader :title, :date, :default
attr_reader :title, :date, :append

def normalize
result = [title, date].compact.join(', ')
result = if append
[title, date].compact.join(', ')
else
title || date
end
raise Arclight::Exceptions::TitleNotFound if result.blank?

result
Expand Down
140 changes: 112 additions & 28 deletions lib/dul_arclight/traject/ead2_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -171,18 +171,64 @@
# end

to_field 'title_filing_si', extract_xpath('/ead/eadheader/filedesc/titlestmt/titleproper[@type="filing"]')
to_field 'title_ssm' do |record, accumulator|
result = record.xpath('/ead/archdesc/did/unittitle[not(@type) or ( @type != "sort" )]')
result = result.collect do |n|
n.xpath('child::node()[not(self::unitdate)]').map(&:text)
end.join(' ')
# Collection-level display title, assembled from every non-"sort" <unittitle>
# under /ead/archdesc/did. Child nodes are emitted in document order, so a
# <unitdate> nested inside the title stays where the finding aid placed it.
to_field 'title_ssm' do |record, accumulator, context|
  # Passed on by the normalized_title_ssm step as NormalizedTitle's `append`
  # argument; turns false as soon as the title carries its own <unitdate>.
  context.clipboard[:title_ssm] = true
  title = +''
  record.xpath('/ead/archdesc/did/unittitle[not(@type) or ( @type != "sort" )]').each do |unittitle|
    date_seen = false
    unittitle.children.each do |child|
      unless child.name == 'unitdate'
        title << child.text
        next
      end

      context.clipboard[:title_ssm] = false
      # Only the first <unitdate> of each <unittitle> is introduced by a comma.
      title << ',' unless date_seen
      date_seen = true
      date_text = child.text.strip
      if child['type'] == 'bulk'
        title << ' (majority within ' << date_text << ')'
      else
        title << ' ' << date_text
      end
    end
  end
  # Punctuation clean-up, applied in this exact order.
  cleanup = [
    [/\s+,/, ','],          # remove leading whitespace before comma
    [/,(\s|,)*,/, ','],     # reduce multiple commas and interior whitespace to single comma
    [/(,)(\S)/, '\1 \2'],   # add whitespace after comma
    [/^,\s*/, '']           # remove leading comma
  ]
  accumulator << cleanup.reduce(title) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end
to_field 'title_formatted_ssm' do |record, accumulator|
result = record.xpath('/ead/archdesc/did/unittitle[not(@type) or ( @type != "sort" )]')
result = result.collect do |n|
n.xpath('child::node()[not(self::unitdate)]').to_s
end.join(' ')
# Collection-level title with inline markup preserved: same assembly as the
# plain title, but every child node is serialized with #to_s instead of #text.
to_field 'title_formatted_ssm' do |record, accumulator, context|
  # Passed on by the normalized_title_formatted_ssm step as NormalizedTitle's
  # `append` argument; turns false once a <unitdate> is found inside the title.
  context.clipboard[:title_formatted_ssm] = true
  title = +''
  record.xpath('/ead/archdesc/did/unittitle[not(@type) or ( @type != "sort" )]').each do |unittitle|
    date_seen = false
    unittitle.children.each do |child|
      unless child.name == 'unitdate'
        title << child.to_s
        next
      end

      context.clipboard[:title_formatted_ssm] = false
      # Only the first <unitdate> of each <unittitle> is introduced by a comma.
      title << ',' unless date_seen
      date_seen = true
      date_markup = child.to_s.strip
      if child['type'] == 'bulk'
        title << ' (majority within ' << date_markup << ')'
      else
        title << ' ' << date_markup
      end
    end
  end
  # Punctuation clean-up, applied in this exact order.
  cleanup = [
    [/\s+,/, ','],          # remove leading whitespace before comma
    [/,(\s|,)*,/, ','],     # reduce multiple commas and interior whitespace to single comma
    [/(,)(\S)/, '\1 \2'],   # add whitespace after comma
    [/^,\s*/, '']           # remove leading comma
  ]
  accumulator << cleanup.reduce(title) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end
to_field 'title_teim', extract_xpath('/ead/archdesc/did/unittitle[not(@type) or ( @type != "sort" )]')
Expand Down Expand Up @@ -238,16 +284,16 @@

# DUL CUSTOMIZATION: use DUL rules for NormalizedDate in title normalization
to_field 'normalized_title_ssm' do |_record, accumulator, context|
dates = context.output_hash['normalized_date_ssm']&.first
date = context.output_hash['normalized_date_ssm']&.first
title = context.output_hash['title_ssm']&.first
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
accumulator << Arclight::NormalizedTitle.new(title, date, context.clipboard[:title_ssm]).to_s
end

# DUL CUSTOMIZATION: preserve formatting tags in titles
to_field 'normalized_title_formatted_ssm' do |_record, accumulator, context|
dates = context.output_hash['normalized_date_ssm']&.first
date = context.output_hash['normalized_date_ssm']&.first
title = context.output_hash['title_formatted_ssm']&.first.to_s
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
accumulator << Arclight::NormalizedTitle.new(title, date, context.clipboard[:title_formatted_ssm]).to_s
end

to_field 'collection_ssm' do |_record, accumulator, context|
Expand Down Expand Up @@ -525,18 +571,56 @@
end

to_field 'title_filing_si', extract_xpath('./did/unittitle[not(@type) or ( @type != "sort" )]'), first_only
to_field 'title_ssm' do |record, accumulator|
result = record.xpath('./did/unittitle[not(@type) or ( @type != "sort" )]')
result = result.collect do |n|
n.xpath('child::node()[not(self::unitdate)]').map(&:text)
end.join(' ')
# Component-level display title, assembled from every non-"sort" <unittitle>
# in the component's own <did>. Child nodes are emitted in document order; a
# nested <unitdate> is appended after a single space with no comma added here.
to_field 'title_ssm' do |record, accumulator, context|
  # Passed on by the normalized_title_ssm step as NormalizedTitle's `append`
  # argument; turns false as soon as the title carries its own <unitdate>.
  context.clipboard[:title_ssm] = true
  title = +''
  record.xpath('./did/unittitle[not(@type) or ( @type != "sort" )]').each do |unittitle|
    unittitle.children.each do |child|
      unless child.name == 'unitdate'
        title << child.text
        next
      end

      context.clipboard[:title_ssm] = false
      date_text = child.text.strip
      if child['type'] == 'bulk'
        title << ' (majority within ' << date_text << ')'
      else
        title << ' ' << date_text
      end
    end
  end
  # Punctuation clean-up, applied in this exact order.
  cleanup = [
    [/\s+,/, ','],          # remove leading whitespace before comma
    [/,(\s|,)*,/, ','],     # reduce multiple commas and interior whitespace to single comma
    [/(,)(\S)/, '\1 \2'],   # add whitespace after comma
    [/^,\s*/, '']           # remove leading comma
  ]
  accumulator << cleanup.reduce(title) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end
to_field 'title_formatted_ssm' do |record, accumulator|
result = record.xpath('./did/unittitle[not(@type) or ( @type != "sort" )]')
result = result.collect do |n|
n.xpath('child::node()[not(self::unitdate)]').to_s
end.join(' ')
# Component-level title with inline markup preserved: same assembly as the
# plain component title, but every child node is serialized with #to_s.
to_field 'title_formatted_ssm' do |record, accumulator, context|
  # Passed on by the normalized_title_formatted_ssm step as NormalizedTitle's
  # `append` argument; turns false once a <unitdate> is found inside the title.
  context.clipboard[:title_formatted_ssm] = true
  title = +''
  record.xpath('./did/unittitle[not(@type) or ( @type != "sort" )]').each do |unittitle|
    unittitle.children.each do |child|
      unless child.name == 'unitdate'
        title << child.to_s
        next
      end

      context.clipboard[:title_formatted_ssm] = false
      date_markup = child.to_s.strip
      if child['type'] == 'bulk'
        title << ' (majority within ' << date_markup << ')'
      else
        title << ' ' << date_markup
      end
    end
  end
  # Punctuation clean-up, applied in this exact order.
  cleanup = [
    [/\s+,/, ','],          # remove leading whitespace before comma
    [/,(\s|,)*,/, ','],     # reduce multiple commas and interior whitespace to single comma
    [/(,)(\S)/, '\1 \2'],   # add whitespace after comma
    [/^,\s*/, '']           # remove leading comma
  ]
  accumulator << cleanup.reduce(title) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end
to_field 'title_teim', extract_xpath('./did/unittitle[not(@type) or ( @type != "sort" )]')
Expand All @@ -556,16 +640,16 @@

# DUL CUSTOMIZATION: use DUL rules for NormalizedDate in title normalization
to_field 'normalized_title_ssm' do |_record, accumulator, context|
dates = context.output_hash['normalized_date_ssm']&.first
date = context.output_hash['normalized_date_ssm']&.first
title = context.output_hash['title_ssm']&.first
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
accumulator << Arclight::NormalizedTitle.new(title, date, context.clipboard[:title_ssm]).to_s
end

# DUL CUSTOMIZATION: use DUL rules for NormalizedDate
to_field 'normalized_title_formatted_ssm' do |_record, accumulator, context|
dates = context.output_hash['normalized_date_ssm']&.first
date = context.output_hash['normalized_date_ssm']&.first
title = context.output_hash['title_formatted_ssm']&.first.to_s
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
accumulator << Arclight::NormalizedTitle.new(title, date, context.clipboard[:title_formatted_ssm]).to_s
end

# Aleph ID (esp. for request integration)
Expand Down

0 comments on commit 5e9fc54

Please sign in to comment.