Skip to content

Commit

Permalink
Use mapper 6.0.1 (#218)
Browse files Browse the repository at this point in the history
* Bump to collectionspace-mapper 6.0.1

* Update error class to match new mapper version

* Whitespace/formatting only

* Update to use new Term classes from collectionspace-mapper

* Rubocop autocorrect code changes

* Add domain to WebMock allowed list
  • Loading branch information
kspurgin authored Sep 2, 2024
1 parent f3b555e commit 0e891c6
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 122 deletions.
6 changes: 3 additions & 3 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ gem 'aws-sdk-s3', require: false
gem 'bulma-rails', '~> 0.9.0'

gem 'collectionspace-client', tag: 'v0.15.1', git: 'https://github.com/collectionspace/collectionspace-client.git'
gem 'collectionspace-mapper', tag: 'v6.0.1', git: 'https://github.com/collectionspace/collectionspace-mapper.git'
gem 'collectionspace-refcache', tag: 'v1.0.0', git: 'https://github.com/collectionspace/collectionspace-refcache.git'
gem 'collectionspace-mapper', tag: 'v4.1.3', git: 'https://github.com/collectionspace/collectionspace-mapper.git'

gem 'csvlint',
git: 'https://github.com/lyrasis/csvlint.rb.git',
tag: 'v1.4.0'
git: 'https://github.com/lyrasis/csvlint.rb.git',
tag: 'v1.4.0'
gem 'devise'
gem 'font-awesome-rails'
# gem 'hiredis'
Expand Down
8 changes: 4 additions & 4 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ GIT

GIT
remote: https://github.com/collectionspace/collectionspace-mapper.git
revision: 1c786e91097d2723389abc5f59316382f431be80
tag: v4.1.3
revision: 30c025d42e8cb0fbb438c9f376d43e217c13a67b
tag: v6.0.1
specs:
collectionspace-mapper (4.1.3)
collectionspace-mapper (6.0.1)
activesupport (= 6.0.4.7)
chronic
collectionspace-client (~> 0.15.0)
collectionspace-refcache (~> 1.0.0)
dry-configurable (~> 0.14)
dry-monads (~> 1.4)
memo_wise (~> 1.1.0)
nokogiri (~> 1.13.3)
nokogiri (>= 1.13.3, < 1.15.0)
xxhash (>= 0.4.0)
zeitwerk (~> 2.5)

Expand Down
17 changes: 10 additions & 7 deletions app/jobs/preprocess_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def perform(preprocess)
manager.exception!
Rails.logger.error(e.message)
Rails.logger.error(e.backtrace)
rescue CollectionSpace::Mapper::DataValidator::IdFieldNotInMapperError => e
rescue CollectionSpace::Mapper::IdFieldNotInMapperError => e
manager.add_error!
manager.add_message('The import tool cannot determine the unique ID field for this record type. Contact import tool admin and ask them to fix the RecordMapper.')
manager.exception!
Expand All @@ -49,7 +49,9 @@ def perform(preprocess)
validated = handler.validate(data)

unless validated.valid?
missing_required = validated.errors.select { |err| err.start_with?('required field missing') }
missing_required = validated.errors.select do |err|
err.start_with?('required field missing')
end
unless missing_required.empty?
missing_required.each { |msg| manager.add_message(msg) }
manager.add_error!
Expand All @@ -60,30 +62,31 @@ def perform(preprocess)
validated = handler.validate(data)

unless validated.valid?
errs = validated.errors.reject { |err| err.start_with?('required field missing') }
errs = validated.errors.reject do |err|
err.start_with?('required field missing')
end
errs.each { |e| empty_required[e] = nil } unless errs.empty?
end
end

unless empty_required.empty?
empty_required.each_key { |msg| manager.add_message("In one or more rows, #{msg}") }
empty_required.each_key do |msg|
manager.add_message("In one or more rows, #{msg}")
end
manager.add_error!
end

manager.complete!

rescue StandardError => e
manager.exception!
Rails.logger.error(e.message)
Rails.logger.error(e.backtrace)
end
end


private

# Reports whether any header (key) of the given row data is blank.
#
# @param data [Hash] row data keyed by column header
# @return [Boolean] true when at least one key answers true to `blank?`
def missing_headers?(data)
  data.keys.any?(&:blank?)
end
end

47 changes: 24 additions & 23 deletions app/jobs/process_job.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# frozen_string_literal: true

require 'pp'

class ProcessJob < ApplicationJob
queue_as :default
sidekiq_options retry: false
Expand All @@ -18,7 +16,7 @@ def perform(process)
begin
handler = process.batch.handler
service_type = process.batch
.record_mapper['config']['service_type']
.record_mapper['config']['service_type']
rcs = RecordCacheService.new(batch_id: process.batch.id)

rep = ReportService.new(name: "#{manager.filename_base}_processed",
Expand All @@ -43,8 +41,8 @@ def perform(process)

manager.add_warning!
rep.append({ row: row_num,
header: 'ERR: mapper',
message: "Mapper did not return result for unexpected reason. Please send a copy of this report to [email protected]. We will use the following info to diagnose and fix the problem, but you may ignore it: #{e.message} -- #{e.backtrace.first}" })
header: 'ERR: mapper',
message: "Mapper did not return result for unexpected reason. Please send a copy of this report to [email protected]. We will use the following info to diagnose and fix the problem, but you may ignore it: #{e.message} -- #{e.backtrace.first}" })
manager.add_message('Mapping failed for one or more records')
next
end
Expand All @@ -53,37 +51,37 @@ def perform(process)
row_occ = "#{row_num}.#{i + 1}"
# write row number for later merge with transfer results
rep.append({ row: row_num,
row_occ: row_occ,
header: 'INFO: rownum',
message: row_num })
row_occ: row_occ,
header: 'INFO: rownum',
message: row_num })
# write row occurrence number for later merge with transfer results
rep.append({ row: row_num,
row_occ: row_occ,
header: 'INFO: rowoccurrence',
message: row_occ })
row_occ: row_occ,
header: 'INFO: rowoccurrence',
message: row_occ })
# write record status for collation into final report
rep.append({ row: row_num,
row_occ: row_occ,
header: 'INFO: record status',
message: result.record_status })
row_occ: row_occ,
header: 'INFO: record status',
message: result.record_status })

id = result.identifier
puts "Handling record identifier: #{id}"
if id.nil? || id.empty?
manager.add_error!
rep.append({ row: row_num,
row_occ: row_occ,
header: 'ERR: record id',
message: 'Identifier for record not found or created' })
row_occ: row_occ,
header: 'ERR: record id',
message: 'Identifier for record not found or created' })
manager.add_message('No identifier value for one or more records')
else
rus.add(row: row_num, row_occ: row_occ, rec_id: id)

if service_type == 'relation'
rep.append({ row: row_num,
row_occ: row_occ,
header: 'INFO: relationship id',
message: id })
row_occ: row_occ,
header: 'INFO: relationship id',
message: id })
end
end

Expand All @@ -96,7 +94,9 @@ def perform(process)

unless result.warnings.empty?
puts 'Handling warnings'
result.warnings.each { |warning| manager.handle_processing_warning(rep, row_occ, warning) }
result.warnings.each do |warning|
manager.handle_processing_warning(rep, row_occ, warning)
end
end

if result.errors.empty?
Expand All @@ -107,9 +107,10 @@ def perform(process)
end
else
puts 'Handling errors'
result.errors.each { |err| manager.handle_processing_error(rep, row_occ, err) }
result.errors.each do |err|
manager.handle_processing_error(rep, row_occ, err)
end
end

end
process.save
end
Expand Down
66 changes: 18 additions & 48 deletions app/services/missing_term_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,22 @@

class MissingTermService
attr_reader :missing_term_occurrence_file, :uniq_missing_terms_file

FILE_TYPE = 'csv'
MISSING_TERM_OCCURRENCE_HEADERS = %i[row_number row_occ input_column category type subtype value].freeze
MISSING_TERM_OCCURRENCE_HEADERS = %i[row_number row_occ input_column category type
subtype value].freeze
UNIQ_MISSING_TERMS_HEADERS = %i[type subtype value].freeze

# mts = MissingTermService.new(batch: 38, save_to_file: true)
# CSV.foreach(mts.file, headers: true) { |row| puts row }
def initialize(batch:, save_to_file: false)
@save_to_file = save_to_file
@all = {}
@all = []
time = Time.now
filename_stub = "#{batch.name.parameterize}-#{time.strftime('%F').delete('-')}-#{time.strftime('%R').delete(':')}-"
missing_term_occurrence_filename = "#{filename_stub}missing_term_occurrences.#{FILE_TYPE}"
@missing_term_occurrence_file = Rails.root.join('tmp', missing_term_occurrence_filename)
@missing_term_occurrence_file = Rails.root.join('tmp',
missing_term_occurrence_filename)
@missing_term_occurrence_headers = MISSING_TERM_OCCURRENCE_HEADERS
if @save_to_file
append_headers(@missing_term_occurrence_file, @missing_term_occurrence_headers)
Expand All @@ -29,15 +32,9 @@ def initialize(batch:, save_to_file: false)
end

def add(term, row_number, row_occ)
return if term[:found]

type = term[:refname].type
subtype = term[:refname].subtype
val = term[:refname].display_name
@all[type] = {} unless @all.key?(type)
@all[type][subtype] = {} unless @all[type].key?(subtype)
@all[type][subtype][val] = [] unless @all[type][subtype].key?(val)
@all[type][subtype][val] << term
return if term.found?

@all << term
append(term, row_number, row_occ) if @save_to_file
end

Expand All @@ -47,42 +44,20 @@ def report_uniq_missing_terms
umt
end

def message(term)
"#{term[:field]}: #{term[:refname].display_name} (#{term[:refname].type}/#{term[:refname].subtype})"
end

def get_missing(terms)
terms.select { |termhash| termhash[:found] == false }
end
def get_missing(terms) = terms.select(&:missing?)

# Returns the value of @uniq_term_count, running
# compile_uniq_missing_terms first when @total_term_count is nil.
#
# NOTE(review): the guard reads @total_term_count, while the visible code
# only ever assigns @uniq_term_count (in compile_uniq_missing_terms). If
# nothing else sets @total_term_count, the compile step runs on every
# call -- confirm whether that is intended.
def total_terms
  if @total_term_count.nil?
    compile_uniq_missing_terms
  end

  @uniq_term_count
end

def total_term_occurrences
@term_occ_count = 0
@all.each do |_type, subtypehash|
subtypehash.each do |_subtype, valhash|
valhash.each do |_val, valterms|
@term_occ_count += valterms.length
end
end
end
@term_occ_count
end
def total_term_occurrences = @all.length

private

def compile_uniq_missing_terms
terms = []
@all.each do |type, subtypehash|
subtypehash.each do |subtype, valhash|
valhash.each do |val, valterms|
terms << [type, subtype, val]
end
end
end
terms = @all.group_by(&:key)
.map { |_key, arr| [arr[0].type, arr[0].subtype, arr[0].display_name] }
@uniq_term_count = terms.length
terms
end
Expand All @@ -104,18 +79,13 @@ def append_headers(file, headers)
def append(term, row_number, row_occ)
return unless @save_to_file

puts "Writing #{row_number}: #{term[:refname].display_name} to CSV"
vals = [row_number,
row_occ,
term[:field],
term[:category],
term[:refname].type,
term[:refname].subtype,
term[:refname].display_name]
term.field,
term.category,
term.type,
term.subtype,
term.display_name]
CSV.open(@missing_term_occurrence_file, 'a') { |csv| csv << vals }
end

def total(where)
@all[where].inject(0) { |t, h| t + h[1].size }
end
end
3 changes: 2 additions & 1 deletion test/controllers/step/preprocesses_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ class PreprocessesControllerTest < ActionDispatch::IntegrationTest
@batch_preprocessed = batches(:superuser_batch_preprocessed)
@batch_preprocessed_step = step_preprocesses(:preprocess_superuser_batch)
@batch.mapper.config.attach(
io: File.open(Rails.root.join('test', 'fixtures', 'files', 'core-cataloging.json')),
io: File.open(Rails.root.join('test', 'fixtures', 'files',
'core-cataloging.json')),
filename: 'core-cataloging.json',
content_type: 'application/json',
identify: false
Expand Down
6 changes: 0 additions & 6 deletions test/jobs/process_job_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ class ProcessJobTest < ActiveJob::TestCase

test 'finishes the job' do
assert @process.batch.pending?
stub_request(:get, "https://core.dev.collectionspace.org/cspace-services/collectionobjects?as=collectionobjects_common:objectNumber%20=%20'1'&pgSz=25&sortBy=collectionspace_core:updatedAt%20DESC&wf_deleted=false")
.to_return(status: 200, body: '', headers: {})

stub_request(:get, 'https://core.dev.collectionspace.org/cspace-services/personauthorities?pgNum=0&pgSz=1&wf_deleted=false')
.to_return(status: 200, body: '', headers: {})

ProcessJob.perform_now(@process)
assert_equal :finished, @process.batch.current_status
end
Expand Down
4 changes: 2 additions & 2 deletions test/models/batch_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class BatchTest < ActiveSupport::TestCase

# Verifies that the batch config JSON supplied in @params reaches the
# handler, by reading the 'collection' default value back from it.
test 'json batch_config is received by handler' do
  batch = Batch.new(@params)
  value = batch.handler.mapper.batchconfig.default_values['collection']
  value = batch.handler.batch.default_values['collection']
  # assert_equal actually compares the two values. The previous
  # `assert 'library-collection', value` only tested that the string
  # literal is truthy (with `value` as the failure message), so it could
  # never fail.
  assert_equal 'library-collection', value
end

Expand Down Expand Up @@ -103,7 +103,7 @@ class BatchTest < ActiveSupport::TestCase
assert batches(:superuser_batch_archived).expired?

expired = 0
Batch.expired { |b| expired += 1 }
Batch.expired { |_b| expired += 1 }
assert 2, expired
end
end
Loading

0 comments on commit 0e891c6

Please sign in to comment.