diff --git a/Gemfile b/Gemfile index 7dad5aa..121d77a 100644 --- a/Gemfile +++ b/Gemfile @@ -2,11 +2,11 @@ source "https://rubygems.org" -gem "kiba-extend", "~> 3", github: "lyrasis/kiba-extend", branch: "main" +gem "kiba-extend", "~> 4", github: "lyrasis/kiba-extend", branch: "main" group :development, :test do - gem "pry", "~> 0.14" - gem "rake", "~> 13.0" + gem "pry" + gem "rake" gem "rspec" gem "almost_standard", github: "kspurgin/almost_standard", branch: "main" end diff --git a/Gemfile.lock b/Gemfile.lock index ff1b0f4..8b59895 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,10 +11,10 @@ GIT GIT remote: https://github.com/lyrasis/kiba-extend.git - revision: c8f29c39bc66b0266e6863463d8db29f1e684631 + revision: 6b933b773da5d256da12261d84c76d44315dc3b4 branch: main specs: - kiba-extend (3.3.0) + kiba-extend (4.0.0) activesupport (>= 6, < 8) amazing_print (~> 1.4) csv (~> 3) @@ -31,17 +31,17 @@ GIT GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4.3) + activesupport (7.0.8) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) minitest (>= 5.1) tzinfo (~> 2.0) - amazing_print (1.4.0) + amazing_print (1.5.0) ast (2.4.2) base64 (0.1.1) coderay (1.1.3) concurrent-ruby (1.2.2) - csv (3.2.6) + csv (3.2.7) diff-lcs (1.5.0) dry-configurable (0.15.0) concurrent-ruby (~> 1.0) @@ -66,7 +66,7 @@ GEM measured (2.8.2) activesupport (>= 5.2) method_source (1.0.0) - minitest (5.18.0) + minitest (5.20.0) parallel (1.23.0) parser (3.2.2.3) ast (~> 2.4.1) @@ -131,7 +131,7 @@ GEM standard-performance (1.2.0) lint_roller (~> 1.1) rubocop-performance (~> 1.19.0) - thor (1.2.1) + thor (1.2.2) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unf (0.1.4) @@ -139,7 +139,7 @@ GEM unf_ext (0.0.8.2) unicode-display_width (2.4.2) xxhash (0.5.0) - zeitwerk (2.6.8) + zeitwerk (2.6.11) PLATFORMS x86_64-darwin-20 @@ -147,9 +147,9 @@ PLATFORMS DEPENDENCIES almost_standard! - kiba-extend (~> 3)! - pry (~> 0.14) - rake (~> 13.0) + kiba-extend (~> 4)! 
+ pry + rake rspec BUNDLED WITH diff --git a/README.adoc b/README.adoc index 08fee80..63eefc0 100644 --- a/README.adoc +++ b/README.adoc @@ -23,6 +23,7 @@ Includes heavily commented/explained code for: ** https://github.com/lyrasis/kiba-extend-project/blob/main/lib/ke_project/transforms/locations/loc_name_reverser.rb[defining] ** https://github.com/lyrasis/kiba-extend-project/blob/main/spec/ke_project/transforms/locations/loc_name_reverser_spec.rb[testing] ** https://github.com/lyrasis/kiba-extend-project/blob/4e4458ee7a9ed7e56e0a5e88b82a3b6bcf1fc89d/lib/ke_project/source_system/locations.rb#L40[using in jobs] +* Setting up and using the `IterativeCleanup` mixin added in `kiba-extend` v4.0.0 (mixed throughout - do a search for "IterativeCleanup" to find stuff) toc::[] @@ -50,6 +51,10 @@ Now you should be able to run the thor tasks for the project. In your terminal, `thor reg:list` +Jobs tagged with place and cleanup are dynamically set up for you via the `IterativeCleanup` mixin added in `kiba-extend` v4.0.0: + +`thor jobs tagged_and --tags=place cleanup` + Run the `locations__to_json` job (which has the effect of running all other jobs as its dependencies): `thor run:job locations__to_json` @@ -118,5 +123,5 @@ Using Zeitwerk to handle code loading introduces a bit some constraint on how yo * https://lyrasis.github.io/kiba-extend/[kiba-extend documentation] ** transforms *** https://lyrasis.github.io/kiba-extend/Kiba/Extend/Transforms.html[documentation] -*** a lot of the documentation still needs to be written, but all the examples in the docs are tested in the kiba-extend spec. Consult https://github.com/lyrasis/kiba-extend/tree/main/spec/kiba/extend/transforms[the tests] for examples of what each undocumented transform does. +*** a lot of the documentation still needs to be written, but all the examples in the docs are tested in the kiba-extend spec. 
Consult https://github.com/lyrasis/kiba-extend/tree/main/spec/kiba/extend/transforms[the tests] for examples of what each undocumented transform does. ** https://lyrasis.github.io/kiba-extend/file_list.html[List of non-code-specific documentation pages] that give a bigger picture explanation, or provide a reference diff --git a/data/source_system_data/places.csv b/data/source_system_data/places.csv new file mode 100644 index 0000000..baa0311 --- /dev/null +++ b/data/source_system_data/places.csv @@ -0,0 +1,33 @@ +"place" +"country: USA|||state: North Carolina" +"country: USA|||state: North Carolina|||city: Asheville" +"country: USA|||state: North Carolina|||city: Bakersville" +"country: USA|||state: North Carolina|||city: near Beria" +"country: USA|||state: North Carolina|||city: Charlotte" +"country: USA|||state: North Carolina|||city: Cherokee" +"country: USA|||state: North Carolina|||city: Currituck" +"country: USA|||state: North Carolina|||city: Durham" +"country: USA|||state: North Carolina|||city: Guntertown [sic]" +"country: USA|||state: North Carolina|||city: Highlands" +"country: USA|||state: North Carolina|||city: Hot Springs" +"country: USA|||state: North Carolina|||city: Jacksonville" +"country: USA|||state: North Carolina|||city: Kitty Hawk" +"country: USA|||state: North Carolina|||city: Marshall" +"country: USA|||state: North Carolina|||city: Morganton" +"country: USA|||state: North Carolina|||city: PENLAND (?)" +"country: USA|||state: North Carolina|||city: Penland" +"country: USA|||state: North Carolina|||city: Raleigh" +"country: USA|||state: North Carolina|||city: Shelton Laurel" +"country: USA|||state: North Carolina|||city: Spruce Pine" +"country: USA|||state: North Carolina|||city: Stem" +"country: USA|||state: North Carolina|||city: Tallyho" +"country: USA|||state: North Carolina|||city: Tallyho , Granville County|||notes: Stem" +"country: USA|||state: North Carolina|||city: Wadesboro" +"country: USA|||state: North Carolina|||city: Wendell" 
+"country: USA|||state: North Carolina|||city: Wing" +"country: USA|||state: North Carolina|||city: Yanceyville" +"country: United States of America|||state: North Carolina|||city: Waynesville" +"country: United States|||state: North Carolina|||county: Burke County|||city: Salem" +"country: United States|||state: North Carolina|||county: Wake County|||city: Raleigh" +"state: North Carolina" +"state: North Carolina|||city: Beria" diff --git a/data/supplied/places_cleanup_worksheet_done_1.csv b/data/supplied/places_cleanup_worksheet_done_1.csv new file mode 100644 index 0000000..1819e98 --- /dev/null +++ b/data/supplied/places_cleanup_worksheet_done_1.csv @@ -0,0 +1,33 @@ +"country","state","county","city","proximity","uncertainty","notes","clean_fingerprint","fingerprints" +"United States","North Carolina",,"Asheville",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Bc2hldmlsbGXikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Bc2hldmlsbGXikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Bakersville",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CYWtlcnN2aWxsZeKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CYWtlcnN2aWxsZeKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Beria","near",,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uZWFyIEJlcmlh4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uZWFyIEJlcmlh4pCfbmls4pCfbmls" +"United States","North Carolina",,"Beria",,,,"bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CZXJpYeKQn25pbOKQn25pbA==","bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CZXJpYeKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Charlotte",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGFybG90dGXikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGFybG90dGXikJ9uaWzikJ9uaWw=" +"United States","North 
Carolina",,"Cherokee",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGVyb2tlZeKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGVyb2tlZeKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Currituck",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DdXJyaXR1Y2vikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DdXJyaXR1Y2vikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Durham",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9EdXJoYW3ikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9EdXJoYW3ikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Guntertown",,,"[sic]","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9HdW50ZXJ0b3duIFtzaWNd4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9HdW50ZXJ0b3duIFtzaWNd4pCfbmls4pCfbmls" +"United States","North Carolina",,"Highlands",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9IaWdobGFuZHPikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9IaWdobGFuZHPikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Hot Springs",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Ib3QgU3ByaW5nc+KQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Ib3QgU3ByaW5nc+KQn25pbOKQn25pbA==" +"United States","North Carolina",,"Jacksonville",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9KYWNrc29udmlsbGXikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9KYWNrc29udmlsbGXikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Kitty Hawk",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9LaXR0eSBIYXdr4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9LaXR0eSBIYXdr4pCfbmls4pCfbmls" +"United States","North Carolina",,"Marshall",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9NYXJzaGFsbOKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9NYXJzaGFsbOKQn25pbOKQn25pbA==" +"United States","North 
Carolina",,"Morganton",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Nb3JnYW50b27ikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Nb3JnYW50b27ikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Penland",,"uncertain",,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QRU5MQU5EICg/KeKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QRU5MQU5EICg/KeKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Penland",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QZW5sYW5k4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QZW5sYW5k4pCfbmls4pCfbmls" +"United States","North Carolina","Wake County","Raleigh",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9SYWxlaWdo4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9SYWxlaWdo4pCfbmls4pCfbmls" +"United States","North Carolina","Wake County","Raleigh",,,,"bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfV2FrZSBDb3VudHnikJ9SYWxlaWdo4pCfbmls4pCfbmls","bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfV2FrZSBDb3VudHnikJ9SYWxlaWdo4pCfbmls4pCfbmls" +"United States","North Carolina","Burke County","Salem",,,,"bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfQnVya2UgQ291bnR54pCfU2FsZW3ikJ9uaWzikJ9uaWw=","bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfQnVya2UgQ291bnR54pCfU2FsZW3ikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Shelton Laurel",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TaGVsdG9uIExhdXJlbOKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TaGVsdG9uIExhdXJlbOKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Spruce Pine",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TcHJ1Y2UgUGluZeKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TcHJ1Y2UgUGluZeKQn25pbOKQn25pbA==" +"United States","North Carolina","Granville County","Stem",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TdGVt4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TdGVt4pCfbmls4pCfbmls" +"United 
States","North Carolina","Granville County","Stem",,,"as Tallyho","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhv4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhv4pCfbmls4pCfbmls" +"United States","North Carolina","Granville County","Stem",,,"as Tallyho","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhvICwgR3JhbnZpbGxlIENvdW50eeKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhvICwgR3JhbnZpbGxlIENvdW50eeKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Wadesboro",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYWRlc2Jvcm/ikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYWRlc2Jvcm/ikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,"Waynesville",,,,"bmls4pCfVW5pdGVkIFN0YXRlcyBvZiBBbWVyaWNh4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYXluZXN2aWxsZeKQn25pbOKQn25pbA==","bmls4pCfVW5pdGVkIFN0YXRlcyBvZiBBbWVyaWNh4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYXluZXN2aWxsZeKQn25pbOKQn25pbA==" +"United States","North Carolina",,"Wendell",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XZW5kZWxs4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XZW5kZWxs4pCfbmls4pCfbmls" +"United States","North Carolina",,"Wing",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XaW5n4pCfbmls4pCfbmls","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XaW5n4pCfbmls4pCfbmls" +"United States","North Carolina",,"Yanceyville",,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9ZYW5jZXl2aWxsZeKQn25pbOKQn25pbA==","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9ZYW5jZXl2aWxsZeKQn25pbOKQn25pbA==" +"United States","North Carolina",,,,,,"bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw=","bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw=" +"United States","North Carolina",,,,,,"bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw=","bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw=" diff --git 
a/data/to_client/places_cleanup_worksheet_1.csv b/data/to_client/places_cleanup_worksheet_1.csv new file mode 100644 index 0000000..aabbed5 --- /dev/null +++ b/data/to_client/places_cleanup_worksheet_1.csv @@ -0,0 +1,33 @@ +country,state,county,city,proximity,uncertainty,notes,clean_fingerprint,fingerprints +USA,North Carolina,,,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw= +USA,North Carolina,,Asheville,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Bc2hldmlsbGXikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Bc2hldmlsbGXikJ9uaWzikJ9uaWw= +USA,North Carolina,,Bakersville,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CYWtlcnN2aWxsZeKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CYWtlcnN2aWxsZeKQn25pbOKQn25pbA== +USA,North Carolina,,near Beria,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uZWFyIEJlcmlh4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uZWFyIEJlcmlh4pCfbmls4pCfbmls +USA,North Carolina,,Charlotte,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGFybG90dGXikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGFybG90dGXikJ9uaWzikJ9uaWw= +USA,North Carolina,,Cherokee,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGVyb2tlZeKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DaGVyb2tlZeKQn25pbOKQn25pbA== +USA,North Carolina,,Currituck,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DdXJyaXR1Y2vikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9DdXJyaXR1Y2vikJ9uaWzikJ9uaWw= +USA,North Carolina,,Durham,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9EdXJoYW3ikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9EdXJoYW3ikJ9uaWzikJ9uaWw= +USA,North Carolina,,Guntertown [sic],,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9HdW50ZXJ0b3duIFtzaWNd4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9HdW50ZXJ0b3duIFtzaWNd4pCfbmls4pCfbmls 
+USA,North Carolina,,Highlands,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9IaWdobGFuZHPikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9IaWdobGFuZHPikJ9uaWzikJ9uaWw= +USA,North Carolina,,Hot Springs,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Ib3QgU3ByaW5nc+KQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Ib3QgU3ByaW5nc+KQn25pbOKQn25pbA== +USA,North Carolina,,Jacksonville,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9KYWNrc29udmlsbGXikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9KYWNrc29udmlsbGXikJ9uaWzikJ9uaWw= +USA,North Carolina,,Kitty Hawk,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9LaXR0eSBIYXdr4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9LaXR0eSBIYXdr4pCfbmls4pCfbmls +USA,North Carolina,,Marshall,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9NYXJzaGFsbOKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9NYXJzaGFsbOKQn25pbOKQn25pbA== +USA,North Carolina,,Morganton,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Nb3JnYW50b27ikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9Nb3JnYW50b27ikJ9uaWzikJ9uaWw= +USA,North Carolina,,PENLAND (?),,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QRU5MQU5EICg/KeKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QRU5MQU5EICg/KeKQn25pbOKQn25pbA== +USA,North Carolina,,Penland,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QZW5sYW5k4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9QZW5sYW5k4pCfbmls4pCfbmls +USA,North Carolina,,Raleigh,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9SYWxlaWdo4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9SYWxlaWdo4pCfbmls4pCfbmls +USA,North Carolina,,Shelton Laurel,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TaGVsdG9uIExhdXJlbOKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TaGVsdG9uIExhdXJlbOKQn25pbOKQn25pbA== +USA,North Carolina,,Spruce 
Pine,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TcHJ1Y2UgUGluZeKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TcHJ1Y2UgUGluZeKQn25pbOKQn25pbA== +USA,North Carolina,,Stem,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TdGVt4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9TdGVt4pCfbmls4pCfbmls +USA,North Carolina,,Tallyho,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhv4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhv4pCfbmls4pCfbmls +USA,North Carolina,,"Tallyho , Granville County",,,Stem,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhvICwgR3JhbnZpbGxlIENvdW50eeKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9UYWxseWhvICwgR3JhbnZpbGxlIENvdW50eeKQn25pbOKQn25pbA== +USA,North Carolina,,Wadesboro,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYWRlc2Jvcm/ikJ9uaWzikJ9uaWw=,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYWRlc2Jvcm/ikJ9uaWzikJ9uaWw= +USA,North Carolina,,Wendell,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XZW5kZWxs4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XZW5kZWxs4pCfbmls4pCfbmls +USA,North Carolina,,Wing,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XaW5n4pCfbmls4pCfbmls,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XaW5n4pCfbmls4pCfbmls +USA,North Carolina,,Yanceyville,,,,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9ZYW5jZXl2aWxsZeKQn25pbOKQn25pbA==,bmls4pCfVVNB4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9ZYW5jZXl2aWxsZeKQn25pbOKQn25pbA== +United States of America,North Carolina,,Waynesville,,,,bmls4pCfVW5pdGVkIFN0YXRlcyBvZiBBbWVyaWNh4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYXluZXN2aWxsZeKQn25pbOKQn25pbA==,bmls4pCfVW5pdGVkIFN0YXRlcyBvZiBBbWVyaWNh4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9XYXluZXN2aWxsZeKQn25pbOKQn25pbA== +United States,North Carolina,Burke 
County,Salem,,,,bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfQnVya2UgQ291bnR54pCfU2FsZW3ikJ9uaWzikJ9uaWw=,bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfQnVya2UgQ291bnR54pCfU2FsZW3ikJ9uaWzikJ9uaWw= +United States,North Carolina,Wake County,Raleigh,,,,bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfV2FrZSBDb3VudHnikJ9SYWxlaWdo4pCfbmls4pCfbmls,bmls4pCfVW5pdGVkIFN0YXRlc+KQn05vcnRoIENhcm9saW5h4pCfV2FrZSBDb3VudHnikJ9SYWxlaWdo4pCfbmls4pCfbmls +,North Carolina,,,,,,bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw=,bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9uaWzikJ9uaWzikJ9uaWw= +,North Carolina,,Beria,,,,bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CZXJpYeKQn25pbOKQn25pbA==,bmls4pCfbmls4pCfTm9ydGggQ2Fyb2xpbmHikJ9uaWzikJ9CZXJpYeKQn25pbOKQn25pbA== diff --git a/lib/ke_project.rb b/lib/ke_project.rb index 473c751..64849f1 100644 --- a/lib/ke_project.rb +++ b/lib/ke_project.rb @@ -27,8 +27,6 @@ def reload! @loader.reload end - loader - extend Dry::Configurable # ## OVERRIDE KIBA::EXTEND'S DEFAULT OPTIONS # @@ -61,7 +59,7 @@ def reload! # # Base directory for project files setting :datadir, default: File.expand_path("data"), reader: true - # + # If I want to be lazy I can define this to avoid typing out full directory # paths. It also makes a nice example for using a constructor: setting :derived_dirs, @@ -116,8 +114,37 @@ def reload! # specific code, instead of `Kiba::Extend.delim`, while ensuring a # consistent default :delim is used across the board. setting :delim, default: Kiba::Extend.delim, reader: true - - # This sets up your file registry. Dig into `lib/ke_project/registry_data.rb` - # for more details on this. - KeProject::RegistryData.register end + +KeProject.loader + +# The following line is necessary if you wish to use +# `Kiba::Extend::Mixins::IterativeCleanup` in your project. +Kiba::Extend.config.config_namespaces = [KeProject] + +# This sets up your file registry. 
Dig into +# `lib/ke_project/registry_data.rb` for more details on this. +# +# If you are not using IterativeCleanup in your project, this can go +# at the end of the main KeProject (or equivalent) module definition +# (or it can stay here). However, if you are using IterativeCleanup, +# the following things need to happen in order: (1) Your client +# project gets loaded, which loads kiba-extend (and kiba-tms or any +# other intervening application layer); (2) kiba-extend +# `config_namespaces` gets set, so it will know where to look for +# config modules that may extend IterativeCleanup; and (3) all job +# entries are registered---those manually and programmatically +# defined in `RegistryData`, and those defined by the +# IterativeCleanup mixin. +KeProject::RegistryData.register + +# # The following settings are actually set in +# # `lib/ke_project/places_cleanup.rb`, +# # but are commented out here to show an alternate place where you could set +# # them. +# KeProject::PlacesCleanup.config.provided_worksheets = [ +# "places_cleanup_worksheet_1.csv" +# ] +# KeProject::PlacesCleanup.config.returned_files = [ +# "places_cleanup_worksheet_done_1.csv" +# ] diff --git a/lib/ke_project/jobs/places/prep_for_cleanup.rb b/lib/ke_project/jobs/places/prep_for_cleanup.rb new file mode 100644 index 0000000..281dfca --- /dev/null +++ b/lib/ke_project/jobs/places/prep_for_cleanup.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module KeProject::Jobs::Places::PrepForCleanup + module_function + + def job + Kiba::Extend::Jobs::Job.new( + files: { + source: :orig__places, + destination: :places__prep_for_cleanup + }, + transformer: xforms + ) + end + + def xforms + Kiba.job_segment do + transform do |row| + row[:place].split("|||") + .each do |pairstr| + pair = pairstr.split(": ") + row[pair[0]] = pair[1] + end + row + end + transform Clean::EnsureConsistentFields + transform Fingerprint::Add, + target: :fingerprint, + fields: KeProject::Places.fingerprint_fields + transform 
Delete::Fields, + fields: :place + end + end +end diff --git a/lib/ke_project/places.rb b/lib/ke_project/places.rb new file mode 100644 index 0000000..d538344 --- /dev/null +++ b/lib/ke_project/places.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module KeProject::Places + module_function + + extend Dry::Configurable + + setting :fingerprint_fields, + default: %i[place country state county city], + reader: true, + constructor: ->(val) do + [val, + KeProject::PlacesCleanup.worksheet_add_fields].flatten + end +end diff --git a/lib/ke_project/places_cleanup.rb b/lib/ke_project/places_cleanup.rb new file mode 100644 index 0000000..012e733 --- /dev/null +++ b/lib/ke_project/places_cleanup.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module KeProject::PlacesCleanup + module_function + + extend Dry::Configurable + + setting :base_job, default: :places__prep_for_cleanup, reader: true + setting :job_tags, default: %i[place cleanup], reader: true + setting :worksheet_add_fields, + default: %i[proximity uncertainty], + reader: true + + def fingerprint_fields + KeProject::Places.fingerprint_fields + end + + setting :fingerprint_flag_ignore_fields, + default: [:place], + reader: true + + def worksheet_field_order + fingerprint_fields - fingerprint_flag_ignore_fields + end + + extend Kiba::Extend::Mixins::IterativeCleanup +end + +# Extending `IterativeCleanup` in the above module definition defines +# the `provided_worksheets` and `returned_files` config settings, with +# empty arrays as the default values and constructor logic to generate +# full file paths from the file names. The "Defines settings in the +# extending config module" section at the link below explains these +# settings and their assumptions: +# +# https://lyrasis.github.io/kiba-extend/Kiba/Extend/Mixins/IterativeCleanup.html +# +# When we actually have files to record, we set the setting values *from +# outside* the body of the module definition. 
+# +# If I want to see all my project config in one place, I can set these from the +# end of `lib/ke_project.rb`. If it makes more sense to me to handle all the +# config/settings for place cleanup in this file, I can set them here. +KeProject::PlacesCleanup.config.provided_worksheets = [ + "places_cleanup_worksheet_1.csv" +] +KeProject::PlacesCleanup.config.returned_files = [ + "places_cleanup_worksheet_done_1.csv" +] diff --git a/lib/ke_project/registry_data.rb b/lib/ke_project/registry_data.rb index 4f95b6b..3a944c4 100644 --- a/lib/ke_project/registry_data.rb +++ b/lib/ke_project/registry_data.rb @@ -34,6 +34,11 @@ def register # This populates the registry with the manually defined entries register_files + # This needs to be added if you are using the IterativeCleanup mixin + # in your project. It causes all the automagically defined cleanup jobs + # to be registered. + Kiba::Extend::Utils::IterativeCleanupJobRegistrar.call + # Calling :finalize on the registry just calls :transform and then :freeze # on the registry. # @@ -216,6 +221,18 @@ def register_files tags: %i[json authority location] } end + + # This namespace registers a job which is used as + # the base job for an iterative cleanup process defined in + # `lib/ke_project/places_cleanup.rb` + KeProject.registry.namespace("places") do + register :prep_for_cleanup, { + path: File.join(KeProject.datadir, "working", + "places_prep_for_cleanup.csv"), + creator: KeProject::Jobs::Places::PrepForCleanup, + tags: %i[authority place] + } + end end private_class_method :register_files end