Skip to content

Commit

Permalink
Add complex text XLSX test, improve text extraction, update version to 3.0.8
Browse files Browse the repository at this point in the history
  • Loading branch information
weilandia committed Jul 12, 2024
1 parent b0c1599 commit e141231
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,5 +111,8 @@ Lint/MissingSuper:
Lint/ConstantDefinitionInBlock:
Enabled: false

Lint/SuppressedException:
Enabled: false

Style/SingleArgumentDig:
Enabled: false
5 changes: 3 additions & 2 deletions lib/simple_text_extract/extract.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,14 @@ def xlsx_extract
spreadsheet = Roo::Spreadsheet.open(file, only_visible_sheets: true)

text = []

spreadsheet.sheets.each_with_index do |name, i|
text << "# Sheet Index: #{i}"
text << "# Sheet Name: #{name}"

spreadsheet.sheet(name)&.each_row_streaming do |row|
text << row.map(&:to_s).join(" ")
text << row.map do |cell|
cell.value.to_s
end.join(" ")
end
end

Expand Down
2 changes: 1 addition & 1 deletion lib/simple_text_extract/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module SimpleTextExtract
VERSION = "3.0.7"
VERSION = "3.0.8"
end
2 changes: 1 addition & 1 deletion simple_text_extract.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ Gem::Specification.new do |spec|
spec.requirements << "pdftotext/poppler"

spec.add_dependency "roo", "~> 2.10.0"
spec.add_dependency "spreadsheet", "~> 1.3.0"
spec.add_dependency "rubyzip", "~> 2.3.2"
spec.add_dependency "spreadsheet", "~> 1.3.0"
end
Binary file added test/fixtures/test_file_with_complex_text.xlsx
Binary file not shown.
5 changes: 5 additions & 0 deletions test/simple_text_extract_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ def test_it_parses_xlsx_files_to_text_from_raw_excludes_hidden
assert_equal "# Sheet Index: 0\n# Sheet Name: Sheet1\nruby 25\n# Sheet Index: 1\n# Sheet Name: Sheet2\njs 35", result
end

def test_it_parses_xlsx_files_to_text_with_complex_text
result = SimpleTextExtract.extract(filename: "test_xlsx.xlsx", raw: File.read("test/fixtures/test_file_with_complex_text.xlsx"))
assert result.include?("Pricing Template")
end

def test_nil_to_integer
result = SimpleTextExtract.extract(filename: "roo_bad_link.xlsx", raw: File.read("test/fixtures/roo_bad_link.xlsx"))

Expand Down

0 comments on commit e141231

Please sign in to comment.