Skip to content

Commit

Permalink
Merge branch 'main' into patch-3
Browse files (browse the repository at this point in the history)
  • Loading branch information
soldni authored Sep 20, 2024
2 parents 5f77f03 + 621a6f4 commit ec0429e
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ jobs:
if: "startsWith(github.ref, 'refs/tags/')"
needs: [build-linux, build-windows, build-macos, sdist]
steps:
- - uses: actions/download-artifact@v3
+ - uses: actions/download-artifact@v4.1.7
with:
name: wheels
- name: Publish to PyPI
Expand Down
5 changes: 3 additions & 2 deletions src/deduper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,13 +297,14 @@ fn write_attributes(
// skip empty documents if text_length is 0
for p in paragraphs {
let par_start = offset;
- offset += p.chars().count();
+ let par_char_length = p.chars().count();
+ offset += par_char_length;
if offset < text_length - 1 {
offset += 1; // For the newline
}
let par_end = offset;

- if offset < min_content_length {
+ if par_char_length < min_content_length {
// skip length 0 paragraphs
continue;
}
Expand Down

0 comments on commit ec0429e

Please sign in to comment.