Skip to content

Commit

Permalink
feat: take Ignore List into consideration when refreshing directory
Browse files Browse the repository at this point in the history
  • Loading branch information
yedpodtrzitko committed Sep 11, 2024
1 parent 2e8efa2 commit 6947026
Show file tree
Hide file tree
Showing 10 changed files with 83 additions and 54 deletions.
11 changes: 1 addition & 10 deletions tagstudio/src/core/library/alchemy/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@
)

if TYPE_CHECKING:
from ...utils.dupe_files import DupeRegistry
from ...utils.missing_files import MissingRegistry
pass

LIBRARY_FILENAME: str = "ts_library.sqlite"

Expand Down Expand Up @@ -100,11 +99,6 @@ class Library:
engine: Engine | None
folder: Folder | None

ignored_extensions: list[str]

missing_tracker: "MissingRegistry"
dupe_tracker: "DupeRegistry"

def open_library(
self, library_dir: Path | str, storage_path: str | None = None
) -> None:
Expand Down Expand Up @@ -175,9 +169,6 @@ def open_library(
session.commit()
self.folder = folder

# load ignored extensions
self.ignored_extensions = self.prefs(LibraryPrefs.EXTENSION_LIST)

@property
def default_fields(self) -> list[BaseField]:
with Session(self.engine) as session:
Expand Down
1 change: 1 addition & 0 deletions tagstudio/src/core/library/alchemy/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class Entry(Base):
folder: Mapped[Folder] = relationship("Folder")

path: Mapped[Path] = mapped_column(PathType, unique=True)
# encoding: Mapped[str | None] = mapped_column()

text_fields: Mapped[list[TextField]] = relationship(
back_populates="entry",
Expand Down
74 changes: 48 additions & 26 deletions tagstudio/src/core/utils/refresh_dir.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import time
from time import time
from collections.abc import Iterator
from dataclasses import dataclass, field
from pathlib import Path

from src.core.constants import TS_FOLDER_NAME
import structlog

from src.core.constants import TS_FOLDER_NAME, LibraryPrefs
from src.core.library import Library, Entry

logger = structlog.get_logger(__name__)


@dataclass
class RefreshDirTracker:
library: Library
dir_file_count: int = 0
files_not_in_library: list[Path] = field(default_factory=list)

@property
Expand All @@ -36,38 +39,57 @@ def save_new_files(self) -> Iterator[int]:

self.files_not_in_library = []

def refresh_dir(self) -> Iterator[int]:
def refresh_dir(self, lib_path: Path) -> Iterator[int]:
"""Scan a directory for files, and add those relative filenames to internal variables."""
if self.library.folder is None:
raise ValueError("No folder set.")
if self.library.library_dir is None:
raise ValueError("No library directory set.")

is_exclude_list = self.library.prefs(LibraryPrefs.IS_EXCLUDE_LIST)
exclude_list = set(self.library.prefs(LibraryPrefs.EXTENSION_LIST))

def skip_suffix(suffix: str) -> bool:
    """Tell whether a file with the given extension should be skipped.

    Kept as a local function so it can read ``exclude_list`` and
    ``is_exclude_list`` directly from the enclosing scope.

    The lower-cased suffix is looked up in the library's extension list:
    - library in "exclude mode" and the extension is in the list: skip
    - library in "include mode" and the extension is not in the list: skip
    """
    is_listed = suffix.lower() in exclude_list
    return is_listed == is_exclude_list

start_time_total = time()
start_time_loop = time()

start_time = time.time()
self.files_not_in_library = []
self.dir_file_count = 0

lib_path = self.library.folder.path

for path in lib_path.glob("**/*"):
str_path = str(path)
if (
path.is_dir()
or "$RECYCLE.BIN" in str_path
or TS_FOLDER_NAME in str_path
or "tagstudio_thumbs" in str_path
):
dir_file_count = 0

for path_item in lib_path.glob("**/*"):
str_path = str(path_item)
if path_item.is_dir():
continue

suffix = path.suffix.lower().lstrip(".")
if suffix in self.library.ignored_extensions:
if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path:
continue

self.dir_file_count += 1
relative_path = path.relative_to(lib_path)
if skip_suffix(path_item.suffix):
continue

dir_file_count += 1
relative_path = path_item.relative_to(lib_path)
# TODO: batch these lookups instead of querying the library once per file
if not self.library.has_path_entry(relative_path):
self.files_not_in_library.append(relative_path)

end_time = time.time()
# Yield output every 1/30 of a second
if (end_time - start_time) > 0.034:
yield self.dir_file_count
if (time() - start_time_loop) > 0.034:
yield dir_file_count
start_time_loop = time()

# Log one summary record for the whole scan.
# `dir_file_count` counts every file that passed the directory/extension
# filters, while the files that are actually new to the library are the
# ones collected in `self.files_not_in_library` -- report each under a key
# that says what it is.
end_time_total = time()
logger.info(
    "Directory scan time",
    path=lib_path,
    duration=(end_time_total - start_time_total),
    files_scanned=dir_file_count,
    new_files_count=len(self.files_not_in_library),
)
2 changes: 1 addition & 1 deletion tagstudio/src/qt/ts_qt.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ def add_new_files_callback(self):
)
pw.show()

iterator = FunctionIterator(tracker.refresh_dir)
iterator = FunctionIterator(lambda: tracker.refresh_dir(self.lib.library_dir))
iterator.value.connect(
lambda x: (
pw.update_progress(x + 1),
Expand Down
4 changes: 2 additions & 2 deletions tagstudio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def cwd():
@pytest.fixture
def library(request):
# when no param is passed, use the default
library_path = "/tmp/"
library_path = "/dev/null/"
if hasattr(request, "param"):
if isinstance(request.param, TemporaryDirectory):
library_path = request.param.name
Expand Down Expand Up @@ -55,7 +55,7 @@ def library(request):
# default item with deterministic name
entry = Entry(
folder=lib.folder,
path=pathlib.Path("foo.txt"),
path=pathlib.Path("foo.ext"),
fields=lib.default_fields,
)

Expand Down
6 changes: 3 additions & 3 deletions tagstudio/tests/fixtures/result.dupeguru
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<results>
<group>
<file path="/tmp/bar/foo.txt" words="" is_ref="n" marked="n"/>
<file path="/tmp/foo.txt" words="" is_ref="n" marked="n"/>
<file path="/tmp/foo/foo.txt" words="" is_ref="n" marked="n"/>
<file path="/tmp/bar/foo.ext" words="" is_ref="n" marked="n"/>
<file path="/tmp/foo.ext" words="" is_ref="n" marked="n"/>
<file path="/tmp/foo/foo.ext" words="" is_ref="n" marked="n"/>
<match first="1" second="0" percentage="100"/>
<match first="0" second="2" percentage="100"/>
<match first="1" second="2" percentage="100"/>
Expand Down
11 changes: 6 additions & 5 deletions tagstudio/tests/macros/test_dupe_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@


def test_refresh_dupe_files(library):
library.library_dir = "/tmp/"
entry = Entry(
folder=library.folder,
path=pathlib.Path("bar/foo.txt"),
path=pathlib.Path("bar/foo.ext"),
fields=library.default_fields,
)

entry2 = Entry(
folder=library.folder,
path=pathlib.Path("foo/foo.txt"),
path=pathlib.Path("foo/foo.ext"),
fields=library.default_fields,
)

Expand All @@ -29,7 +30,7 @@ def test_refresh_dupe_files(library):
assert len(registry.groups) == 1
paths = [entry.path for entry in registry.groups[0]]
assert paths == [
pathlib.Path("bar/foo.txt"),
pathlib.Path("foo.txt"),
pathlib.Path("foo/foo.txt"),
pathlib.Path("bar/foo.ext"),
pathlib.Path("foo.ext"),
pathlib.Path("foo/foo.ext"),
]
23 changes: 17 additions & 6 deletions tagstudio/tests/macros/test_refresh_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,29 @@
from tempfile import TemporaryDirectory

import pytest

from src.core.constants import LibraryPrefs
from src.core.utils.refresh_dir import RefreshDirTracker

CWD = pathlib.Path(__file__).parent


@pytest.mark.parametrize("exclude_mode", [True, False])
@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_refresh_new_files(library):
def test_refresh_new_files(library, exclude_mode):
# Given
library.set_prefs(LibraryPrefs.IS_EXCLUDE_LIST, exclude_mode)
library.set_prefs(LibraryPrefs.EXTENSION_LIST, [".md"])
registry = RefreshDirTracker(library=library)
(library.library_dir / "FOO.MD").touch()

# touch new files to simulate new files
(library.library_dir / "foo.md").touch()

assert not list(registry.refresh_dir())
# When
assert not list(registry.refresh_dir(library.library_dir))

assert registry.files_not_in_library == [pathlib.Path("foo.md")]
# Then
if exclude_mode:
# .md is in the list & is_exclude_list is True - should not be registered
assert not registry.files_not_in_library
else:
# .md is in the list & is_exclude_list is False - should be registered
assert registry.files_not_in_library == [pathlib.Path("FOO.MD")]
3 changes: 3 additions & 0 deletions tagstudio/tests/qt/test_preview_panel.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory

import pytest

from src.core.library import Entry
from src.core.library.alchemy.enums import FieldTypeEnum
Expand All @@ -18,6 +20,7 @@ def test_update_widgets_not_selected(qt_driver, library):
assert panel.file_label.text() == "No Items Selected"


@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
def test_update_widgets_single_selected(qt_driver, library):
qt_driver.frame_content = list(library.get_entries())
qt_driver.selected = [0]
Expand Down
2 changes: 1 addition & 1 deletion tagstudio/tests/test_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_search_filter_extensions(library, is_exclude):
assert len(items) == 1

entry = items[0]
assert (entry.path.suffix == ".txt") == is_exclude
assert (entry.path.suffix == ".ext") == is_exclude


def test_search_library_case_insensitive(library):
Expand Down

0 comments on commit 6947026

Please sign in to comment.