diff --git a/tagstudio/src/core/library/alchemy/library.py b/tagstudio/src/core/library/alchemy/library.py index e57e35174..263bcb5ce 100644 --- a/tagstudio/src/core/library/alchemy/library.py +++ b/tagstudio/src/core/library/alchemy/library.py @@ -25,7 +25,6 @@ selectinload, make_transient, ) -from typing import TYPE_CHECKING from .db import make_tables from .enums import TagColor, FilterState, FieldTypeEnum @@ -46,10 +45,6 @@ BACKUP_FOLDER_NAME, ) -if TYPE_CHECKING: - from ...utils.dupe_files import DupeRegistry - from ...utils.missing_files import MissingRegistry - LIBRARY_FILENAME: str = "ts_library.sqlite" logger = structlog.get_logger(__name__) @@ -100,11 +95,6 @@ class Library: engine: Engine | None folder: Folder | None - ignored_extensions: list[str] - - missing_tracker: "MissingRegistry" - dupe_tracker: "DupeRegistry" - def close(self): if self.engine: self.engine.dispose() @@ -182,9 +172,6 @@ def open_library( session.commit() self.folder = folder - # load ignored extensions - self.ignored_extensions = self.prefs(LibraryPrefs.EXTENSION_LIST) - @property def default_fields(self) -> list[BaseField]: with Session(self.engine) as session: diff --git a/tagstudio/src/core/utils/refresh_dir.py b/tagstudio/src/core/utils/refresh_dir.py index b265f3953..7e2224e32 100644 --- a/tagstudio/src/core/utils/refresh_dir.py +++ b/tagstudio/src/core/utils/refresh_dir.py @@ -1,16 +1,19 @@ -import time +from time import time from collections.abc import Iterator from dataclasses import dataclass, field from pathlib import Path -from src.core.constants import TS_FOLDER_NAME +import structlog + +from src.core.constants import TS_FOLDER_NAME, LibraryPrefs from src.core.library import Library, Entry +logger = structlog.get_logger(__name__) + @dataclass class RefreshDirTracker: library: Library - dir_file_count: int = 0 files_not_in_library: list[Path] = field(default_factory=list) @property @@ -36,38 +39,57 @@ def save_new_files(self) -> Iterator[int]: self.files_not_in_library = [] - def refresh_dir(self) -> Iterator[int]: + def refresh_dir(self, lib_path: Path) -> Iterator[int]: """Scan a directory for files, and add those relative filenames to internal variables.""" - if self.library.folder is None: - raise ValueError("No folder set.") + if self.library.library_dir is None: + raise ValueError("No library directory set.") + + is_exclude_list = self.library.prefs(LibraryPrefs.IS_EXCLUDE_LIST) + exclude_list = set(self.library.prefs(LibraryPrefs.EXTENSION_LIST)) + + def skip_suffix(suffix: str) -> bool: + """Determine if the file extension should be skipped. + + Declared as local function as it's faster. + + - check if the suffix is in the library's "exclude list" + - if library uses "exclude mode", and extensions is in the list, we skip + - if library uses "include mode", and extensions is not in the list, we skip + """ + return (suffix.lower() in exclude_list) == is_exclude_list + + start_time_total = time() + start_time_loop = time() - start_time = time.time() self.files_not_in_library = [] - self.dir_file_count = 0 - - lib_path = self.library.folder.path - - for path in lib_path.glob("**/*"): - str_path = str(path) - if ( - path.is_dir() - or "$RECYCLE.BIN" in str_path - or TS_FOLDER_NAME in str_path - or "tagstudio_thumbs" in str_path - ): + dir_file_count = 0 + + for path_item in lib_path.glob("**/*"): + str_path = str(path_item) + if path_item.is_dir(): continue - suffix = path.suffix.lower().lstrip(".") - if suffix in self.library.ignored_extensions: + if "$RECYCLE.BIN" in str_path or TS_FOLDER_NAME in str_path: continue - self.dir_file_count += 1 - relative_path = path.relative_to(lib_path) + if skip_suffix(path_item.suffix): + continue + + dir_file_count += 1 + relative_path = path_item.relative_to(lib_path) # TODO - load these in batch somehow if not self.library.has_path_entry(relative_path): self.files_not_in_library.append(relative_path) - end_time = time.time() # Yield output every 1/30 of a second - if (end_time - start_time) > 0.034: - yield self.dir_file_count + if (time() - start_time_loop) > 0.034: + yield dir_file_count + start_time_loop = time() + + end_time_total = time() + logger.info( + "Directory scan time", + path=lib_path, + duration=(end_time_total - start_time_total), + new_files_count=dir_file_count, + ) diff --git a/tagstudio/src/qt/ts_qt.py b/tagstudio/src/qt/ts_qt.py index d9ae4b7d8..0b215090a 100644 --- a/tagstudio/src/qt/ts_qt.py +++ b/tagstudio/src/qt/ts_qt.py @@ -675,7 +675,7 @@ def add_new_files_callback(self): ) pw.show() - iterator = FunctionIterator(tracker.refresh_dir) + iterator = FunctionIterator(lambda: tracker.refresh_dir(self.lib.library_dir)) iterator.value.connect( lambda x: ( pw.update_progress(x + 1), diff --git a/tagstudio/tests/conftest.py b/tagstudio/tests/conftest.py index b5899f8ec..2c299739f 100644 --- a/tagstudio/tests/conftest.py +++ b/tagstudio/tests/conftest.py @@ -24,7 +24,7 @@ def cwd(): @pytest.fixture def library(request): # when no param is passed, use the default - library_path = "/tmp/" + library_path = "/dev/null/" if hasattr(request, "param"): if isinstance(request.param, TemporaryDirectory): library_path = request.param.name diff --git a/tagstudio/tests/macros/test_dupe_entries.py b/tagstudio/tests/macros/test_dupe_entries.py index 2272e1fcd..711391988 100644 --- a/tagstudio/tests/macros/test_dupe_entries.py +++ b/tagstudio/tests/macros/test_dupe_entries.py @@ -7,6 +7,7 @@ def test_refresh_dupe_files(library): + library.library_dir = "/tmp/" entry = Entry( folder=library.folder, path=pathlib.Path("bar/foo.txt"), diff --git a/tagstudio/tests/macros/test_refresh_dir.py b/tagstudio/tests/macros/test_refresh_dir.py index f72572de1..8c4864ff9 100644 --- a/tagstudio/tests/macros/test_refresh_dir.py +++ b/tagstudio/tests/macros/test_refresh_dir.py @@ -2,18 +2,29 @@ from tempfile import TemporaryDirectory import pytest + +from src.core.constants import LibraryPrefs from src.core.utils.refresh_dir import RefreshDirTracker CWD = pathlib.Path(__file__).parent +@pytest.mark.parametrize("exclude_mode", [True, False]) @pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True) -def test_refresh_new_files(library): +def test_refresh_new_files(library, exclude_mode): + # Given + library.set_prefs(LibraryPrefs.IS_EXCLUDE_LIST, exclude_mode) + library.set_prefs(LibraryPrefs.EXTENSION_LIST, [".md"]) registry = RefreshDirTracker(library=library) + (library.library_dir / "FOO.MD").touch() - # touch new files to simulate new files - (library.library_dir / "foo.md").touch() - - assert not list(registry.refresh_dir()) + # When + assert not list(registry.refresh_dir(library.library_dir)) - assert registry.files_not_in_library == [pathlib.Path("foo.md")] + # Then + if exclude_mode: + # .md is in the list & is_exclude_list is True - should not be registered + assert not registry.files_not_in_library + else: + # .md is in the list & is_exclude_list is False - should be registered + assert registry.files_not_in_library == [pathlib.Path("FOO.MD")] diff --git a/tagstudio/tests/qt/test_preview_panel.py b/tagstudio/tests/qt/test_preview_panel.py index 7d66aa8d7..044fd8c63 100644 --- a/tagstudio/tests/qt/test_preview_panel.py +++ b/tagstudio/tests/qt/test_preview_panel.py @@ -1,5 +1,7 @@ from pathlib import Path +from tempfile import TemporaryDirectory +import pytest from src.core.library import Entry from src.core.library.alchemy.enums import FieldTypeEnum @@ -18,6 +20,7 @@ def test_update_widgets_not_selected(qt_driver, library): assert panel.file_label.text() == "No Items Selected" +@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True) def test_update_widgets_single_selected(qt_driver, library): qt_driver.frame_content = list(library.get_entries()) qt_driver.selected = [0]