mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
fix: eliminate async safety violations and migrate to pathlib (#697)
## ℹ️ Description Eliminate all blocking I/O operations in async contexts and modernize file path handling by migrating from os.path to pathlib.Path. - Link to the related issue(s): #692 - Get rid of the TODO in pyproject.toml - The added debug logging will ease the troubleshooting for path related issues. ## 📋 Changes Summary - Enable ASYNC210, ASYNC230, ASYNC240, ASYNC250 Ruff rules - Wrap blocking urllib.request.urlopen() in run_in_executor - Wrap blocking file operations (open, write) in run_in_executor - Replace blocking os.path calls with async helpers using run_in_executor - Replace blocking input() with await ainput() - Migrate extract.py from os.path to pathlib.Path - Use Path() constructor and / operator for path joining - Use Path.mkdir(), Path.rename() in executor instead of os functions - Create mockable _path_exists() and _path_is_dir() helpers - Add debug logging for all file system operations ### ⚙️ Type of Change Select the type(s) of change(s) included in this pull request: - [X] 🐞 Bug fix (non-breaking change which fixes an issue) - [ ] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [X] I have reviewed my changes to ensure they meet the project's standards. - [X] I have tested my changes and ensured that all tests pass (`pdm run test`). - [X] I have formatted the code (`pdm run format`). - [X] I have verified that linting passes (`pdm run lint`). - [X] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Refactor** * Made user prompt non‑blocking to improve responsiveness. * Converted filesystem/path handling and prefs I/O to async‑friendly operations; moved blocking network and file work to background tasks. * Added async file/path helpers and async port‑check before browser connections. * **Tests** * Expanded unit tests for path helpers, image download success/failure, prefs writing, and directory creation/renaming workflows. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -25,7 +25,7 @@ from nodriver.core.element import Element
|
||||
from nodriver.core.tab import Tab as Page
|
||||
|
||||
from kleinanzeigen_bot.model.config_model import Config
|
||||
from kleinanzeigen_bot.utils import loggers
|
||||
from kleinanzeigen_bot.utils import files, loggers
|
||||
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin # noqa: PLC2701
|
||||
|
||||
|
||||
@@ -95,6 +95,30 @@ def web_scraper(mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage) -> Web
|
||||
return scraper
|
||||
|
||||
|
||||
def test_write_initial_prefs(tmp_path:Path) -> None:
|
||||
"""Test _write_initial_prefs helper function."""
|
||||
from kleinanzeigen_bot.utils.web_scraping_mixin import _write_initial_prefs # noqa: PLC0415, PLC2701
|
||||
|
||||
prefs_file = tmp_path / "Preferences"
|
||||
_write_initial_prefs(str(prefs_file))
|
||||
|
||||
# Verify file was created
|
||||
assert prefs_file.exists()
|
||||
|
||||
# Verify content is valid JSON with expected structure
|
||||
with open(prefs_file, encoding = "UTF-8") as f:
|
||||
prefs = json.load(f)
|
||||
|
||||
assert prefs["credentials_enable_service"] is False
|
||||
assert prefs["enable_do_not_track"] is True
|
||||
assert prefs["google"]["services"]["consented_to_sync"] is False
|
||||
assert prefs["profile"]["password_manager_enabled"] is False
|
||||
assert prefs["profile"]["default_content_setting_values"]["notifications"] == 2
|
||||
assert prefs["signin"]["allowed"] is False
|
||||
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
|
||||
assert prefs["devtools"]["preferences"]["currentDockState"] == '"bottom"'
|
||||
|
||||
|
||||
class TestWebScrapingErrorHandling:
|
||||
"""Test error handling scenarios in WebScrapingMixin."""
|
||||
|
||||
@@ -728,7 +752,7 @@ class TestWebScrapingBrowserConfiguration:
|
||||
chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
real_exists = os.path.exists
|
||||
|
||||
def mock_exists(path:str) -> bool:
|
||||
def mock_exists_sync(path:str) -> bool:
|
||||
# Handle all browser paths
|
||||
if path in {
|
||||
# Linux paths
|
||||
@@ -754,7 +778,12 @@ class TestWebScrapingBrowserConfiguration:
|
||||
if "Preferences" in str(path) and str(tmp_path) in str(path):
|
||||
return real_exists(path)
|
||||
return False
|
||||
monkeypatch.setattr(os.path, "exists", mock_exists)
|
||||
|
||||
async def mock_exists_async(path:str | Path) -> bool:
|
||||
return mock_exists_sync(str(path))
|
||||
|
||||
monkeypatch.setattr(os.path, "exists", mock_exists_sync)
|
||||
monkeypatch.setattr(files, "exists", mock_exists_async)
|
||||
|
||||
# Create test profile directory
|
||||
profile_dir = tmp_path / "Default"
|
||||
@@ -762,8 +791,7 @@ class TestWebScrapingBrowserConfiguration:
|
||||
prefs_file = profile_dir / "Preferences"
|
||||
|
||||
# Test with existing preferences file
|
||||
with open(prefs_file, "w", encoding = "UTF-8") as f:
|
||||
json.dump({"existing": "prefs"}, f)
|
||||
prefs_file.write_text(json.dumps({"existing": "prefs"}), encoding = "UTF-8")
|
||||
|
||||
scraper = WebScrapingMixin()
|
||||
scraper.browser_config.user_data_dir = str(tmp_path)
|
||||
@@ -771,22 +799,20 @@ class TestWebScrapingBrowserConfiguration:
|
||||
await scraper.create_browser_session()
|
||||
|
||||
# Verify preferences file was not overwritten
|
||||
with open(prefs_file, "r", encoding = "UTF-8") as f:
|
||||
prefs = json.load(f)
|
||||
assert prefs["existing"] == "prefs"
|
||||
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
|
||||
assert prefs["existing"] == "prefs"
|
||||
|
||||
# Test with missing preferences file
|
||||
prefs_file.unlink()
|
||||
await scraper.create_browser_session()
|
||||
|
||||
# Verify new preferences file was created with correct settings
|
||||
with open(prefs_file, "r", encoding = "UTF-8") as f:
|
||||
prefs = json.load(f)
|
||||
assert prefs["credentials_enable_service"] is False
|
||||
assert prefs["enable_do_not_track"] is True
|
||||
assert prefs["profile"]["password_manager_enabled"] is False
|
||||
assert prefs["signin"]["allowed"] is False
|
||||
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
|
||||
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
|
||||
assert prefs["credentials_enable_service"] is False
|
||||
assert prefs["enable_do_not_track"] is True
|
||||
assert prefs["profile"]["password_manager_enabled"] is False
|
||||
assert prefs["signin"]["allowed"] is False
|
||||
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_browser_arguments_configuration(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
|
||||
@@ -815,6 +841,10 @@ class TestWebScrapingBrowserConfiguration:
|
||||
# Mock os.path.exists to return True for both Chrome and Edge paths
|
||||
monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"})
|
||||
|
||||
async def mock_exists_async(path:str | Path) -> bool:
|
||||
return str(path) in {"/usr/bin/chrome", "/usr/bin/edge"}
|
||||
monkeypatch.setattr(files, "exists", mock_exists_async)
|
||||
|
||||
# Test with custom arguments
|
||||
scraper = WebScrapingMixin()
|
||||
scraper.browser_config.arguments = ["--custom-arg=value", "--another-arg"]
|
||||
@@ -875,27 +905,41 @@ class TestWebScrapingBrowserConfiguration:
|
||||
# Mock Config class
|
||||
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
|
||||
|
||||
# Mock os.path.exists to return True for browser binaries and extension files, real_exists for others
|
||||
real_exists = os.path.exists
|
||||
monkeypatch.setattr(
|
||||
os.path,
|
||||
"exists",
|
||||
lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge", str(ext1), str(ext2)} or real_exists(p),
|
||||
)
|
||||
# Mock files.exists and files.is_dir to return appropriate values
|
||||
async def mock_exists(path:str | Path) -> bool:
|
||||
path_str = str(path)
|
||||
# Resolve real paths to handle symlinks (e.g., /var -> /private/var on macOS)
|
||||
real_path = str(Path(path_str).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||
real_ext1 = str(Path(ext1).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||
real_ext2 = str(Path(ext2).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||
return path_str in {"/usr/bin/chrome", "/usr/bin/edge"} or real_path in {real_ext1, real_ext2} or os.path.exists(path_str) # noqa: ASYNC240
|
||||
|
||||
async def mock_is_dir(path:str | Path) -> bool:
|
||||
path_str = str(path)
|
||||
# Resolve real paths to handle symlinks
|
||||
real_path = str(Path(path_str).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||
real_ext1 = str(Path(ext1).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||
real_ext2 = str(Path(ext2).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||
# Nodriver extracts CRX files to temp directories, so they appear as directories
|
||||
if real_path in {real_ext1, real_ext2}:
|
||||
return True
|
||||
return Path(path_str).is_dir() # noqa: ASYNC240 Test mock, runs synchronously
|
||||
|
||||
monkeypatch.setattr(files, "exists", mock_exists)
|
||||
monkeypatch.setattr(files, "is_dir", mock_is_dir)
|
||||
|
||||
# Test extension loading
|
||||
scraper = WebScrapingMixin()
|
||||
scraper.browser_config.extensions = [str(ext1), str(ext2)]
|
||||
scraper.browser_config.binary_location = "/usr/bin/chrome"
|
||||
# Removed monkeypatch for os.path.exists so extension files are detected
|
||||
await scraper.create_browser_session()
|
||||
|
||||
# Verify extensions were loaded
|
||||
config = _nodriver_start_mock().call_args[0][0]
|
||||
assert len(config._extensions) == 2
|
||||
for ext_path in config._extensions:
|
||||
assert os.path.exists(ext_path)
|
||||
assert os.path.isdir(ext_path)
|
||||
assert await files.exists(ext_path)
|
||||
assert await files.is_dir(ext_path)
|
||||
|
||||
# Test with non-existent extension
|
||||
scraper.browser_config.extensions = ["non_existent.crx"]
|
||||
@@ -976,8 +1020,7 @@ class TestWebScrapingBrowserConfiguration:
|
||||
scraper.browser_config.user_data_dir = str(tmp_path)
|
||||
scraper.browser_config.profile_name = "Default"
|
||||
await scraper.create_browser_session()
|
||||
with open(state_file, "w", encoding = "utf-8") as f:
|
||||
f.write('{"foo": "bar"}')
|
||||
state_file.write_text('{"foo": "bar"}', encoding = "utf-8")
|
||||
scraper.browser._process_pid = 12345
|
||||
scraper.browser.stop = MagicMock()
|
||||
with patch("psutil.Process") as mock_proc:
|
||||
@@ -989,8 +1032,7 @@ class TestWebScrapingBrowserConfiguration:
|
||||
scraper2.browser_config.user_data_dir = str(tmp_path)
|
||||
scraper2.browser_config.profile_name = "Default"
|
||||
await scraper2.create_browser_session()
|
||||
with open(state_file, "r", encoding = "utf-8") as f:
|
||||
data = f.read()
|
||||
data = state_file.read_text(encoding = "utf-8")
|
||||
assert data == '{"foo": "bar"}'
|
||||
scraper2.browser._process_pid = 12346
|
||||
scraper2.browser.stop = MagicMock()
|
||||
@@ -1814,6 +1856,7 @@ class TestWebScrapingMixinPortRetry:
|
||||
) -> None:
|
||||
"""Test error handling when browser connection fails."""
|
||||
with patch("os.path.exists", return_value = True), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||
@@ -1833,6 +1876,7 @@ class TestWebScrapingMixinPortRetry:
|
||||
) -> None:
|
||||
"""Test error handling when browser connection fails with non-root error."""
|
||||
with patch("os.path.exists", return_value = True), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||
@@ -1860,6 +1904,7 @@ class TestWebScrapingMixinPortRetry:
|
||||
) -> None:
|
||||
"""Test error handling when browser startup fails with root error."""
|
||||
with patch("os.path.exists", return_value = True), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||
|
||||
@@ -1878,6 +1923,7 @@ class TestWebScrapingMixinPortRetry:
|
||||
) -> None:
|
||||
"""Test error handling when browser startup fails with non-root error."""
|
||||
with patch("os.path.exists", return_value = True), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
|
||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user