fix: eliminate async safety violations and migrate to pathlib (#697)

## ℹ️ Description
Eliminate all blocking I/O operations in async contexts and modernize
file path handling by migrating from os.path to pathlib.Path.

- Link to the related issue(s): #692 
- Get rid of the TODO in pyproject.toml
- The added debug logging will ease the troubleshooting for path related
issues.

## 📋 Changes Summary

- Enable ASYNC210, ASYNC230, ASYNC240, ASYNC250 Ruff rules
- Wrap blocking urllib.request.urlopen() in run_in_executor
- Wrap blocking file operations (open, write) in run_in_executor
- Replace blocking os.path calls with async helpers using
run_in_executor
- Replace blocking input() with await ainput()
- Migrate extract.py from os.path to pathlib.Path
- Use Path() constructor and / operator for path joining
- Use Path.mkdir(), Path.rename() in executor instead of os functions
- Create mockable _path_exists() and _path_is_dir() helpers
- Add debug logging for all file system operations

### ⚙️ Type of Change
Select the type(s) of change(s) included in this pull request:
- [X] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ]  New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)


##  Checklist
Before requesting a review, confirm the following:
- [X] I have reviewed my changes to ensure they meet the project's
standards.
- [X] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [X] I have formatted the code (`pdm run format`).
- [X] I have verified that linting passes (`pdm run lint`).
- [X] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Refactor**
  * Made user prompt non‑blocking to improve responsiveness.
* Converted filesystem/path handling and prefs I/O to async‑friendly
operations; moved blocking network and file work to background tasks.
* Added async file/path helpers and async port‑check before browser
connections.

* **Tests**
* Expanded unit tests for path helpers, image download success/failure,
prefs writing, and directory creation/renaming workflows.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2025-12-05 20:53:40 +01:00
committed by GitHub
parent 6cbc25b54c
commit 220c01f257
9 changed files with 527 additions and 303 deletions

View File

@@ -25,7 +25,7 @@ from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page
from kleinanzeigen_bot.model.config_model import Config
from kleinanzeigen_bot.utils import loggers
from kleinanzeigen_bot.utils import files, loggers
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin # noqa: PLC2701
@@ -95,6 +95,30 @@ def web_scraper(mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage) -> Web
return scraper
def test_write_initial_prefs(tmp_path:Path) -> None:
"""Test _write_initial_prefs helper function."""
from kleinanzeigen_bot.utils.web_scraping_mixin import _write_initial_prefs # noqa: PLC0415, PLC2701
prefs_file = tmp_path / "Preferences"
_write_initial_prefs(str(prefs_file))
# Verify file was created
assert prefs_file.exists()
# Verify content is valid JSON with expected structure
with open(prefs_file, encoding = "UTF-8") as f:
prefs = json.load(f)
assert prefs["credentials_enable_service"] is False
assert prefs["enable_do_not_track"] is True
assert prefs["google"]["services"]["consented_to_sync"] is False
assert prefs["profile"]["password_manager_enabled"] is False
assert prefs["profile"]["default_content_setting_values"]["notifications"] == 2
assert prefs["signin"]["allowed"] is False
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
assert prefs["devtools"]["preferences"]["currentDockState"] == '"bottom"'
class TestWebScrapingErrorHandling:
"""Test error handling scenarios in WebScrapingMixin."""
@@ -728,7 +752,7 @@ class TestWebScrapingBrowserConfiguration:
chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
real_exists = os.path.exists
def mock_exists(path:str) -> bool:
def mock_exists_sync(path:str) -> bool:
# Handle all browser paths
if path in {
# Linux paths
@@ -754,7 +778,12 @@ class TestWebScrapingBrowserConfiguration:
if "Preferences" in str(path) and str(tmp_path) in str(path):
return real_exists(path)
return False
monkeypatch.setattr(os.path, "exists", mock_exists)
async def mock_exists_async(path:str | Path) -> bool:
return mock_exists_sync(str(path))
monkeypatch.setattr(os.path, "exists", mock_exists_sync)
monkeypatch.setattr(files, "exists", mock_exists_async)
# Create test profile directory
profile_dir = tmp_path / "Default"
@@ -762,8 +791,7 @@ class TestWebScrapingBrowserConfiguration:
prefs_file = profile_dir / "Preferences"
# Test with existing preferences file
with open(prefs_file, "w", encoding = "UTF-8") as f:
json.dump({"existing": "prefs"}, f)
prefs_file.write_text(json.dumps({"existing": "prefs"}), encoding = "UTF-8")
scraper = WebScrapingMixin()
scraper.browser_config.user_data_dir = str(tmp_path)
@@ -771,22 +799,20 @@ class TestWebScrapingBrowserConfiguration:
await scraper.create_browser_session()
# Verify preferences file was not overwritten
with open(prefs_file, "r", encoding = "UTF-8") as f:
prefs = json.load(f)
assert prefs["existing"] == "prefs"
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
assert prefs["existing"] == "prefs"
# Test with missing preferences file
prefs_file.unlink()
await scraper.create_browser_session()
# Verify new preferences file was created with correct settings
with open(prefs_file, "r", encoding = "UTF-8") as f:
prefs = json.load(f)
assert prefs["credentials_enable_service"] is False
assert prefs["enable_do_not_track"] is True
assert prefs["profile"]["password_manager_enabled"] is False
assert prefs["signin"]["allowed"] is False
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
assert prefs["credentials_enable_service"] is False
assert prefs["enable_do_not_track"] is True
assert prefs["profile"]["password_manager_enabled"] is False
assert prefs["signin"]["allowed"] is False
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
@pytest.mark.asyncio
async def test_browser_arguments_configuration(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
@@ -815,6 +841,10 @@ class TestWebScrapingBrowserConfiguration:
# Mock os.path.exists to return True for both Chrome and Edge paths
monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"})
async def mock_exists_async(path:str | Path) -> bool:
return str(path) in {"/usr/bin/chrome", "/usr/bin/edge"}
monkeypatch.setattr(files, "exists", mock_exists_async)
# Test with custom arguments
scraper = WebScrapingMixin()
scraper.browser_config.arguments = ["--custom-arg=value", "--another-arg"]
@@ -875,27 +905,41 @@ class TestWebScrapingBrowserConfiguration:
# Mock Config class
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
# Mock os.path.exists to return True for browser binaries and extension files, real_exists for others
real_exists = os.path.exists
monkeypatch.setattr(
os.path,
"exists",
lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge", str(ext1), str(ext2)} or real_exists(p),
)
# Mock files.exists and files.is_dir to return appropriate values
async def mock_exists(path:str | Path) -> bool:
path_str = str(path)
# Resolve real paths to handle symlinks (e.g., /var -> /private/var on macOS)
real_path = str(Path(path_str).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
real_ext1 = str(Path(ext1).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
real_ext2 = str(Path(ext2).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
return path_str in {"/usr/bin/chrome", "/usr/bin/edge"} or real_path in {real_ext1, real_ext2} or os.path.exists(path_str) # noqa: ASYNC240
async def mock_is_dir(path:str | Path) -> bool:
path_str = str(path)
# Resolve real paths to handle symlinks
real_path = str(Path(path_str).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
real_ext1 = str(Path(ext1).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
real_ext2 = str(Path(ext2).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
# Nodriver extracts CRX files to temp directories, so they appear as directories
if real_path in {real_ext1, real_ext2}:
return True
return Path(path_str).is_dir() # noqa: ASYNC240 Test mock, runs synchronously
monkeypatch.setattr(files, "exists", mock_exists)
monkeypatch.setattr(files, "is_dir", mock_is_dir)
# Test extension loading
scraper = WebScrapingMixin()
scraper.browser_config.extensions = [str(ext1), str(ext2)]
scraper.browser_config.binary_location = "/usr/bin/chrome"
# Removed monkeypatch for os.path.exists so extension files are detected
await scraper.create_browser_session()
# Verify extensions were loaded
config = _nodriver_start_mock().call_args[0][0]
assert len(config._extensions) == 2
for ext_path in config._extensions:
assert os.path.exists(ext_path)
assert os.path.isdir(ext_path)
assert await files.exists(ext_path)
assert await files.is_dir(ext_path)
# Test with non-existent extension
scraper.browser_config.extensions = ["non_existent.crx"]
@@ -976,8 +1020,7 @@ class TestWebScrapingBrowserConfiguration:
scraper.browser_config.user_data_dir = str(tmp_path)
scraper.browser_config.profile_name = "Default"
await scraper.create_browser_session()
with open(state_file, "w", encoding = "utf-8") as f:
f.write('{"foo": "bar"}')
state_file.write_text('{"foo": "bar"}', encoding = "utf-8")
scraper.browser._process_pid = 12345
scraper.browser.stop = MagicMock()
with patch("psutil.Process") as mock_proc:
@@ -989,8 +1032,7 @@ class TestWebScrapingBrowserConfiguration:
scraper2.browser_config.user_data_dir = str(tmp_path)
scraper2.browser_config.profile_name = "Default"
await scraper2.create_browser_session()
with open(state_file, "r", encoding = "utf-8") as f:
data = f.read()
data = state_file.read_text(encoding = "utf-8")
assert data == '{"foo": "bar"}'
scraper2.browser._process_pid = 12346
scraper2.browser.stop = MagicMock()
@@ -1814,6 +1856,7 @@ class TestWebScrapingMixinPortRetry:
) -> None:
"""Test error handling when browser connection fails."""
with patch("os.path.exists", return_value = True), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
@@ -1833,6 +1876,7 @@ class TestWebScrapingMixinPortRetry:
) -> None:
"""Test error handling when browser connection fails with non-root error."""
with patch("os.path.exists", return_value = True), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
@@ -1860,6 +1904,7 @@ class TestWebScrapingMixinPortRetry:
) -> None:
"""Test error handling when browser startup fails with root error."""
with patch("os.path.exists", return_value = True), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
@@ -1878,6 +1923,7 @@ class TestWebScrapingMixinPortRetry:
) -> None:
"""Test error handling when browser startup fails with non-root error."""
with patch("os.path.exists", return_value = True), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class: