mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
fix: eliminate async safety violations and migrate to pathlib (#697)
## ℹ️ Description

Eliminate all blocking I/O operations in async contexts and modernize file path handling by migrating from os.path to pathlib.Path.

- Link to the related issue(s): #692
- Get rid of the TODO in pyproject.toml
- The added debug logging will ease the troubleshooting for path-related issues.

## 📋 Changes Summary

- Enable ASYNC210, ASYNC230, ASYNC240, ASYNC250 Ruff rules
- Wrap blocking urllib.request.urlopen() in run_in_executor
- Wrap blocking file operations (open, write) in run_in_executor
- Replace blocking os.path calls with async helpers using run_in_executor
- Replace blocking input() with await ainput()
- Migrate extract.py from os.path to pathlib.Path
- Use Path() constructor and / operator for path joining
- Use Path.mkdir(), Path.rename() in executor instead of os functions
- Create mockable _path_exists() and _path_is_dir() helpers
- Add debug logging for all file system operations

### ⚙️ Type of Change

Select the type(s) of change(s) included in this pull request:

- [X] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ] ✨ New feature (adds new functionality without breaking existing usage)
- [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations)

## ✅ Checklist

Before requesting a review, confirm the following:

- [X] I have reviewed my changes to ensure they meet the project's standards.
- [X] I have tested my changes and ensured that all tests pass (`pdm run test`).
- [X] I have formatted the code (`pdm run format`).
- [X] I have verified that linting passes (`pdm run lint`).
- [X] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Refactor**
  * Made user prompt non‑blocking to improve responsiveness.
  * Converted filesystem/path handling and prefs I/O to async‑friendly operations; moved blocking network and file work to background tasks.
  * Added async file/path helpers and async port‑check before browser connections.
* **Tests**
  * Expanded unit tests for path helpers, image download success/failure, prefs writing, and directory creation/renaming workflows.

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
import os
|
||||
import asyncio, os # isort: skip
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def abspath(relative_path:str, relative_to:str | None = None) -> str:
|
||||
@@ -24,3 +25,23 @@ def abspath(relative_path:str, relative_to:str | None = None) -> str:
|
||||
base = os.path.dirname(base)
|
||||
|
||||
return os.path.normpath(os.path.join(base, relative_path))
|
||||
|
||||
|
||||
async def exists(path:str | Path) -> bool:
|
||||
"""
|
||||
Asynchronously check if a file or directory exists.
|
||||
|
||||
:param path: Path to check
|
||||
:return: True if path exists, False otherwise
|
||||
"""
|
||||
return await asyncio.get_running_loop().run_in_executor(None, Path(path).exists)
|
||||
|
||||
|
||||
async def is_dir(path:str | Path) -> bool:
|
||||
"""
|
||||
Asynchronously check if a path is a directory.
|
||||
|
||||
:param path: Path to check
|
||||
:return: True if path is a directory, False otherwise
|
||||
"""
|
||||
return await asyncio.get_running_loop().run_in_executor(None, Path(path).is_dir)
|
||||
|
||||
@@ -22,7 +22,7 @@ from nodriver.core.tab import Tab as Page
|
||||
from kleinanzeigen_bot.model.config_model import Config as BotConfig
|
||||
from kleinanzeigen_bot.model.config_model import TimeoutConfig
|
||||
|
||||
from . import loggers, net
|
||||
from . import files, loggers, net
|
||||
from .chrome_version_detector import (
|
||||
ChromeVersionInfo,
|
||||
detect_chrome_version_from_binary,
|
||||
@@ -100,6 +100,37 @@ class BrowserConfig:
|
||||
self.profile_name:str | None = None
|
||||
|
||||
|
||||
def _write_initial_prefs(prefs_file:str) -> None:
    """
    Write the initial browser preferences as JSON to the given file.

    The file is opened in write mode with UTF-8 encoding, so existing
    content at that location is replaced.

    :param prefs_file: Path of the preferences file to write
    """
    content_settings = {
        "popups": 0,
        "notifications": 2  # 1 = allow, 2 = block browser notifications
    }
    initial_prefs = {
        "credentials_enable_service": False,
        "enable_do_not_track": True,
        "google": {
            "services": {
                "consented_to_sync": False
            }
        },
        "profile": {
            "default_content_setting_values": content_settings,
            "password_manager_enabled": False
        },
        "signin": {
            "allowed": False
        },
        "translate_site_blacklist": [
            "www.kleinanzeigen.de"
        ],
        "devtools": {
            "preferences": {
                # the stored string value includes the inner double quotes
                "currentDockState": '"bottom"'
            }
        }
    }
    with open(prefs_file, "w", encoding = "UTF-8") as prefs_out:
        json.dump(initial_prefs, prefs_out)
|
||||
|
||||
|
||||
class WebScrapingMixin:
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -174,7 +205,7 @@ class WebScrapingMixin:
|
||||
LOG.info("Creating Browser session...")
|
||||
|
||||
if self.browser_config.binary_location:
|
||||
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
||||
ensure(await files.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
||||
else:
|
||||
self.browser_config.binary_location = self.get_compatible_browser()
|
||||
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
|
||||
@@ -289,41 +320,14 @@ class WebScrapingMixin:
|
||||
profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
|
||||
os.makedirs(profile_dir, exist_ok = True)
|
||||
prefs_file = os.path.join(profile_dir, "Preferences")
|
||||
if not os.path.exists(prefs_file):
|
||||
if not await files.exists(prefs_file):
|
||||
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
||||
with open(prefs_file, "w", encoding = "UTF-8") as fd:
|
||||
json.dump({
|
||||
"credentials_enable_service": False,
|
||||
"enable_do_not_track": True,
|
||||
"google": {
|
||||
"services": {
|
||||
"consented_to_sync": False
|
||||
}
|
||||
},
|
||||
"profile": {
|
||||
"default_content_setting_values": {
|
||||
"popups": 0,
|
||||
"notifications": 2 # 1 = allow, 2 = block browser notifications
|
||||
},
|
||||
"password_manager_enabled": False
|
||||
},
|
||||
"signin": {
|
||||
"allowed": False
|
||||
},
|
||||
"translate_site_blacklist": [
|
||||
"www.kleinanzeigen.de"
|
||||
],
|
||||
"devtools": {
|
||||
"preferences": {
|
||||
"currentDockState": '"bottom"'
|
||||
}
|
||||
}
|
||||
}, fd)
|
||||
await asyncio.get_running_loop().run_in_executor(None, _write_initial_prefs, prefs_file)
|
||||
|
||||
# load extensions
|
||||
for crx_extension in self.browser_config.extensions:
|
||||
LOG.info(" -> Adding Browser extension: [%s]", crx_extension)
|
||||
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
||||
ensure(await files.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
||||
cfg.add_extension(crx_extension)
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user