fix: eliminate async safety violations and migrate to pathlib (#697)

## ℹ️ Description
Eliminate all blocking I/O operations in async contexts and modernize
file path handling by migrating from os.path to pathlib.Path.

- Link to the related issue(s): #692 
- Get rid of the TODO in pyproject.toml
- The added debug logging will ease the troubleshooting for path related
issues.

## 📋 Changes Summary

- Enable ASYNC210, ASYNC230, ASYNC240, ASYNC250 Ruff rules
- Wrap blocking urllib.request.urlopen() in run_in_executor
- Wrap blocking file operations (open, write) in run_in_executor
- Replace blocking os.path calls with async helpers using
run_in_executor
- Replace blocking input() with await ainput()
- Migrate extract.py from os.path to pathlib.Path
- Use Path() constructor and / operator for path joining
- Use Path.mkdir(), Path.rename() in executor instead of os functions
- Create mockable _path_exists() and _path_is_dir() helpers
- Add debug logging for all file system operations

### ⚙️ Type of Change
Select the type(s) of change(s) included in this pull request:
- [X] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ]  New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)


##  Checklist
Before requesting a review, confirm the following:
- [X] I have reviewed my changes to ensure they meet the project's
standards.
- [X] I have tested my changes and ensured that all tests pass (`pdm run
test`).
- [X] I have formatted the code (`pdm run format`).
- [X] I have verified that linting passes (`pdm run lint`).
- [X] I have updated documentation where necessary.

By submitting this pull request, I confirm that you may use, modify,
copy, and redistribute this contribution under the terms of your
choice.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **Refactor**
  * Made user prompt non‑blocking to improve responsiveness.
  * Converted filesystem/path handling and prefs I/O to async‑friendly
    operations; moved blocking network and file work to background tasks.
  * Added async file/path helpers and async port‑check before browser
    connections.

* **Tests**
  * Expanded unit tests for path helpers, image download success/failure,
    prefs writing, and directory creation/renaming workflows.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2025-12-05 20:53:40 +01:00
committed by GitHub
parent 6cbc25b54c
commit 220c01f257
9 changed files with 527 additions and 303 deletions

View File

@@ -226,10 +226,6 @@ select = [
] ]
ignore = [ ignore = [
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed "ANN401", # Dynamically typed expressions (typing.Any) are disallowed
"ASYNC210", # TODO Async functions should not call blocking HTTP methods
"ASYNC230", # TODO Async functions should not open files with blocking methods like `open`
"ASYNC240", # TODO Async functions should not use os.path methods, use trio.Path or anyio.path
"ASYNC250", # TODO Blocking call to input() in async context
"COM812", # Trailing comma missing "COM812", # Trailing comma missing
"D1", # Missing docstring in ... "D1", # Missing docstring in ...
"D200", # One-line docstring should fit on one line "D200", # One-line docstring should fit on one line

View File

@@ -937,7 +937,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.warning("# Payment form detected! Please proceed with payment.") LOG.warning("# Payment form detected! Please proceed with payment.")
LOG.warning("############################################") LOG.warning("############################################")
await self.web_scroll_page_down() await self.web_scroll_page_down()
input(_("Press a key to continue...")) await ainput(_("Press a key to continue..."))
except TimeoutError: except TimeoutError:
pass pass
@@ -1108,7 +1108,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# in some categories we need to go another dialog back # in some categories we need to go another dialog back
try: try:
await self.web_find(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]', await self.web_find(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]',
timeout=short_timeout) timeout = short_timeout)
except TimeoutError: except TimeoutError:
await self.web_click(By.XPATH, '//dialog//button[contains(., "Zurück")]') await self.web_click(By.XPATH, '//dialog//button[contains(., "Zurück")]')

View File

@@ -1,18 +1,21 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import asyncio
from gettext import gettext as _ from gettext import gettext as _
import json, mimetypes, os, re, shutil # isort: skip import json, mimetypes, re, shutil # isort: skip
import urllib.error as urllib_error
import urllib.request as urllib_request import urllib.request as urllib_request
from datetime import datetime from datetime import datetime
from pathlib import Path
from typing import Any, Final from typing import Any, Final
from kleinanzeigen_bot.model.ad_model import ContactPartial from kleinanzeigen_bot.model.ad_model import ContactPartial
from .model.ad_model import AdPartial from .model.ad_model import AdPartial
from .model.config_model import Config from .model.config_model import Config
from .utils import dicts, i18n, loggers, misc, reflect from .utils import dicts, files, i18n, loggers, misc, reflect
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
__all__ = [ __all__ = [
@@ -44,23 +47,39 @@ class AdExtractor(WebScrapingMixin):
""" """
# create sub-directory for ad(s) to download (if necessary): # create sub-directory for ad(s) to download (if necessary):
relative_directory = "downloaded-ads" relative_directory = Path("downloaded-ads")
# make sure configured base directory exists # make sure configured base directory exists (using exist_ok=True to avoid TOCTOU race)
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory): await asyncio.get_running_loop().run_in_executor(None, lambda: relative_directory.mkdir(exist_ok = True)) # noqa: ASYNC240
os.mkdir(relative_directory) LOG.info("Ensured ads directory exists at ./%s.", relative_directory)
LOG.info("Created ads directory at ./%s.", relative_directory)
# Extract ad info and determine final directory path # Extract ad info and determine final directory path
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling( ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(
relative_directory, ad_id relative_directory, ad_id
) )
# Save the ad configuration file # Save the ad configuration file (offload to executor to avoid blocking the event loop)
ad_file_path = final_dir + "/" + f"ad_{ad_id}.yaml" ad_file_path = str(Path(final_dir) / f"ad_{ad_id}.yaml")
dicts.save_dict( header_string = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
ad_file_path, await asyncio.get_running_loop().run_in_executor(
ad_cfg.model_dump(), None,
header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json") lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string)
)
@staticmethod
def _download_and_save_image_sync(url:str, directory:str, filename_prefix:str, img_nr:int) -> str | None:
try:
with urllib_request.urlopen(url) as response: # noqa: S310 Audit URL open for permitted schemes.
content_type = response.info().get_content_type()
file_ending = mimetypes.guess_extension(content_type) or ""
# Use pathlib.Path for OS-agnostic path handling
img_path = Path(directory) / f"{filename_prefix}{img_nr}{file_ending}"
with open(img_path, "wb") as f:
shutil.copyfileobj(response, f)
return str(img_path)
except (urllib_error.URLError, urllib_error.HTTPError, OSError, shutil.Error) as e:
# Narrow exception handling to expected network/filesystem errors
LOG.warning("Failed to download image %s: %s", url, e)
return None
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]: async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
""" """
@@ -85,19 +104,26 @@ class AdExtractor(WebScrapingMixin):
img_nr = 1 img_nr = 1
dl_counter = 0 dl_counter = 0
loop = asyncio.get_running_loop()
for img_element in images: for img_element in images:
current_img_url = img_element.attrs["src"] # URL of the image current_img_url = img_element.attrs["src"] # URL of the image
if current_img_url is None: if current_img_url is None:
continue continue
with urllib_request.urlopen(str(current_img_url)) as response: # noqa: S310 Audit URL open for permitted schemes. img_path = await loop.run_in_executor(
content_type = response.info().get_content_type() None,
file_ending = mimetypes.guess_extension(content_type) self._download_and_save_image_sync,
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}" str(current_img_url),
with open(img_path, "wb") as f: directory,
shutil.copyfileobj(response, f) img_fn_prefix,
img_nr
)
if img_path:
dl_counter += 1 dl_counter += 1
img_paths.append(img_path.rsplit("/", maxsplit = 1)[-1]) # Use pathlib.Path for OS-agnostic path handling
img_paths.append(Path(img_path).name)
img_nr += 1 img_nr += 1
LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter)) LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
@@ -354,8 +380,8 @@ class AdExtractor(WebScrapingMixin):
return ad_cfg return ad_cfg
async def _extract_ad_page_info_with_directory_handling( async def _extract_ad_page_info_with_directory_handling(
self, relative_directory:str, ad_id:int self, relative_directory:Path, ad_id:int
) -> tuple[AdPartial, str]: ) -> tuple[AdPartial, Path]:
""" """
Extracts ad information and handles directory creation/renaming. Extracts ad information and handles directory creation/renaming.
@@ -373,32 +399,37 @@ class AdExtractor(WebScrapingMixin):
# Determine the final directory path # Determine the final directory path
sanitized_title = misc.sanitize_folder_name(title, self.config.download.folder_name_max_length) sanitized_title = misc.sanitize_folder_name(title, self.config.download.folder_name_max_length)
final_dir = os.path.join(relative_directory, f"ad_{ad_id}_{sanitized_title}") final_dir = relative_directory / f"ad_{ad_id}_{sanitized_title}"
temp_dir = os.path.join(relative_directory, f"ad_{ad_id}") temp_dir = relative_directory / f"ad_{ad_id}"
loop = asyncio.get_running_loop()
# Handle existing directories # Handle existing directories
if os.path.exists(final_dir): if await files.exists(final_dir):
# If the folder with title already exists, delete it # If the folder with title already exists, delete it
LOG.info("Deleting current folder of ad %s...", ad_id) LOG.info("Deleting current folder of ad %s...", ad_id)
shutil.rmtree(final_dir) LOG.debug("Removing directory tree: %s", final_dir)
await loop.run_in_executor(None, shutil.rmtree, str(final_dir))
if os.path.exists(temp_dir): if await files.exists(temp_dir):
if self.config.download.rename_existing_folders: if self.config.download.rename_existing_folders:
# Rename the old folder to the new name with title # Rename the old folder to the new name with title
LOG.info("Renaming folder from %s to %s for ad %s...", LOG.info("Renaming folder from %s to %s for ad %s...",
os.path.basename(temp_dir), os.path.basename(final_dir), ad_id) temp_dir.name, final_dir.name, ad_id)
os.rename(temp_dir, final_dir) LOG.debug("Renaming: %s -> %s", temp_dir, final_dir)
await loop.run_in_executor(None, temp_dir.rename, final_dir)
else: else:
# Use the existing folder without renaming # Use the existing folder without renaming
final_dir = temp_dir final_dir = temp_dir
LOG.info("Using existing folder for ad %s at %s.", ad_id, final_dir) LOG.info("Using existing folder for ad %s at %s.", ad_id, final_dir)
else: else:
# Create new directory with title # Create new directory with title
os.mkdir(final_dir) LOG.debug("Creating new directory: %s", final_dir)
await loop.run_in_executor(None, final_dir.mkdir)
LOG.info("New directory for ad created at %s.", final_dir) LOG.info("New directory for ad created at %s.", final_dir)
# Now extract complete ad info (including images) to the final directory # Now extract complete ad info (including images) to the final directory
ad_cfg = await self._extract_ad_page_info(final_dir, ad_id) ad_cfg = await self._extract_ad_page_info(str(final_dir), ad_id)
return ad_cfg, final_dir return ad_cfg, final_dir

View File

@@ -173,7 +173,10 @@ kleinanzeigen_bot/__init__.py:
kleinanzeigen_bot/extract.py: kleinanzeigen_bot/extract.py:
################################################# #################################################
download_ad: download_ad:
"Created ads directory at ./%s.": "Verzeichnis für Anzeigen erstellt unter ./%s." "Ensured ads directory exists at ./%s.": "Verzeichnis [%s] für Anzeige vorhanden."
_download_and_save_image_sync:
"Failed to download image %s: %s": "Fehler beim Herunterladen des Bildes %s: %s"
_download_images_from_ad_page: _download_images_from_ad_page:
"Found %s.": "%s gefunden." "Found %s.": "%s gefunden."

View File

@@ -1,7 +1,8 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import os import asyncio, os # isort: skip
from pathlib import Path
def abspath(relative_path:str, relative_to:str | None = None) -> str: def abspath(relative_path:str, relative_to:str | None = None) -> str:
@@ -24,3 +25,23 @@ def abspath(relative_path:str, relative_to:str | None = None) -> str:
base = os.path.dirname(base) base = os.path.dirname(base)
return os.path.normpath(os.path.join(base, relative_path)) return os.path.normpath(os.path.join(base, relative_path))
async def exists(path:str | Path) -> bool:
"""
Asynchronously check if a file or directory exists.
:param path: Path to check
:return: True if path exists, False otherwise
"""
return await asyncio.get_running_loop().run_in_executor(None, Path(path).exists)
async def is_dir(path:str | Path) -> bool:
"""
Asynchronously check if a path is a directory.
:param path: Path to check
:return: True if path is a directory, False otherwise
"""
return await asyncio.get_running_loop().run_in_executor(None, Path(path).is_dir)

View File

@@ -22,7 +22,7 @@ from nodriver.core.tab import Tab as Page
from kleinanzeigen_bot.model.config_model import Config as BotConfig from kleinanzeigen_bot.model.config_model import Config as BotConfig
from kleinanzeigen_bot.model.config_model import TimeoutConfig from kleinanzeigen_bot.model.config_model import TimeoutConfig
from . import loggers, net from . import files, loggers, net
from .chrome_version_detector import ( from .chrome_version_detector import (
ChromeVersionInfo, ChromeVersionInfo,
detect_chrome_version_from_binary, detect_chrome_version_from_binary,
@@ -100,6 +100,37 @@ class BrowserConfig:
self.profile_name:str | None = None self.profile_name:str | None = None
def _write_initial_prefs(prefs_file:str) -> None:
    """
    Write the initial Chrome "Preferences" JSON file for a fresh profile.

    Disables credential/password services, sync consent, sign-in, popups and
    browser notifications, blocks translation prompts for kleinanzeigen.de,
    and docks devtools at the bottom.

    :param prefs_file: path of the Preferences file to create
    """
    initial_prefs = {
        "credentials_enable_service": False,
        "enable_do_not_track": True,
        "google": {
            "services": {
                "consented_to_sync": False
            }
        },
        "profile": {
            "default_content_setting_values": {
                "popups": 0,
                "notifications": 2  # 1 = allow, 2 = block browser notifications
            },
            "password_manager_enabled": False
        },
        "signin": {
            "allowed": False
        },
        "translate_site_blacklist": [
            "www.kleinanzeigen.de"
        ],
        "devtools": {
            "preferences": {
                "currentDockState": '"bottom"'
            }
        }
    }
    with open(prefs_file, "w", encoding = "UTF-8") as fd:
        json.dump(initial_prefs, fd)
class WebScrapingMixin: class WebScrapingMixin:
def __init__(self) -> None: def __init__(self) -> None:
@@ -174,7 +205,7 @@ class WebScrapingMixin:
LOG.info("Creating Browser session...") LOG.info("Creating Browser session...")
if self.browser_config.binary_location: if self.browser_config.binary_location:
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.") ensure(await files.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
else: else:
self.browser_config.binary_location = self.get_compatible_browser() self.browser_config.binary_location = self.get_compatible_browser()
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location) LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
@@ -289,41 +320,14 @@ class WebScrapingMixin:
profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default") profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
os.makedirs(profile_dir, exist_ok = True) os.makedirs(profile_dir, exist_ok = True)
prefs_file = os.path.join(profile_dir, "Preferences") prefs_file = os.path.join(profile_dir, "Preferences")
if not os.path.exists(prefs_file): if not await files.exists(prefs_file):
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file) LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
with open(prefs_file, "w", encoding = "UTF-8") as fd: await asyncio.get_running_loop().run_in_executor(None, _write_initial_prefs, prefs_file)
json.dump({
"credentials_enable_service": False,
"enable_do_not_track": True,
"google": {
"services": {
"consented_to_sync": False
}
},
"profile": {
"default_content_setting_values": {
"popups": 0,
"notifications": 2 # 1 = allow, 2 = block browser notifications
},
"password_manager_enabled": False
},
"signin": {
"allowed": False
},
"translate_site_blacklist": [
"www.kleinanzeigen.de"
],
"devtools": {
"preferences": {
"currentDockState": '"bottom"'
}
}
}, fd)
# load extensions # load extensions
for crx_extension in self.browser_config.extensions: for crx_extension in self.browser_config.extensions:
LOG.info(" -> Adding Browser extension: [%s]", crx_extension) LOG.info(" -> Adding Browser extension: [%s]", crx_extension)
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.") ensure(await files.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
cfg.add_extension(crx_extension) cfg.add_extension(crx_extension)
try: try:

View File

@@ -1,10 +1,12 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors # SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ # SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import json, os # isort: skip import json # isort: skip
from gettext import gettext as _ from gettext import gettext as _
from pathlib import Path
from typing import Any, TypedDict from typing import Any, TypedDict
from unittest.mock import AsyncMock, MagicMock, call, patch from unittest.mock import AsyncMock, MagicMock, call, patch
from urllib.error import URLError
import pytest import pytest
@@ -66,6 +68,124 @@ class TestAdExtractorBasics:
"""Test extraction of ad ID from different URL formats.""" """Test extraction of ad ID from different URL formats."""
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
@pytest.mark.asyncio
async def test_path_exists_helper(self, tmp_path:Path) -> None:
    """Test files.exists helper function."""
    from kleinanzeigen_bot.utils import files  # noqa: PLC0415
    # existing path is reported for both Path and str arguments
    existing_file = tmp_path / "test.txt"
    existing_file.write_text("test")
    for candidate in (existing_file, str(existing_file)):
        assert await files.exists(candidate) is True
    # missing path is reported for both Path and str arguments
    non_existing = tmp_path / "nonexistent.txt"
    for candidate in (non_existing, str(non_existing)):
        assert await files.exists(candidate) is False
@pytest.mark.asyncio
async def test_path_is_dir_helper(self, tmp_path:Path) -> None:
    """Test files.is_dir helper function."""
    from kleinanzeigen_bot.utils import files  # noqa: PLC0415
    # a real directory is reported for both Path and str arguments
    test_dir = tmp_path / "testdir"
    test_dir.mkdir()
    for candidate in (test_dir, str(test_dir)):
        assert await files.is_dir(candidate) is True
    # a regular file and a missing path both report False
    test_file = tmp_path / "test.txt"
    test_file.write_text("test")
    non_existing = tmp_path / "nonexistent"
    for candidate in (test_file, str(test_file), non_existing, str(non_existing)):
        assert await files.is_dir(candidate) is False
@pytest.mark.asyncio
async def test_exists_async_helper(self, tmp_path:Path) -> None:
    """Test files.exists async helper function."""
    # NOTE(review): this test is an exact duplicate of test_path_exists_helper
    # above — consider removing one copy or differentiating their coverage.
    from kleinanzeigen_bot.utils import files  # noqa: PLC0415
    # Test with existing path
    existing_file = tmp_path / "test.txt"
    existing_file.write_text("test")
    # helper must accept both Path objects and plain strings
    assert await files.exists(existing_file) is True
    assert await files.exists(str(existing_file)) is True
    # Test with non-existing path
    non_existing = tmp_path / "nonexistent.txt"
    assert await files.exists(non_existing) is False
    assert await files.exists(str(non_existing)) is False
@pytest.mark.asyncio
async def test_isdir_async_helper(self, tmp_path:Path) -> None:
    """Test files.is_dir async helper function."""
    # NOTE(review): this test is an exact duplicate of test_path_is_dir_helper
    # above — consider removing one copy or differentiating their coverage.
    from kleinanzeigen_bot.utils import files  # noqa: PLC0415
    # Test with directory
    test_dir = tmp_path / "testdir"
    test_dir.mkdir()
    # helper must accept both Path objects and plain strings
    assert await files.is_dir(test_dir) is True
    assert await files.is_dir(str(test_dir)) is True
    # Test with file
    test_file = tmp_path / "test.txt"
    test_file.write_text("test")
    assert await files.is_dir(test_file) is False
    assert await files.is_dir(str(test_file)) is False
    # Test with non-existing path
    non_existing = tmp_path / "nonexistent"
    assert await files.is_dir(non_existing) is False
    assert await files.is_dir(str(non_existing)) is False
def test_download_and_save_image_sync_success(self, tmp_path:Path) -> None:
    """Test _download_and_save_image_sync with successful download."""
    from unittest.mock import MagicMock, mock_open  # noqa: PLC0415
    test_dir = tmp_path / "images"
    test_dir.mkdir()
    # Mock urllib response
    # get_content_type() drives mimetypes.guess_extension in the code under test,
    # hence the endswith() check for the possible "image/jpeg" extensions below.
    mock_response = MagicMock()
    mock_response.info().get_content_type.return_value = "image/jpeg"
    # make the mock usable as a context manager (urlopen is used via "with")
    mock_response.__enter__ = MagicMock(return_value = mock_response)
    mock_response.__exit__ = MagicMock(return_value = False)
    # patch network access, file open and the copy so no real I/O happens
    with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", return_value = mock_response), \
            patch("kleinanzeigen_bot.extract.open", mock_open()), \
            patch("kleinanzeigen_bot.extract.shutil.copyfileobj"):
        result = AdExtractor._download_and_save_image_sync(
            "http://example.com/image.jpg",
            str(test_dir),
            "test_",
            1
        )
        # mimetypes.guess_extension("image/jpeg") is platform-dependent (.jpe/.jpeg/.jpg)
        assert result is not None
        assert result.endswith((".jpe", ".jpeg", ".jpg"))
        assert "test_1" in result
def test_download_and_save_image_sync_failure(self, tmp_path:Path) -> None:
    """Test _download_and_save_image_sync with download failure."""
    # a network error raised by urlopen() must be swallowed and reported as None
    with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", side_effect = URLError("Network error")):
        result = AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(tmp_path), "test_", 1)
        assert result is None
class TestAdExtractorPricing: class TestAdExtractorPricing:
"""Tests for pricing related functionality.""" """Tests for pricing related functionality."""
@@ -865,84 +985,17 @@ class TestAdExtractorDownload:
}) })
return AdExtractor(browser_mock, config) return AdExtractor(browser_mock, config)
@pytest.mark.asyncio
async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:
"""Test downloading an ad when the directory already exists."""
with patch("os.path.exists") as mock_exists, \
patch("os.path.isdir") as mock_isdir, \
patch("os.makedirs") as mock_makedirs, \
patch("os.mkdir") as mock_mkdir, \
patch("os.rename") as mock_rename, \
patch("shutil.rmtree") as mock_rmtree, \
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
base_dir = "downloaded-ads"
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
yaml_path = os.path.join(final_dir, "ad_12345.yaml")
# Configure mocks for directory checks
existing_paths = {base_dir, final_dir} # Final directory with title exists
mock_exists.side_effect = lambda path: path in existing_paths
mock_isdir.side_effect = lambda path: path == base_dir
# Mock the new method that handles directory creation and extraction
mock_extract_with_dir.return_value = (
AdPartial.model_validate({
"title": "Test Advertisement Title",
"description": "Test Description",
"category": "Dienstleistungen",
"price": 100,
"images": [],
"contact": {
"name": "Test User",
"street": "Test Street 123",
"zipcode": "12345",
"location": "Test City"
}
}),
final_dir
)
await extractor.download_ad(12345)
# Verify the correct functions were called
mock_extract_with_dir.assert_called_once()
# Directory handling is now done inside _extract_ad_page_info_with_directory_handling
# so we don't expect rmtree/mkdir to be called directly in download_ad
mock_rmtree.assert_not_called() # Directory handling is done internally
mock_mkdir.assert_not_called() # Directory handling is done internally
mock_makedirs.assert_not_called() # Directory already exists
mock_rename.assert_not_called() # No renaming needed
# Get the actual call arguments
actual_call = mock_save_dict.call_args
assert actual_call is not None
actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_download_ad(self, extractor:AdExtractor) -> None: async def test_download_ad(self, extractor:AdExtractor) -> None:
"""Test downloading an entire ad.""" """Test downloading an ad - directory creation and saving ad data."""
with patch("os.path.exists") as mock_exists, \ with patch("pathlib.Path.mkdir"), \
patch("os.path.isdir") as mock_isdir, \
patch("os.makedirs") as mock_makedirs, \
patch("os.mkdir") as mock_mkdir, \
patch("os.rename") as mock_rename, \
patch("shutil.rmtree") as mock_rmtree, \
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \ patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir: patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
base_dir = "downloaded-ads" # Use Path for OS-agnostic path handling
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title") final_dir = Path("downloaded-ads") / "ad_12345_Test Advertisement Title"
yaml_path = os.path.join(final_dir, "ad_12345.yaml") yaml_path = final_dir / "ad_12345.yaml"
# Configure mocks for directory checks
mock_exists.return_value = False
mock_isdir.return_value = False
# Mock the new method that handles directory creation and extraction
mock_extract_with_dir.return_value = ( mock_extract_with_dir.return_value = (
AdPartial.model_validate({ AdPartial.model_validate({
"title": "Test Advertisement Title", "title": "Test Advertisement Title",
@@ -957,140 +1010,18 @@ class TestAdExtractorDownload:
"location": "Test City" "location": "Test City"
} }
}), }),
final_dir str(final_dir)
) )
await extractor.download_ad(12345) await extractor.download_ad(12345)
# Verify the correct functions were called # Verify observable behavior: extraction and save were called
mock_extract_with_dir.assert_called_once() mock_extract_with_dir.assert_called_once()
# Directory handling is now done inside _extract_ad_page_info_with_directory_handling mock_save_dict.assert_called_once()
mock_rmtree.assert_not_called() # Directory handling is done internally
mock_mkdir.assert_has_calls([call(base_dir)]) # Only base directory creation
mock_makedirs.assert_not_called() # Using mkdir instead
mock_rename.assert_not_called() # No renaming needed
# Get the actual call arguments # Verify saved to correct location with correct data
actual_call = mock_save_dict.call_args actual_call = mock_save_dict.call_args
assert actual_call is not None actual_path = Path(actual_call[0][0])
actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
@pytest.mark.asyncio
async def test_download_ad_use_existing_folder(self, extractor:AdExtractor) -> None:
"""Test downloading an ad when an old folder without title exists (default behavior)."""
with patch("os.path.exists") as mock_exists, \
patch("os.path.isdir") as mock_isdir, \
patch("os.makedirs") as mock_makedirs, \
patch("os.mkdir") as mock_mkdir, \
patch("os.rename") as mock_rename, \
patch("shutil.rmtree") as mock_rmtree, \
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
base_dir = "downloaded-ads"
temp_dir = os.path.join(base_dir, "ad_12345")
yaml_path = os.path.join(temp_dir, "ad_12345.yaml")
# Configure mocks for directory checks
# Base directory exists, temp directory exists
existing_paths = {base_dir, temp_dir}
mock_exists.side_effect = lambda path: path in existing_paths
mock_isdir.side_effect = lambda path: path == base_dir
# Mock the new method that handles directory creation and extraction
mock_extract_with_dir.return_value = (
AdPartial.model_validate({
"title": "Test Advertisement Title",
"description": "Test Description",
"category": "Dienstleistungen",
"price": 100,
"images": [],
"contact": {
"name": "Test User",
"street": "Test Street 123",
"zipcode": "12345",
"location": "Test City"
}
}),
temp_dir # Use existing temp directory
)
await extractor.download_ad(12345)
# Verify the correct functions were called
mock_extract_with_dir.assert_called_once()
mock_rmtree.assert_not_called() # No directory to remove
mock_mkdir.assert_not_called() # Base directory already exists
mock_makedirs.assert_not_called() # Using mkdir instead
mock_rename.assert_not_called() # No renaming (default behavior)
# Get the actual call arguments
actual_call = mock_save_dict.call_args
assert actual_call is not None
actual_path = actual_call[0][0].replace("/", os.path.sep)
assert actual_path == yaml_path
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
@pytest.mark.asyncio
async def test_download_ad_rename_existing_folder_when_enabled(self, extractor:AdExtractor) -> None:
    """Test downloading an ad when an old folder without title exists and renaming is enabled."""
    # Enable renaming in config
    extractor.config.download.rename_existing_folders = True
    with patch("os.path.exists") as mock_exists, \
            patch("os.path.isdir") as mock_isdir, \
            patch("os.makedirs") as mock_makedirs, \
            patch("os.mkdir") as mock_mkdir, \
            patch("os.rename") as mock_rename, \
            patch("shutil.rmtree") as mock_rmtree, \
            patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
            patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
        base_dir = "downloaded-ads"
        temp_dir = os.path.join(base_dir, "ad_12345")
        final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
        yaml_path = os.path.join(final_dir, "ad_12345.yaml")

        # Base directory exists, temp directory exists, final directory doesn't exist.
        existing_paths = {base_dir, temp_dir}
        mock_exists.side_effect = lambda path: path in existing_paths
        mock_isdir.side_effect = lambda path: path == base_dir

        # Mock the helper that handles directory creation and extraction; with
        # renaming enabled it reports the renamed (titled) directory.
        mock_extract_with_dir.return_value = (
            AdPartial.model_validate({
                "title": "Test Advertisement Title",
                "description": "Test Description",
                "category": "Dienstleistungen",
                "price": 100,
                "images": [],
                "contact": {
                    "name": "Test User",
                    "street": "Test Street 123",
                    "zipcode": "12345",
                    "location": "Test City"
                }
            }),
            final_dir
        )

        await extractor.download_ad(12345)

        # Directory handling (including renaming) is done inside
        # _extract_ad_page_info_with_directory_handling, so no patched
        # filesystem function may be called directly.
        mock_extract_with_dir.assert_called_once()
        mock_rmtree.assert_not_called()
        mock_mkdir.assert_not_called()
        mock_makedirs.assert_not_called()
        mock_rename.assert_not_called()

        # The YAML file must land in the renamed (titled) directory.
        # NOTE: the scraped original had these closing assertions duplicated
        # on one line (copy/paste garbling); deduplicated here.
        actual_call = mock_save_dict.call_args
        assert actual_call is not None
        actual_path = actual_call[0][0].replace("/", os.path.sep)
        assert actual_path == yaml_path
        assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
    image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
    assert len(image_paths) == 0
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_images_with_none_url(self, extractor:AdExtractor) -> None:
    """Test image download when some images have None as src attribute."""
    gallery_mock = MagicMock()

    # One image element carries a usable URL, the other has a None src.
    valid_img = MagicMock()
    valid_img.attrs = {"src": "http://example.com/valid_image.jpg"}
    broken_img = MagicMock()
    broken_img.attrs = {"src": None}

    with patch.object(extractor, "web_find", new_callable = AsyncMock, return_value = gallery_mock), \
            patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = [valid_img, broken_img]), \
            patch.object(AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"):
        image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
        # Only the valid image is downloaded; the None src is skipped.
        assert len(image_paths) == 1
        assert image_paths[0] == "ad_12345__img1.jpg"
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_final_dir_exists(
    self, extractor:AdExtractor, tmp_path:Path
) -> None:
    """Test directory handling when final_dir already exists - it should be deleted."""
    downloads_root = tmp_path / "downloaded-ads"
    downloads_root.mkdir()

    # Pre-create the final (titled) directory with a stale file; both must be replaced.
    stale_dir = downloads_root / "ad_12345_Test Title"
    stale_dir.mkdir()
    stale_file = stale_dir / "old_file.txt"
    stale_file.write_text("old content")

    # Minimal page stub providing only the ad URL.
    fake_page = MagicMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    extractor.page = fake_page

    with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
        "Test Title",        # Title extraction
        "Test Title",        # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025"         # Creation date
    ]), \
            patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
                "universalAnalyticsOpts": {
                    "dimensions": {
                        "dimension92": "",
                        "dimension108": ""
                    }
                }
            }), \
            patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
            patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
            patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
            patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
            patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
            patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
            patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
                name = "Test", zipcode = "12345", location = "Berlin"
            )):
        extracted_ad, target_dir = await extractor._extract_ad_page_info_with_directory_handling(
            downloads_root, 12345
        )

    # The pre-existing directory was wiped and recreated as the extraction target.
    assert target_dir == stale_dir
    assert target_dir.exists()
    assert not stale_file.exists()  # Old file should be gone
    assert extracted_ad.title == "Test Title"
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_rename_enabled(
    self, extractor:AdExtractor, tmp_path:Path
) -> None:
    """Test directory handling when temp_dir exists and rename_existing_folders is True."""
    downloads_root = tmp_path / "downloaded-ads"
    downloads_root.mkdir()

    # Pre-create the title-less ad directory with a file that must be preserved.
    untitled_dir = downloads_root / "ad_12345"
    untitled_dir.mkdir()
    kept_file = untitled_dir / "existing_image.jpg"
    kept_file.write_text("existing image data")

    # Enable rename_existing_folders in config
    extractor.config.download.rename_existing_folders = True

    # Minimal page stub providing only the ad URL.
    fake_page = MagicMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    extractor.page = fake_page

    with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
        "Test Title",        # Title extraction
        "Test Title",        # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025"         # Creation date
    ]), \
            patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
                "universalAnalyticsOpts": {
                    "dimensions": {
                        "dimension92": "",
                        "dimension108": ""
                    }
                }
            }), \
            patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
            patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
            patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
            patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
            patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
            patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
            patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
                name = "Test", zipcode = "12345", location = "Berlin"
            )):
        extracted_ad, target_dir = await extractor._extract_ad_page_info_with_directory_handling(
            downloads_root, 12345
        )

    # The directory was renamed from the title-less name to the titled one,
    # and its contents were preserved.
    renamed_dir = downloads_root / "ad_12345_Test Title"
    assert target_dir == renamed_dir
    assert target_dir.exists()
    assert not untitled_dir.exists()  # Old temp dir should be gone
    assert (target_dir / "existing_image.jpg").exists()  # File should be preserved
    assert extracted_ad.title == "Test Title"
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_use_existing(
    self, extractor:AdExtractor, tmp_path:Path
) -> None:
    """Test directory handling when temp_dir exists and rename_existing_folders is False (default)."""
    downloads_root = tmp_path / "downloaded-ads"
    downloads_root.mkdir()

    # Pre-create the title-less ad directory with a file that must survive.
    untitled_dir = downloads_root / "ad_12345"
    untitled_dir.mkdir()
    kept_file = untitled_dir / "existing_image.jpg"
    kept_file.write_text("existing image data")

    # Ensure rename_existing_folders is False (default)
    extractor.config.download.rename_existing_folders = False

    # Minimal page stub providing only the ad URL.
    fake_page = MagicMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    extractor.page = fake_page

    with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
        "Test Title",        # Title extraction
        "Test Title",        # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025"         # Creation date
    ]), \
            patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
                "universalAnalyticsOpts": {
                    "dimensions": {
                        "dimension92": "",
                        "dimension108": ""
                    }
                }
            }), \
            patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
            patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
            patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
            patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
            patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
            patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
            patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
                name = "Test", zipcode = "12345", location = "Berlin"
            )):
        extracted_ad, target_dir = await extractor._extract_ad_page_info_with_directory_handling(
            downloads_root, 12345
        )

    # The existing title-less directory is reused as-is (no rename).
    assert target_dir == untitled_dir
    assert target_dir.exists()
    assert (target_dir / "existing_image.jpg").exists()  # File should be preserved
    assert extracted_ad.title == "Test Title"

# ---- scraped page boundary: next test file ----

async def test_verify_command(self, test_bot:KleinanzeigenBot, tmp_path:Any) -> None:
    """Test verify command with minimal config."""
    config_path = Path(tmp_path) / "config.yaml"
    # Write a minimal but valid config via pathlib instead of open()/write();
    # reconstructed from the garbled side-by-side diff (new side kept).
    config_path.write_text("""
login:
  username: test
  password: test
""", encoding = "utf-8")
    test_bot.config_file_path = str(config_path)
    await test_bot.run(["script.py", "verify"])
    assert test_bot.config.login.username == "test"

# ---- scraped page boundary: next test file ----

from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page

from kleinanzeigen_bot.model.config_model import Config
from kleinanzeigen_bot.utils import files, loggers
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin  # noqa: PLC2701

# (tail of the web_scraper fixture — body elided in the diff view)
    return scraper
def test_write_initial_prefs(tmp_path:Path) -> None:
    """Test _write_initial_prefs helper function."""
    from kleinanzeigen_bot.utils.web_scraping_mixin import _write_initial_prefs  # noqa: PLC0415, PLC2701

    prefs_path = tmp_path / "Preferences"
    _write_initial_prefs(str(prefs_path))

    # The file must exist and contain valid JSON with the hardening defaults.
    assert prefs_path.exists()
    prefs = json.loads(prefs_path.read_text(encoding = "UTF-8"))
    assert prefs["credentials_enable_service"] is False
    assert prefs["enable_do_not_track"] is True
    assert prefs["google"]["services"]["consented_to_sync"] is False
    assert prefs["profile"]["password_manager_enabled"] is False
    assert prefs["profile"]["default_content_setting_values"]["notifications"] == 2
    assert prefs["signin"]["allowed"] is False
    assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
    assert prefs["devtools"]["preferences"]["currentDockState"] == '"bottom"'
class TestWebScrapingErrorHandling:
    """Test error handling scenarios in WebScrapingMixin."""
chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
real_exists = os.path.exists

def mock_exists_sync(path:str) -> bool:
    # Handle all browser paths
    if path in {
        # Linux paths
        # … (browser path entries elided in the diff view) …
    if "Preferences" in str(path) and str(tmp_path) in str(path):
        return real_exists(path)
    return False

async def mock_exists_async(path:str | Path) -> bool:
    return mock_exists_sync(str(path))

monkeypatch.setattr(os.path, "exists", mock_exists_sync)
monkeypatch.setattr(files, "exists", mock_exists_async)

# Create test profile directory
profile_dir = tmp_path / "Default"
prefs_file = profile_dir / "Preferences"

# Test with existing preferences file
prefs_file.write_text(json.dumps({"existing": "prefs"}), encoding = "UTF-8")

scraper = WebScrapingMixin()
scraper.browser_config.user_data_dir = str(tmp_path)
await scraper.create_browser_session()

# Verify preferences file was not overwritten
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
assert prefs["existing"] == "prefs"

# Test with missing preferences file
prefs_file.unlink()
await scraper.create_browser_session()

# Verify new preferences file was created with correct settings
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
assert prefs["credentials_enable_service"] is False
assert prefs["enable_do_not_track"] is True
assert prefs["profile"]["password_manager_enabled"] is False
assert prefs["signin"]["allowed"] is False
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]

@pytest.mark.asyncio
async def test_browser_arguments_configuration(self, tmp_path:Path, monkeypatch:pytest.MonkeyPatch) -> None:
# Mock os.path.exists to return True for both Chrome and Edge paths
monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"})

async def mock_exists_async(path:str | Path) -> bool:
    return str(path) in {"/usr/bin/chrome", "/usr/bin/edge"}

monkeypatch.setattr(files, "exists", mock_exists_async)

# Test with custom arguments
scraper = WebScrapingMixin()
scraper.browser_config.arguments = ["--custom-arg=value", "--another-arg"]
# Mock Config class
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig)  # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]

# Mock files.exists and files.is_dir to return appropriate values
async def mock_exists(path:str | Path) -> bool:
    path_str = str(path)
    # Resolve real paths to handle symlinks (e.g., /var -> /private/var on macOS)
    real_path = str(Path(path_str).resolve())  # noqa: ASYNC240 Test mock, runs synchronously
    real_ext1 = str(Path(ext1).resolve())  # noqa: ASYNC240 Test mock, runs synchronously
    real_ext2 = str(Path(ext2).resolve())  # noqa: ASYNC240 Test mock, runs synchronously
    return path_str in {"/usr/bin/chrome", "/usr/bin/edge"} or real_path in {real_ext1, real_ext2} or os.path.exists(path_str)  # noqa: ASYNC240

async def mock_is_dir(path:str | Path) -> bool:
    path_str = str(path)
    # Resolve real paths to handle symlinks
    real_path = str(Path(path_str).resolve())  # noqa: ASYNC240 Test mock, runs synchronously
    real_ext1 = str(Path(ext1).resolve())  # noqa: ASYNC240 Test mock, runs synchronously
    real_ext2 = str(Path(ext2).resolve())  # noqa: ASYNC240 Test mock, runs synchronously
    # Nodriver extracts CRX files to temp directories, so they appear as directories
    if real_path in {real_ext1, real_ext2}:
        return True
    return Path(path_str).is_dir()  # noqa: ASYNC240 Test mock, runs synchronously

monkeypatch.setattr(files, "exists", mock_exists)
monkeypatch.setattr(files, "is_dir", mock_is_dir)

# Test extension loading
scraper = WebScrapingMixin()
scraper.browser_config.extensions = [str(ext1), str(ext2)]
scraper.browser_config.binary_location = "/usr/bin/chrome"
# Removed monkeypatch for os.path.exists so extension files are detected
await scraper.create_browser_session()

# Verify extensions were loaded
config = _nodriver_start_mock().call_args[0][0]
assert len(config._extensions) == 2
for ext_path in config._extensions:
    assert await files.exists(ext_path)
    assert await files.is_dir(ext_path)

# Test with non-existent extension
scraper.browser_config.extensions = ["non_existent.crx"]
scraper.browser_config.user_data_dir = str(tmp_path)
scraper.browser_config.profile_name = "Default"
await scraper.create_browser_session()
state_file.write_text('{"foo": "bar"}', encoding = "utf-8")
scraper.browser._process_pid = 12345
scraper.browser.stop = MagicMock()
with patch("psutil.Process") as mock_proc:
scraper2.browser_config.user_data_dir = str(tmp_path)
scraper2.browser_config.profile_name = "Default"
await scraper2.create_browser_session()
data = state_file.read_text(encoding = "utf-8")
assert data == '{"foo": "bar"}'
scraper2.browser._process_pid = 12346
scraper2.browser.stop = MagicMock()
) -> None:
    """Test error handling when browser connection fails."""
    with patch("os.path.exists", return_value = True), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
) -> None:
    """Test error handling when browser connection fails with non-root error."""
    with patch("os.path.exists", return_value = True), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
) -> None:
    """Test error handling when browser startup fails with root error."""
    with patch("os.path.exists", return_value = True), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
) -> None:
    """Test error handling when browser startup fails with non-root error."""
    with patch("os.path.exists", return_value = True), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
            patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class: