mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
fix: eliminate async safety violations and migrate to pathlib (#697)
## ℹ️ Description Eliminate all blocking I/O operations in async contexts and modernize file path handling by migrating from os.path to pathlib.Path. - Link to the related issue(s): #692 - Get rid of the TODO in pyproject.toml - The added debug logging will ease the troubleshooting for path related issues. ## 📋 Changes Summary - Enable ASYNC210, ASYNC230, ASYNC240, ASYNC250 Ruff rules - Wrap blocking urllib.request.urlopen() in run_in_executor - Wrap blocking file operations (open, write) in run_in_executor - Replace blocking os.path calls with async helpers using run_in_executor - Replace blocking input() with await ainput() - Migrate extract.py from os.path to pathlib.Path - Use Path() constructor and / operator for path joining - Use Path.mkdir(), Path.rename() in executor instead of os functions - Create mockable _path_exists() and _path_is_dir() helpers - Add debug logging for all file system operations ### ⚙️ Type of Change Select the type(s) of change(s) included in this pull request: - [X] 🐞 Bug fix (non-breaking change which fixes an issue) - [ ] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [X] I have reviewed my changes to ensure they meet the project's standards. - [X] I have tested my changes and ensured that all tests pass (`pdm run test`). - [X] I have formatted the code (`pdm run format`). - [X] I have verified that linting passes (`pdm run lint`). - [X] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Refactor** * Made user prompt non‑blocking to improve responsiveness. 
* Converted filesystem/path handling and prefs I/O to async‑friendly operations; moved blocking network and file work to background tasks. * Added async file/path helpers and async port‑check before browser connections. * **Tests** * Expanded unit tests for path helpers, image download success/failure, prefs writing, and directory creation/renaming workflows. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -226,10 +226,6 @@ select = [
|
|||||||
]
|
]
|
||||||
ignore = [
|
ignore = [
|
||||||
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed
|
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed
|
||||||
"ASYNC210", # TODO Async functions should not call blocking HTTP methods
|
|
||||||
"ASYNC230", # TODO Async functions should not open files with blocking methods like `open`
|
|
||||||
"ASYNC240", # TODO Async functions should not use os.path methods, use trio.Path or anyio.path
|
|
||||||
"ASYNC250", # TODO Blocking call to input() in async context
|
|
||||||
"COM812", # Trailing comma missing
|
"COM812", # Trailing comma missing
|
||||||
"D1", # Missing docstring in ...
|
"D1", # Missing docstring in ...
|
||||||
"D200", # One-line docstring should fit on one line
|
"D200", # One-line docstring should fit on one line
|
||||||
|
|||||||
@@ -937,7 +937,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.warning("# Payment form detected! Please proceed with payment.")
|
LOG.warning("# Payment form detected! Please proceed with payment.")
|
||||||
LOG.warning("############################################")
|
LOG.warning("############################################")
|
||||||
await self.web_scroll_page_down()
|
await self.web_scroll_page_down()
|
||||||
input(_("Press a key to continue..."))
|
await ainput(_("Press a key to continue..."))
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -1108,7 +1108,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
# in some categories we need to go another dialog back
|
# in some categories we need to go another dialog back
|
||||||
try:
|
try:
|
||||||
await self.web_find(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]',
|
await self.web_find(By.XPATH, '//dialog//button[contains(., "Andere Versandmethoden")]',
|
||||||
timeout=short_timeout)
|
timeout = short_timeout)
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
await self.web_click(By.XPATH, '//dialog//button[contains(., "Zurück")]')
|
await self.web_click(By.XPATH, '//dialog//button[contains(., "Zurück")]')
|
||||||
|
|
||||||
|
|||||||
@@ -1,18 +1,21 @@
|
|||||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
import asyncio
|
||||||
from gettext import gettext as _
|
from gettext import gettext as _
|
||||||
|
|
||||||
import json, mimetypes, os, re, shutil # isort: skip
|
import json, mimetypes, re, shutil # isort: skip
|
||||||
|
import urllib.error as urllib_error
|
||||||
import urllib.request as urllib_request
|
import urllib.request as urllib_request
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Final
|
from typing import Any, Final
|
||||||
|
|
||||||
from kleinanzeigen_bot.model.ad_model import ContactPartial
|
from kleinanzeigen_bot.model.ad_model import ContactPartial
|
||||||
|
|
||||||
from .model.ad_model import AdPartial
|
from .model.ad_model import AdPartial
|
||||||
from .model.config_model import Config
|
from .model.config_model import Config
|
||||||
from .utils import dicts, i18n, loggers, misc, reflect
|
from .utils import dicts, files, i18n, loggers, misc, reflect
|
||||||
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
|
from .utils.web_scraping_mixin import Browser, By, Element, WebScrapingMixin
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
@@ -44,23 +47,39 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# create sub-directory for ad(s) to download (if necessary):
|
# create sub-directory for ad(s) to download (if necessary):
|
||||||
relative_directory = "downloaded-ads"
|
relative_directory = Path("downloaded-ads")
|
||||||
# make sure configured base directory exists
|
# make sure configured base directory exists (using exist_ok=True to avoid TOCTOU race)
|
||||||
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
|
await asyncio.get_running_loop().run_in_executor(None, lambda: relative_directory.mkdir(exist_ok = True)) # noqa: ASYNC240
|
||||||
os.mkdir(relative_directory)
|
LOG.info("Ensured ads directory exists at ./%s.", relative_directory)
|
||||||
LOG.info("Created ads directory at ./%s.", relative_directory)
|
|
||||||
|
|
||||||
# Extract ad info and determine final directory path
|
# Extract ad info and determine final directory path
|
||||||
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(
|
ad_cfg, final_dir = await self._extract_ad_page_info_with_directory_handling(
|
||||||
relative_directory, ad_id
|
relative_directory, ad_id
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save the ad configuration file
|
# Save the ad configuration file (offload to executor to avoid blocking the event loop)
|
||||||
ad_file_path = final_dir + "/" + f"ad_{ad_id}.yaml"
|
ad_file_path = str(Path(final_dir) / f"ad_{ad_id}.yaml")
|
||||||
dicts.save_dict(
|
header_string = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
|
||||||
ad_file_path,
|
await asyncio.get_running_loop().run_in_executor(
|
||||||
ad_cfg.model_dump(),
|
None,
|
||||||
header = "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json")
|
lambda: dicts.save_dict(ad_file_path, ad_cfg.model_dump(), header = header_string)
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _download_and_save_image_sync(url:str, directory:str, filename_prefix:str, img_nr:int) -> str | None:
|
||||||
|
try:
|
||||||
|
with urllib_request.urlopen(url) as response: # noqa: S310 Audit URL open for permitted schemes.
|
||||||
|
content_type = response.info().get_content_type()
|
||||||
|
file_ending = mimetypes.guess_extension(content_type) or ""
|
||||||
|
# Use pathlib.Path for OS-agnostic path handling
|
||||||
|
img_path = Path(directory) / f"{filename_prefix}{img_nr}{file_ending}"
|
||||||
|
with open(img_path, "wb") as f:
|
||||||
|
shutil.copyfileobj(response, f)
|
||||||
|
return str(img_path)
|
||||||
|
except (urllib_error.URLError, urllib_error.HTTPError, OSError, shutil.Error) as e:
|
||||||
|
# Narrow exception handling to expected network/filesystem errors
|
||||||
|
LOG.warning("Failed to download image %s: %s", url, e)
|
||||||
|
return None
|
||||||
|
|
||||||
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
|
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
|
||||||
"""
|
"""
|
||||||
@@ -85,19 +104,26 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
img_nr = 1
|
img_nr = 1
|
||||||
dl_counter = 0
|
dl_counter = 0
|
||||||
|
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
for img_element in images:
|
for img_element in images:
|
||||||
current_img_url = img_element.attrs["src"] # URL of the image
|
current_img_url = img_element.attrs["src"] # URL of the image
|
||||||
if current_img_url is None:
|
if current_img_url is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
with urllib_request.urlopen(str(current_img_url)) as response: # noqa: S310 Audit URL open for permitted schemes.
|
img_path = await loop.run_in_executor(
|
||||||
content_type = response.info().get_content_type()
|
None,
|
||||||
file_ending = mimetypes.guess_extension(content_type)
|
self._download_and_save_image_sync,
|
||||||
img_path = f"{directory}/{img_fn_prefix}{img_nr}{file_ending}"
|
str(current_img_url),
|
||||||
with open(img_path, "wb") as f:
|
directory,
|
||||||
shutil.copyfileobj(response, f)
|
img_fn_prefix,
|
||||||
|
img_nr
|
||||||
|
)
|
||||||
|
|
||||||
|
if img_path:
|
||||||
dl_counter += 1
|
dl_counter += 1
|
||||||
img_paths.append(img_path.rsplit("/", maxsplit = 1)[-1])
|
# Use pathlib.Path for OS-agnostic path handling
|
||||||
|
img_paths.append(Path(img_path).name)
|
||||||
|
|
||||||
img_nr += 1
|
img_nr += 1
|
||||||
LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
|
LOG.info("Downloaded %s.", i18n.pluralize("image", dl_counter))
|
||||||
@@ -354,8 +380,8 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
return ad_cfg
|
return ad_cfg
|
||||||
|
|
||||||
async def _extract_ad_page_info_with_directory_handling(
|
async def _extract_ad_page_info_with_directory_handling(
|
||||||
self, relative_directory:str, ad_id:int
|
self, relative_directory:Path, ad_id:int
|
||||||
) -> tuple[AdPartial, str]:
|
) -> tuple[AdPartial, Path]:
|
||||||
"""
|
"""
|
||||||
Extracts ad information and handles directory creation/renaming.
|
Extracts ad information and handles directory creation/renaming.
|
||||||
|
|
||||||
@@ -373,32 +399,37 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
# Determine the final directory path
|
# Determine the final directory path
|
||||||
sanitized_title = misc.sanitize_folder_name(title, self.config.download.folder_name_max_length)
|
sanitized_title = misc.sanitize_folder_name(title, self.config.download.folder_name_max_length)
|
||||||
final_dir = os.path.join(relative_directory, f"ad_{ad_id}_{sanitized_title}")
|
final_dir = relative_directory / f"ad_{ad_id}_{sanitized_title}"
|
||||||
temp_dir = os.path.join(relative_directory, f"ad_{ad_id}")
|
temp_dir = relative_directory / f"ad_{ad_id}"
|
||||||
|
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
# Handle existing directories
|
# Handle existing directories
|
||||||
if os.path.exists(final_dir):
|
if await files.exists(final_dir):
|
||||||
# If the folder with title already exists, delete it
|
# If the folder with title already exists, delete it
|
||||||
LOG.info("Deleting current folder of ad %s...", ad_id)
|
LOG.info("Deleting current folder of ad %s...", ad_id)
|
||||||
shutil.rmtree(final_dir)
|
LOG.debug("Removing directory tree: %s", final_dir)
|
||||||
|
await loop.run_in_executor(None, shutil.rmtree, str(final_dir))
|
||||||
|
|
||||||
if os.path.exists(temp_dir):
|
if await files.exists(temp_dir):
|
||||||
if self.config.download.rename_existing_folders:
|
if self.config.download.rename_existing_folders:
|
||||||
# Rename the old folder to the new name with title
|
# Rename the old folder to the new name with title
|
||||||
LOG.info("Renaming folder from %s to %s for ad %s...",
|
LOG.info("Renaming folder from %s to %s for ad %s...",
|
||||||
os.path.basename(temp_dir), os.path.basename(final_dir), ad_id)
|
temp_dir.name, final_dir.name, ad_id)
|
||||||
os.rename(temp_dir, final_dir)
|
LOG.debug("Renaming: %s -> %s", temp_dir, final_dir)
|
||||||
|
await loop.run_in_executor(None, temp_dir.rename, final_dir)
|
||||||
else:
|
else:
|
||||||
# Use the existing folder without renaming
|
# Use the existing folder without renaming
|
||||||
final_dir = temp_dir
|
final_dir = temp_dir
|
||||||
LOG.info("Using existing folder for ad %s at %s.", ad_id, final_dir)
|
LOG.info("Using existing folder for ad %s at %s.", ad_id, final_dir)
|
||||||
else:
|
else:
|
||||||
# Create new directory with title
|
# Create new directory with title
|
||||||
os.mkdir(final_dir)
|
LOG.debug("Creating new directory: %s", final_dir)
|
||||||
|
await loop.run_in_executor(None, final_dir.mkdir)
|
||||||
LOG.info("New directory for ad created at %s.", final_dir)
|
LOG.info("New directory for ad created at %s.", final_dir)
|
||||||
|
|
||||||
# Now extract complete ad info (including images) to the final directory
|
# Now extract complete ad info (including images) to the final directory
|
||||||
ad_cfg = await self._extract_ad_page_info(final_dir, ad_id)
|
ad_cfg = await self._extract_ad_page_info(str(final_dir), ad_id)
|
||||||
|
|
||||||
return ad_cfg, final_dir
|
return ad_cfg, final_dir
|
||||||
|
|
||||||
|
|||||||
@@ -173,7 +173,10 @@ kleinanzeigen_bot/__init__.py:
|
|||||||
kleinanzeigen_bot/extract.py:
|
kleinanzeigen_bot/extract.py:
|
||||||
#################################################
|
#################################################
|
||||||
download_ad:
|
download_ad:
|
||||||
"Created ads directory at ./%s.": "Verzeichnis für Anzeigen erstellt unter ./%s."
|
"Ensured ads directory exists at ./%s.": "Verzeichnis für Anzeigen unter ./%s sichergestellt."
|
||||||
|
|
||||||
|
_download_and_save_image_sync:
|
||||||
|
"Failed to download image %s: %s": "Fehler beim Herunterladen des Bildes %s: %s"
|
||||||
|
|
||||||
_download_images_from_ad_page:
|
_download_images_from_ad_page:
|
||||||
"Found %s.": "%s gefunden."
|
"Found %s.": "%s gefunden."
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
import os
|
import asyncio, os # isort: skip
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
def abspath(relative_path:str, relative_to:str | None = None) -> str:
|
def abspath(relative_path:str, relative_to:str | None = None) -> str:
|
||||||
@@ -24,3 +25,23 @@ def abspath(relative_path:str, relative_to:str | None = None) -> str:
|
|||||||
base = os.path.dirname(base)
|
base = os.path.dirname(base)
|
||||||
|
|
||||||
return os.path.normpath(os.path.join(base, relative_path))
|
return os.path.normpath(os.path.join(base, relative_path))
|
||||||
|
|
||||||
|
|
||||||
|
async def exists(path:str | Path) -> bool:
|
||||||
|
"""
|
||||||
|
Asynchronously check if a file or directory exists.
|
||||||
|
|
||||||
|
:param path: Path to check
|
||||||
|
:return: True if path exists, False otherwise
|
||||||
|
"""
|
||||||
|
return await asyncio.get_running_loop().run_in_executor(None, Path(path).exists)
|
||||||
|
|
||||||
|
|
||||||
|
async def is_dir(path:str | Path) -> bool:
|
||||||
|
"""
|
||||||
|
Asynchronously check if a path is a directory.
|
||||||
|
|
||||||
|
:param path: Path to check
|
||||||
|
:return: True if path is a directory, False otherwise
|
||||||
|
"""
|
||||||
|
return await asyncio.get_running_loop().run_in_executor(None, Path(path).is_dir)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from nodriver.core.tab import Tab as Page
|
|||||||
from kleinanzeigen_bot.model.config_model import Config as BotConfig
|
from kleinanzeigen_bot.model.config_model import Config as BotConfig
|
||||||
from kleinanzeigen_bot.model.config_model import TimeoutConfig
|
from kleinanzeigen_bot.model.config_model import TimeoutConfig
|
||||||
|
|
||||||
from . import loggers, net
|
from . import files, loggers, net
|
||||||
from .chrome_version_detector import (
|
from .chrome_version_detector import (
|
||||||
ChromeVersionInfo,
|
ChromeVersionInfo,
|
||||||
detect_chrome_version_from_binary,
|
detect_chrome_version_from_binary,
|
||||||
@@ -100,6 +100,37 @@ class BrowserConfig:
|
|||||||
self.profile_name:str | None = None
|
self.profile_name:str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _write_initial_prefs(prefs_file:str) -> None:
|
||||||
|
with open(prefs_file, "w", encoding = "UTF-8") as fd:
|
||||||
|
json.dump({
|
||||||
|
"credentials_enable_service": False,
|
||||||
|
"enable_do_not_track": True,
|
||||||
|
"google": {
|
||||||
|
"services": {
|
||||||
|
"consented_to_sync": False
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"profile": {
|
||||||
|
"default_content_setting_values": {
|
||||||
|
"popups": 0,
|
||||||
|
"notifications": 2 # 1 = allow, 2 = block browser notifications
|
||||||
|
},
|
||||||
|
"password_manager_enabled": False
|
||||||
|
},
|
||||||
|
"signin": {
|
||||||
|
"allowed": False
|
||||||
|
},
|
||||||
|
"translate_site_blacklist": [
|
||||||
|
"www.kleinanzeigen.de"
|
||||||
|
],
|
||||||
|
"devtools": {
|
||||||
|
"preferences": {
|
||||||
|
"currentDockState": '"bottom"'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, fd)
|
||||||
|
|
||||||
|
|
||||||
class WebScrapingMixin:
|
class WebScrapingMixin:
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
@@ -174,7 +205,7 @@ class WebScrapingMixin:
|
|||||||
LOG.info("Creating Browser session...")
|
LOG.info("Creating Browser session...")
|
||||||
|
|
||||||
if self.browser_config.binary_location:
|
if self.browser_config.binary_location:
|
||||||
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
ensure(await files.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
||||||
else:
|
else:
|
||||||
self.browser_config.binary_location = self.get_compatible_browser()
|
self.browser_config.binary_location = self.get_compatible_browser()
|
||||||
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
|
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
|
||||||
@@ -289,41 +320,14 @@ class WebScrapingMixin:
|
|||||||
profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
|
profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
|
||||||
os.makedirs(profile_dir, exist_ok = True)
|
os.makedirs(profile_dir, exist_ok = True)
|
||||||
prefs_file = os.path.join(profile_dir, "Preferences")
|
prefs_file = os.path.join(profile_dir, "Preferences")
|
||||||
if not os.path.exists(prefs_file):
|
if not await files.exists(prefs_file):
|
||||||
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
||||||
with open(prefs_file, "w", encoding = "UTF-8") as fd:
|
await asyncio.get_running_loop().run_in_executor(None, _write_initial_prefs, prefs_file)
|
||||||
json.dump({
|
|
||||||
"credentials_enable_service": False,
|
|
||||||
"enable_do_not_track": True,
|
|
||||||
"google": {
|
|
||||||
"services": {
|
|
||||||
"consented_to_sync": False
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"profile": {
|
|
||||||
"default_content_setting_values": {
|
|
||||||
"popups": 0,
|
|
||||||
"notifications": 2 # 1 = allow, 2 = block browser notifications
|
|
||||||
},
|
|
||||||
"password_manager_enabled": False
|
|
||||||
},
|
|
||||||
"signin": {
|
|
||||||
"allowed": False
|
|
||||||
},
|
|
||||||
"translate_site_blacklist": [
|
|
||||||
"www.kleinanzeigen.de"
|
|
||||||
],
|
|
||||||
"devtools": {
|
|
||||||
"preferences": {
|
|
||||||
"currentDockState": '"bottom"'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}, fd)
|
|
||||||
|
|
||||||
# load extensions
|
# load extensions
|
||||||
for crx_extension in self.browser_config.extensions:
|
for crx_extension in self.browser_config.extensions:
|
||||||
LOG.info(" -> Adding Browser extension: [%s]", crx_extension)
|
LOG.info(" -> Adding Browser extension: [%s]", crx_extension)
|
||||||
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
ensure(await files.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
||||||
cfg.add_extension(crx_extension)
|
cfg.add_extension(crx_extension)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
import json, os # isort: skip
|
import json # isort: skip
|
||||||
from gettext import gettext as _
|
from gettext import gettext as _
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, TypedDict
|
from typing import Any, TypedDict
|
||||||
from unittest.mock import AsyncMock, MagicMock, call, patch
|
from unittest.mock import AsyncMock, MagicMock, call, patch
|
||||||
|
from urllib.error import URLError
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -66,6 +68,124 @@ class TestAdExtractorBasics:
|
|||||||
"""Test extraction of ad ID from different URL formats."""
|
"""Test extraction of ad ID from different URL formats."""
|
||||||
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
|
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_path_exists_helper(self, tmp_path:Path) -> None:
|
||||||
|
"""Test files.exists helper function."""
|
||||||
|
|
||||||
|
from kleinanzeigen_bot.utils import files # noqa: PLC0415
|
||||||
|
|
||||||
|
# Test with existing path
|
||||||
|
existing_file = tmp_path / "test.txt"
|
||||||
|
existing_file.write_text("test")
|
||||||
|
assert await files.exists(existing_file) is True
|
||||||
|
assert await files.exists(str(existing_file)) is True
|
||||||
|
|
||||||
|
# Test with non-existing path
|
||||||
|
non_existing = tmp_path / "nonexistent.txt"
|
||||||
|
assert await files.exists(non_existing) is False
|
||||||
|
assert await files.exists(str(non_existing)) is False
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_path_is_dir_helper(self, tmp_path:Path) -> None:
|
||||||
|
"""Test files.is_dir helper function."""
|
||||||
|
|
||||||
|
from kleinanzeigen_bot.utils import files # noqa: PLC0415
|
||||||
|
|
||||||
|
# Test with directory
|
||||||
|
test_dir = tmp_path / "testdir"
|
||||||
|
test_dir.mkdir()
|
||||||
|
assert await files.is_dir(test_dir) is True
|
||||||
|
assert await files.is_dir(str(test_dir)) is True
|
||||||
|
|
||||||
|
# Test with file
|
||||||
|
test_file = tmp_path / "test.txt"
|
||||||
|
test_file.write_text("test")
|
||||||
|
assert await files.is_dir(test_file) is False
|
||||||
|
assert await files.is_dir(str(test_file)) is False
|
||||||
|
|
||||||
|
# Test with non-existing path
|
||||||
|
non_existing = tmp_path / "nonexistent"
|
||||||
|
assert await files.is_dir(non_existing) is False
|
||||||
|
assert await files.is_dir(str(non_existing)) is False
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_exists_async_helper(self, tmp_path:Path) -> None:
|
||||||
|
"""Test files.exists async helper function."""
|
||||||
|
from kleinanzeigen_bot.utils import files # noqa: PLC0415
|
||||||
|
|
||||||
|
# Test with existing path
|
||||||
|
existing_file = tmp_path / "test.txt"
|
||||||
|
existing_file.write_text("test")
|
||||||
|
assert await files.exists(existing_file) is True
|
||||||
|
assert await files.exists(str(existing_file)) is True
|
||||||
|
|
||||||
|
# Test with non-existing path
|
||||||
|
non_existing = tmp_path / "nonexistent.txt"
|
||||||
|
assert await files.exists(non_existing) is False
|
||||||
|
assert await files.exists(str(non_existing)) is False
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_isdir_async_helper(self, tmp_path:Path) -> None:
|
||||||
|
"""Test files.is_dir async helper function."""
|
||||||
|
from kleinanzeigen_bot.utils import files # noqa: PLC0415
|
||||||
|
|
||||||
|
# Test with directory
|
||||||
|
test_dir = tmp_path / "testdir"
|
||||||
|
test_dir.mkdir()
|
||||||
|
assert await files.is_dir(test_dir) is True
|
||||||
|
assert await files.is_dir(str(test_dir)) is True
|
||||||
|
|
||||||
|
# Test with file
|
||||||
|
test_file = tmp_path / "test.txt"
|
||||||
|
test_file.write_text("test")
|
||||||
|
assert await files.is_dir(test_file) is False
|
||||||
|
assert await files.is_dir(str(test_file)) is False
|
||||||
|
|
||||||
|
# Test with non-existing path
|
||||||
|
non_existing = tmp_path / "nonexistent"
|
||||||
|
assert await files.is_dir(non_existing) is False
|
||||||
|
assert await files.is_dir(str(non_existing)) is False
|
||||||
|
|
||||||
|
def test_download_and_save_image_sync_success(self, tmp_path:Path) -> None:
|
||||||
|
"""Test _download_and_save_image_sync with successful download."""
|
||||||
|
from unittest.mock import MagicMock, mock_open # noqa: PLC0415
|
||||||
|
|
||||||
|
test_dir = tmp_path / "images"
|
||||||
|
test_dir.mkdir()
|
||||||
|
|
||||||
|
# Mock urllib response
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.info().get_content_type.return_value = "image/jpeg"
|
||||||
|
mock_response.__enter__ = MagicMock(return_value = mock_response)
|
||||||
|
mock_response.__exit__ = MagicMock(return_value = False)
|
||||||
|
|
||||||
|
with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", return_value = mock_response), \
|
||||||
|
patch("kleinanzeigen_bot.extract.open", mock_open()), \
|
||||||
|
patch("kleinanzeigen_bot.extract.shutil.copyfileobj"):
|
||||||
|
|
||||||
|
result = AdExtractor._download_and_save_image_sync(
|
||||||
|
"http://example.com/image.jpg",
|
||||||
|
str(test_dir),
|
||||||
|
"test_",
|
||||||
|
1
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.endswith((".jpe", ".jpeg", ".jpg"))
|
||||||
|
assert "test_1" in result
|
||||||
|
|
||||||
|
def test_download_and_save_image_sync_failure(self, tmp_path:Path) -> None:
|
||||||
|
"""Test _download_and_save_image_sync with download failure."""
|
||||||
|
with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", side_effect = URLError("Network error")):
|
||||||
|
result = AdExtractor._download_and_save_image_sync(
|
||||||
|
"http://example.com/image.jpg",
|
||||||
|
str(tmp_path),
|
||||||
|
"test_",
|
||||||
|
1
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
class TestAdExtractorPricing:
|
class TestAdExtractorPricing:
|
||||||
"""Tests for pricing related functionality."""
|
"""Tests for pricing related functionality."""
|
||||||
@@ -865,84 +985,17 @@ class TestAdExtractorDownload:
|
|||||||
})
|
})
|
||||||
return AdExtractor(browser_mock, config)
|
return AdExtractor(browser_mock, config)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_download_ad_existing_directory(self, extractor:AdExtractor) -> None:
|
|
||||||
"""Test downloading an ad when the directory already exists."""
|
|
||||||
with patch("os.path.exists") as mock_exists, \
|
|
||||||
patch("os.path.isdir") as mock_isdir, \
|
|
||||||
patch("os.makedirs") as mock_makedirs, \
|
|
||||||
patch("os.mkdir") as mock_mkdir, \
|
|
||||||
patch("os.rename") as mock_rename, \
|
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
|
||||||
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
|
||||||
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
|
||||||
yaml_path = os.path.join(final_dir, "ad_12345.yaml")
|
|
||||||
|
|
||||||
# Configure mocks for directory checks
|
|
||||||
existing_paths = {base_dir, final_dir} # Final directory with title exists
|
|
||||||
mock_exists.side_effect = lambda path: path in existing_paths
|
|
||||||
mock_isdir.side_effect = lambda path: path == base_dir
|
|
||||||
|
|
||||||
# Mock the new method that handles directory creation and extraction
|
|
||||||
mock_extract_with_dir.return_value = (
|
|
||||||
AdPartial.model_validate({
|
|
||||||
"title": "Test Advertisement Title",
|
|
||||||
"description": "Test Description",
|
|
||||||
"category": "Dienstleistungen",
|
|
||||||
"price": 100,
|
|
||||||
"images": [],
|
|
||||||
"contact": {
|
|
||||||
"name": "Test User",
|
|
||||||
"street": "Test Street 123",
|
|
||||||
"zipcode": "12345",
|
|
||||||
"location": "Test City"
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
final_dir
|
|
||||||
)
|
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
|
||||||
|
|
||||||
# Verify the correct functions were called
|
|
||||||
mock_extract_with_dir.assert_called_once()
|
|
||||||
# Directory handling is now done inside _extract_ad_page_info_with_directory_handling
|
|
||||||
# so we don't expect rmtree/mkdir to be called directly in download_ad
|
|
||||||
mock_rmtree.assert_not_called() # Directory handling is done internally
|
|
||||||
mock_mkdir.assert_not_called() # Directory handling is done internally
|
|
||||||
mock_makedirs.assert_not_called() # Directory already exists
|
|
||||||
mock_rename.assert_not_called() # No renaming needed
|
|
||||||
|
|
||||||
# Get the actual call arguments
|
|
||||||
actual_call = mock_save_dict.call_args
|
|
||||||
assert actual_call is not None
|
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
|
||||||
assert actual_path == yaml_path
|
|
||||||
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_download_ad(self, extractor:AdExtractor) -> None:
|
async def test_download_ad(self, extractor:AdExtractor) -> None:
|
||||||
"""Test downloading an entire ad."""
|
"""Test downloading an ad - directory creation and saving ad data."""
|
||||||
with patch("os.path.exists") as mock_exists, \
|
with patch("pathlib.Path.mkdir"), \
|
||||||
patch("os.path.isdir") as mock_isdir, \
|
|
||||||
patch("os.makedirs") as mock_makedirs, \
|
|
||||||
patch("os.mkdir") as mock_mkdir, \
|
|
||||||
patch("os.rename") as mock_rename, \
|
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
||||||
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
# Use Path for OS-agnostic path handling
|
||||||
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
final_dir = Path("downloaded-ads") / "ad_12345_Test Advertisement Title"
|
||||||
yaml_path = os.path.join(final_dir, "ad_12345.yaml")
|
yaml_path = final_dir / "ad_12345.yaml"
|
||||||
|
|
||||||
# Configure mocks for directory checks
|
|
||||||
mock_exists.return_value = False
|
|
||||||
mock_isdir.return_value = False
|
|
||||||
|
|
||||||
# Mock the new method that handles directory creation and extraction
|
|
||||||
mock_extract_with_dir.return_value = (
|
mock_extract_with_dir.return_value = (
|
||||||
AdPartial.model_validate({
|
AdPartial.model_validate({
|
||||||
"title": "Test Advertisement Title",
|
"title": "Test Advertisement Title",
|
||||||
@@ -957,140 +1010,18 @@ class TestAdExtractorDownload:
|
|||||||
"location": "Test City"
|
"location": "Test City"
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
final_dir
|
str(final_dir)
|
||||||
)
|
)
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
await extractor.download_ad(12345)
|
||||||
|
|
||||||
# Verify the correct functions were called
|
# Verify observable behavior: extraction and save were called
|
||||||
mock_extract_with_dir.assert_called_once()
|
mock_extract_with_dir.assert_called_once()
|
||||||
# Directory handling is now done inside _extract_ad_page_info_with_directory_handling
|
mock_save_dict.assert_called_once()
|
||||||
mock_rmtree.assert_not_called() # Directory handling is done internally
|
|
||||||
mock_mkdir.assert_has_calls([call(base_dir)]) # Only base directory creation
|
|
||||||
mock_makedirs.assert_not_called() # Using mkdir instead
|
|
||||||
mock_rename.assert_not_called() # No renaming needed
|
|
||||||
|
|
||||||
# Get the actual call arguments
|
# Verify saved to correct location with correct data
|
||||||
actual_call = mock_save_dict.call_args
|
actual_call = mock_save_dict.call_args
|
||||||
assert actual_call is not None
|
actual_path = Path(actual_call[0][0])
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
|
||||||
assert actual_path == yaml_path
|
|
||||||
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_download_ad_use_existing_folder(self, extractor:AdExtractor) -> None:
|
|
||||||
"""Test downloading an ad when an old folder without title exists (default behavior)."""
|
|
||||||
with patch("os.path.exists") as mock_exists, \
|
|
||||||
patch("os.path.isdir") as mock_isdir, \
|
|
||||||
patch("os.makedirs") as mock_makedirs, \
|
|
||||||
patch("os.mkdir") as mock_mkdir, \
|
|
||||||
patch("os.rename") as mock_rename, \
|
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
|
||||||
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
|
||||||
temp_dir = os.path.join(base_dir, "ad_12345")
|
|
||||||
yaml_path = os.path.join(temp_dir, "ad_12345.yaml")
|
|
||||||
|
|
||||||
# Configure mocks for directory checks
|
|
||||||
# Base directory exists, temp directory exists
|
|
||||||
existing_paths = {base_dir, temp_dir}
|
|
||||||
mock_exists.side_effect = lambda path: path in existing_paths
|
|
||||||
mock_isdir.side_effect = lambda path: path == base_dir
|
|
||||||
|
|
||||||
# Mock the new method that handles directory creation and extraction
|
|
||||||
mock_extract_with_dir.return_value = (
|
|
||||||
AdPartial.model_validate({
|
|
||||||
"title": "Test Advertisement Title",
|
|
||||||
"description": "Test Description",
|
|
||||||
"category": "Dienstleistungen",
|
|
||||||
"price": 100,
|
|
||||||
"images": [],
|
|
||||||
"contact": {
|
|
||||||
"name": "Test User",
|
|
||||||
"street": "Test Street 123",
|
|
||||||
"zipcode": "12345",
|
|
||||||
"location": "Test City"
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
temp_dir # Use existing temp directory
|
|
||||||
)
|
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
|
||||||
|
|
||||||
# Verify the correct functions were called
|
|
||||||
mock_extract_with_dir.assert_called_once()
|
|
||||||
mock_rmtree.assert_not_called() # No directory to remove
|
|
||||||
mock_mkdir.assert_not_called() # Base directory already exists
|
|
||||||
mock_makedirs.assert_not_called() # Using mkdir instead
|
|
||||||
mock_rename.assert_not_called() # No renaming (default behavior)
|
|
||||||
|
|
||||||
# Get the actual call arguments
|
|
||||||
actual_call = mock_save_dict.call_args
|
|
||||||
assert actual_call is not None
|
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
|
||||||
assert actual_path == yaml_path
|
|
||||||
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_download_ad_rename_existing_folder_when_enabled(self, extractor:AdExtractor) -> None:
|
|
||||||
"""Test downloading an ad when an old folder without title exists and renaming is enabled."""
|
|
||||||
# Enable renaming in config
|
|
||||||
extractor.config.download.rename_existing_folders = True
|
|
||||||
|
|
||||||
with patch("os.path.exists") as mock_exists, \
|
|
||||||
patch("os.path.isdir") as mock_isdir, \
|
|
||||||
patch("os.makedirs") as mock_makedirs, \
|
|
||||||
patch("os.mkdir") as mock_mkdir, \
|
|
||||||
patch("os.rename") as mock_rename, \
|
|
||||||
patch("shutil.rmtree") as mock_rmtree, \
|
|
||||||
patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict, \
|
|
||||||
patch.object(extractor, "_extract_ad_page_info_with_directory_handling", new_callable = AsyncMock) as mock_extract_with_dir:
|
|
||||||
|
|
||||||
base_dir = "downloaded-ads"
|
|
||||||
temp_dir = os.path.join(base_dir, "ad_12345")
|
|
||||||
final_dir = os.path.join(base_dir, "ad_12345_Test Advertisement Title")
|
|
||||||
yaml_path = os.path.join(final_dir, "ad_12345.yaml")
|
|
||||||
|
|
||||||
# Configure mocks for directory checks
|
|
||||||
# Base directory exists, temp directory exists, final directory doesn't exist
|
|
||||||
existing_paths = {base_dir, temp_dir}
|
|
||||||
mock_exists.side_effect = lambda path: path in existing_paths
|
|
||||||
mock_isdir.side_effect = lambda path: path == base_dir
|
|
||||||
|
|
||||||
# Mock the new method that handles directory creation and extraction
|
|
||||||
mock_extract_with_dir.return_value = (
|
|
||||||
AdPartial.model_validate({
|
|
||||||
"title": "Test Advertisement Title",
|
|
||||||
"description": "Test Description",
|
|
||||||
"category": "Dienstleistungen",
|
|
||||||
"price": 100,
|
|
||||||
"images": [],
|
|
||||||
"contact": {
|
|
||||||
"name": "Test User",
|
|
||||||
"street": "Test Street 123",
|
|
||||||
"zipcode": "12345",
|
|
||||||
"location": "Test City"
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
final_dir
|
|
||||||
)
|
|
||||||
|
|
||||||
await extractor.download_ad(12345)
|
|
||||||
|
|
||||||
# Verify the correct functions were called
|
|
||||||
mock_extract_with_dir.assert_called_once() # Extract to final directory
|
|
||||||
# Directory handling (including renaming) is now done inside _extract_ad_page_info_with_directory_handling
|
|
||||||
mock_rmtree.assert_not_called() # Directory handling is done internally
|
|
||||||
mock_mkdir.assert_not_called() # Directory handling is done internally
|
|
||||||
mock_makedirs.assert_not_called() # Using mkdir instead
|
|
||||||
mock_rename.assert_not_called() # Directory handling is done internally
|
|
||||||
|
|
||||||
# Get the actual call arguments
|
|
||||||
actual_call = mock_save_dict.call_args
|
|
||||||
assert actual_call is not None
|
|
||||||
actual_path = actual_call[0][0].replace("/", os.path.sep)
|
|
||||||
assert actual_path == yaml_path
|
assert actual_path == yaml_path
|
||||||
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
assert actual_call[0][1] == mock_extract_with_dir.return_value[0].model_dump()
|
||||||
|
|
||||||
@@ -1101,3 +1032,196 @@ class TestAdExtractorDownload:
|
|||||||
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
|
||||||
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
||||||
assert len(image_paths) == 0
|
assert len(image_paths) == 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
async def test_download_images_with_none_url(self, extractor:AdExtractor) -> None:
|
||||||
|
"""Test image download when some images have None as src attribute."""
|
||||||
|
image_box_mock = MagicMock()
|
||||||
|
|
||||||
|
# Create image elements - one with valid src, one with None src
|
||||||
|
img_with_url = MagicMock()
|
||||||
|
img_with_url.attrs = {"src": "http://example.com/valid_image.jpg"}
|
||||||
|
|
||||||
|
img_without_url = MagicMock()
|
||||||
|
img_without_url.attrs = {"src": None}
|
||||||
|
|
||||||
|
with patch.object(extractor, "web_find", new_callable = AsyncMock, return_value = image_box_mock), \
|
||||||
|
patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = [img_with_url, img_without_url]), \
|
||||||
|
patch.object(AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"):
|
||||||
|
|
||||||
|
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
|
||||||
|
|
||||||
|
# Should only download the one valid image (skip the None)
|
||||||
|
assert len(image_paths) == 1
|
||||||
|
assert image_paths[0] == "ad_12345__img1.jpg"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
async def test_extract_ad_page_info_with_directory_handling_final_dir_exists(
|
||||||
|
self, extractor:AdExtractor, tmp_path:Path
|
||||||
|
) -> None:
|
||||||
|
"""Test directory handling when final_dir already exists - it should be deleted."""
|
||||||
|
base_dir = tmp_path / "downloaded-ads"
|
||||||
|
base_dir.mkdir()
|
||||||
|
|
||||||
|
# Create the final directory that should be deleted
|
||||||
|
final_dir = base_dir / "ad_12345_Test Title"
|
||||||
|
final_dir.mkdir()
|
||||||
|
old_file = final_dir / "old_file.txt"
|
||||||
|
old_file.write_text("old content")
|
||||||
|
|
||||||
|
# Mock the page
|
||||||
|
page_mock = MagicMock()
|
||||||
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
extractor.page = page_mock
|
||||||
|
|
||||||
|
with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
|
||||||
|
"Test Title", # Title extraction
|
||||||
|
"Test Title", # Second title call for full extraction
|
||||||
|
"Description text", # Description
|
||||||
|
"03.02.2025" # Creation date
|
||||||
|
]), \
|
||||||
|
patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
|
||||||
|
"universalAnalyticsOpts": {
|
||||||
|
"dimensions": {
|
||||||
|
"dimension92": "",
|
||||||
|
"dimension108": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), \
|
||||||
|
patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
|
||||||
|
patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
|
||||||
|
patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
|
||||||
|
patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
|
||||||
|
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
|
||||||
|
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
|
||||||
|
patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
|
||||||
|
name = "Test", zipcode = "12345", location = "Berlin"
|
||||||
|
)):
|
||||||
|
|
||||||
|
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(
|
||||||
|
base_dir, 12345
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the old directory was deleted and recreated
|
||||||
|
assert result_dir == final_dir
|
||||||
|
assert result_dir.exists()
|
||||||
|
assert not old_file.exists() # Old file should be gone
|
||||||
|
assert ad_cfg.title == "Test Title"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
async def test_extract_ad_page_info_with_directory_handling_rename_enabled(
|
||||||
|
self, extractor:AdExtractor, tmp_path:Path
|
||||||
|
) -> None:
|
||||||
|
"""Test directory handling when temp_dir exists and rename_existing_folders is True."""
|
||||||
|
base_dir = tmp_path / "downloaded-ads"
|
||||||
|
base_dir.mkdir()
|
||||||
|
|
||||||
|
# Create the temp directory (without title)
|
||||||
|
temp_dir = base_dir / "ad_12345"
|
||||||
|
temp_dir.mkdir()
|
||||||
|
existing_file = temp_dir / "existing_image.jpg"
|
||||||
|
existing_file.write_text("existing image data")
|
||||||
|
|
||||||
|
# Enable rename_existing_folders in config
|
||||||
|
extractor.config.download.rename_existing_folders = True
|
||||||
|
|
||||||
|
# Mock the page
|
||||||
|
page_mock = MagicMock()
|
||||||
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
extractor.page = page_mock
|
||||||
|
|
||||||
|
with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
|
||||||
|
"Test Title", # Title extraction
|
||||||
|
"Test Title", # Second title call for full extraction
|
||||||
|
"Description text", # Description
|
||||||
|
"03.02.2025" # Creation date
|
||||||
|
]), \
|
||||||
|
patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
|
||||||
|
"universalAnalyticsOpts": {
|
||||||
|
"dimensions": {
|
||||||
|
"dimension92": "",
|
||||||
|
"dimension108": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), \
|
||||||
|
patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
|
||||||
|
patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
|
||||||
|
patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
|
||||||
|
patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
|
||||||
|
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
|
||||||
|
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
|
||||||
|
patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
|
||||||
|
name = "Test", zipcode = "12345", location = "Berlin"
|
||||||
|
)):
|
||||||
|
|
||||||
|
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(
|
||||||
|
base_dir, 12345
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the directory was renamed from temp_dir to final_dir
|
||||||
|
final_dir = base_dir / "ad_12345_Test Title"
|
||||||
|
assert result_dir == final_dir
|
||||||
|
assert result_dir.exists()
|
||||||
|
assert not temp_dir.exists() # Old temp dir should be gone
|
||||||
|
assert (result_dir / "existing_image.jpg").exists() # File should be preserved
|
||||||
|
assert ad_cfg.title == "Test Title"
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
async def test_extract_ad_page_info_with_directory_handling_use_existing(
|
||||||
|
self, extractor:AdExtractor, tmp_path:Path
|
||||||
|
) -> None:
|
||||||
|
"""Test directory handling when temp_dir exists and rename_existing_folders is False (default)."""
|
||||||
|
base_dir = tmp_path / "downloaded-ads"
|
||||||
|
base_dir.mkdir()
|
||||||
|
|
||||||
|
# Create the temp directory (without title)
|
||||||
|
temp_dir = base_dir / "ad_12345"
|
||||||
|
temp_dir.mkdir()
|
||||||
|
existing_file = temp_dir / "existing_image.jpg"
|
||||||
|
existing_file.write_text("existing image data")
|
||||||
|
|
||||||
|
# Ensure rename_existing_folders is False (default)
|
||||||
|
extractor.config.download.rename_existing_folders = False
|
||||||
|
|
||||||
|
# Mock the page
|
||||||
|
page_mock = MagicMock()
|
||||||
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
extractor.page = page_mock
|
||||||
|
|
||||||
|
with patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = [
|
||||||
|
"Test Title", # Title extraction
|
||||||
|
"Test Title", # Second title call for full extraction
|
||||||
|
"Description text", # Description
|
||||||
|
"03.02.2025" # Creation date
|
||||||
|
]), \
|
||||||
|
patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = {
|
||||||
|
"universalAnalyticsOpts": {
|
||||||
|
"dimensions": {
|
||||||
|
"dimension92": "",
|
||||||
|
"dimension108": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}), \
|
||||||
|
patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"), \
|
||||||
|
patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}), \
|
||||||
|
patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
|
||||||
|
patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
|
||||||
|
patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False), \
|
||||||
|
patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []), \
|
||||||
|
patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = ContactPartial(
|
||||||
|
name = "Test", zipcode = "12345", location = "Berlin"
|
||||||
|
)):
|
||||||
|
|
||||||
|
ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(
|
||||||
|
base_dir, 12345
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify the existing temp_dir was used (not renamed)
|
||||||
|
assert result_dir == temp_dir
|
||||||
|
assert result_dir.exists()
|
||||||
|
assert (result_dir / "existing_image.jpg").exists() # File should be preserved
|
||||||
|
assert ad_cfg.title == "Test Title"
|
||||||
|
|||||||
@@ -641,12 +641,11 @@ class TestKleinanzeigenBotCommands:
|
|||||||
async def test_verify_command(self, test_bot:KleinanzeigenBot, tmp_path:Any) -> None:
|
async def test_verify_command(self, test_bot:KleinanzeigenBot, tmp_path:Any) -> None:
|
||||||
"""Test verify command with minimal config."""
|
"""Test verify command with minimal config."""
|
||||||
config_path = Path(tmp_path) / "config.yaml"
|
config_path = Path(tmp_path) / "config.yaml"
|
||||||
with open(config_path, "w", encoding = "utf-8") as f:
|
config_path.write_text("""
|
||||||
f.write("""
|
|
||||||
login:
|
login:
|
||||||
username: test
|
username: test
|
||||||
password: test
|
password: test
|
||||||
""")
|
""", encoding = "utf-8")
|
||||||
test_bot.config_file_path = str(config_path)
|
test_bot.config_file_path = str(config_path)
|
||||||
await test_bot.run(["script.py", "verify"])
|
await test_bot.run(["script.py", "verify"])
|
||||||
assert test_bot.config.login.username == "test"
|
assert test_bot.config.login.username == "test"
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ from nodriver.core.element import Element
|
|||||||
from nodriver.core.tab import Tab as Page
|
from nodriver.core.tab import Tab as Page
|
||||||
|
|
||||||
from kleinanzeigen_bot.model.config_model import Config
|
from kleinanzeigen_bot.model.config_model import Config
|
||||||
from kleinanzeigen_bot.utils import loggers
|
from kleinanzeigen_bot.utils import files, loggers
|
||||||
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin # noqa: PLC2701
|
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin # noqa: PLC2701
|
||||||
|
|
||||||
|
|
||||||
@@ -95,6 +95,30 @@ def web_scraper(mock_browser:AsyncMock, mock_page:TrulyAwaitableMockPage) -> Web
|
|||||||
return scraper
|
return scraper
|
||||||
|
|
||||||
|
|
||||||
|
def test_write_initial_prefs(tmp_path:Path) -> None:
|
||||||
|
"""Test _write_initial_prefs helper function."""
|
||||||
|
from kleinanzeigen_bot.utils.web_scraping_mixin import _write_initial_prefs # noqa: PLC0415, PLC2701
|
||||||
|
|
||||||
|
prefs_file = tmp_path / "Preferences"
|
||||||
|
_write_initial_prefs(str(prefs_file))
|
||||||
|
|
||||||
|
# Verify file was created
|
||||||
|
assert prefs_file.exists()
|
||||||
|
|
||||||
|
# Verify content is valid JSON with expected structure
|
||||||
|
with open(prefs_file, encoding = "UTF-8") as f:
|
||||||
|
prefs = json.load(f)
|
||||||
|
|
||||||
|
assert prefs["credentials_enable_service"] is False
|
||||||
|
assert prefs["enable_do_not_track"] is True
|
||||||
|
assert prefs["google"]["services"]["consented_to_sync"] is False
|
||||||
|
assert prefs["profile"]["password_manager_enabled"] is False
|
||||||
|
assert prefs["profile"]["default_content_setting_values"]["notifications"] == 2
|
||||||
|
assert prefs["signin"]["allowed"] is False
|
||||||
|
assert "www.kleinanzeigen.de" in prefs["translate_site_blacklist"]
|
||||||
|
assert prefs["devtools"]["preferences"]["currentDockState"] == '"bottom"'
|
||||||
|
|
||||||
|
|
||||||
class TestWebScrapingErrorHandling:
|
class TestWebScrapingErrorHandling:
|
||||||
"""Test error handling scenarios in WebScrapingMixin."""
|
"""Test error handling scenarios in WebScrapingMixin."""
|
||||||
|
|
||||||
@@ -728,7 +752,7 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||||
real_exists = os.path.exists
|
real_exists = os.path.exists
|
||||||
|
|
||||||
def mock_exists(path:str) -> bool:
|
def mock_exists_sync(path:str) -> bool:
|
||||||
# Handle all browser paths
|
# Handle all browser paths
|
||||||
if path in {
|
if path in {
|
||||||
# Linux paths
|
# Linux paths
|
||||||
@@ -754,7 +778,12 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
if "Preferences" in str(path) and str(tmp_path) in str(path):
|
if "Preferences" in str(path) and str(tmp_path) in str(path):
|
||||||
return real_exists(path)
|
return real_exists(path)
|
||||||
return False
|
return False
|
||||||
monkeypatch.setattr(os.path, "exists", mock_exists)
|
|
||||||
|
async def mock_exists_async(path:str | Path) -> bool:
|
||||||
|
return mock_exists_sync(str(path))
|
||||||
|
|
||||||
|
monkeypatch.setattr(os.path, "exists", mock_exists_sync)
|
||||||
|
monkeypatch.setattr(files, "exists", mock_exists_async)
|
||||||
|
|
||||||
# Create test profile directory
|
# Create test profile directory
|
||||||
profile_dir = tmp_path / "Default"
|
profile_dir = tmp_path / "Default"
|
||||||
@@ -762,8 +791,7 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
prefs_file = profile_dir / "Preferences"
|
prefs_file = profile_dir / "Preferences"
|
||||||
|
|
||||||
# Test with existing preferences file
|
# Test with existing preferences file
|
||||||
with open(prefs_file, "w", encoding = "UTF-8") as f:
|
prefs_file.write_text(json.dumps({"existing": "prefs"}), encoding = "UTF-8")
|
||||||
json.dump({"existing": "prefs"}, f)
|
|
||||||
|
|
||||||
scraper = WebScrapingMixin()
|
scraper = WebScrapingMixin()
|
||||||
scraper.browser_config.user_data_dir = str(tmp_path)
|
scraper.browser_config.user_data_dir = str(tmp_path)
|
||||||
@@ -771,8 +799,7 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
await scraper.create_browser_session()
|
await scraper.create_browser_session()
|
||||||
|
|
||||||
# Verify preferences file was not overwritten
|
# Verify preferences file was not overwritten
|
||||||
with open(prefs_file, "r", encoding = "UTF-8") as f:
|
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
|
||||||
prefs = json.load(f)
|
|
||||||
assert prefs["existing"] == "prefs"
|
assert prefs["existing"] == "prefs"
|
||||||
|
|
||||||
# Test with missing preferences file
|
# Test with missing preferences file
|
||||||
@@ -780,8 +807,7 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
await scraper.create_browser_session()
|
await scraper.create_browser_session()
|
||||||
|
|
||||||
# Verify new preferences file was created with correct settings
|
# Verify new preferences file was created with correct settings
|
||||||
with open(prefs_file, "r", encoding = "UTF-8") as f:
|
prefs = json.loads(prefs_file.read_text(encoding = "UTF-8"))
|
||||||
prefs = json.load(f)
|
|
||||||
assert prefs["credentials_enable_service"] is False
|
assert prefs["credentials_enable_service"] is False
|
||||||
assert prefs["enable_do_not_track"] is True
|
assert prefs["enable_do_not_track"] is True
|
||||||
assert prefs["profile"]["password_manager_enabled"] is False
|
assert prefs["profile"]["password_manager_enabled"] is False
|
||||||
@@ -815,6 +841,10 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
# Mock os.path.exists to return True for both Chrome and Edge paths
|
# Mock os.path.exists to return True for both Chrome and Edge paths
|
||||||
monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"})
|
monkeypatch.setattr(os.path, "exists", lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge"})
|
||||||
|
|
||||||
|
async def mock_exists_async(path:str | Path) -> bool:
|
||||||
|
return str(path) in {"/usr/bin/chrome", "/usr/bin/edge"}
|
||||||
|
monkeypatch.setattr(files, "exists", mock_exists_async)
|
||||||
|
|
||||||
# Test with custom arguments
|
# Test with custom arguments
|
||||||
scraper = WebScrapingMixin()
|
scraper = WebScrapingMixin()
|
||||||
scraper.browser_config.arguments = ["--custom-arg=value", "--another-arg"]
|
scraper.browser_config.arguments = ["--custom-arg=value", "--another-arg"]
|
||||||
@@ -875,27 +905,41 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
# Mock Config class
|
# Mock Config class
|
||||||
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
|
monkeypatch.setattr(nodriver.core.config, "Config", DummyConfig) # type: ignore[unused-ignore,reportAttributeAccessIssue,attr-defined]
|
||||||
|
|
||||||
# Mock os.path.exists to return True for browser binaries and extension files, real_exists for others
|
# Mock files.exists and files.is_dir to return appropriate values
|
||||||
real_exists = os.path.exists
|
async def mock_exists(path:str | Path) -> bool:
|
||||||
monkeypatch.setattr(
|
path_str = str(path)
|
||||||
os.path,
|
# Resolve real paths to handle symlinks (e.g., /var -> /private/var on macOS)
|
||||||
"exists",
|
real_path = str(Path(path_str).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
lambda p: p in {"/usr/bin/chrome", "/usr/bin/edge", str(ext1), str(ext2)} or real_exists(p),
|
real_ext1 = str(Path(ext1).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
)
|
real_ext2 = str(Path(ext2).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
|
return path_str in {"/usr/bin/chrome", "/usr/bin/edge"} or real_path in {real_ext1, real_ext2} or os.path.exists(path_str) # noqa: ASYNC240
|
||||||
|
|
||||||
|
async def mock_is_dir(path:str | Path) -> bool:
|
||||||
|
path_str = str(path)
|
||||||
|
# Resolve real paths to handle symlinks
|
||||||
|
real_path = str(Path(path_str).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
|
real_ext1 = str(Path(ext1).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
|
real_ext2 = str(Path(ext2).resolve()) # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
|
# Nodriver extracts CRX files to temp directories, so they appear as directories
|
||||||
|
if real_path in {real_ext1, real_ext2}:
|
||||||
|
return True
|
||||||
|
return Path(path_str).is_dir() # noqa: ASYNC240 Test mock, runs synchronously
|
||||||
|
|
||||||
|
monkeypatch.setattr(files, "exists", mock_exists)
|
||||||
|
monkeypatch.setattr(files, "is_dir", mock_is_dir)
|
||||||
|
|
||||||
# Test extension loading
|
# Test extension loading
|
||||||
scraper = WebScrapingMixin()
|
scraper = WebScrapingMixin()
|
||||||
scraper.browser_config.extensions = [str(ext1), str(ext2)]
|
scraper.browser_config.extensions = [str(ext1), str(ext2)]
|
||||||
scraper.browser_config.binary_location = "/usr/bin/chrome"
|
scraper.browser_config.binary_location = "/usr/bin/chrome"
|
||||||
# Removed monkeypatch for os.path.exists so extension files are detected
|
|
||||||
await scraper.create_browser_session()
|
await scraper.create_browser_session()
|
||||||
|
|
||||||
# Verify extensions were loaded
|
# Verify extensions were loaded
|
||||||
config = _nodriver_start_mock().call_args[0][0]
|
config = _nodriver_start_mock().call_args[0][0]
|
||||||
assert len(config._extensions) == 2
|
assert len(config._extensions) == 2
|
||||||
for ext_path in config._extensions:
|
for ext_path in config._extensions:
|
||||||
assert os.path.exists(ext_path)
|
assert await files.exists(ext_path)
|
||||||
assert os.path.isdir(ext_path)
|
assert await files.is_dir(ext_path)
|
||||||
|
|
||||||
# Test with non-existent extension
|
# Test with non-existent extension
|
||||||
scraper.browser_config.extensions = ["non_existent.crx"]
|
scraper.browser_config.extensions = ["non_existent.crx"]
|
||||||
@@ -976,8 +1020,7 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
scraper.browser_config.user_data_dir = str(tmp_path)
|
scraper.browser_config.user_data_dir = str(tmp_path)
|
||||||
scraper.browser_config.profile_name = "Default"
|
scraper.browser_config.profile_name = "Default"
|
||||||
await scraper.create_browser_session()
|
await scraper.create_browser_session()
|
||||||
with open(state_file, "w", encoding = "utf-8") as f:
|
state_file.write_text('{"foo": "bar"}', encoding = "utf-8")
|
||||||
f.write('{"foo": "bar"}')
|
|
||||||
scraper.browser._process_pid = 12345
|
scraper.browser._process_pid = 12345
|
||||||
scraper.browser.stop = MagicMock()
|
scraper.browser.stop = MagicMock()
|
||||||
with patch("psutil.Process") as mock_proc:
|
with patch("psutil.Process") as mock_proc:
|
||||||
@@ -989,8 +1032,7 @@ class TestWebScrapingBrowserConfiguration:
|
|||||||
scraper2.browser_config.user_data_dir = str(tmp_path)
|
scraper2.browser_config.user_data_dir = str(tmp_path)
|
||||||
scraper2.browser_config.profile_name = "Default"
|
scraper2.browser_config.profile_name = "Default"
|
||||||
await scraper2.create_browser_session()
|
await scraper2.create_browser_session()
|
||||||
with open(state_file, "r", encoding = "utf-8") as f:
|
data = state_file.read_text(encoding = "utf-8")
|
||||||
data = f.read()
|
|
||||||
assert data == '{"foo": "bar"}'
|
assert data == '{"foo": "bar"}'
|
||||||
scraper2.browser._process_pid = 12346
|
scraper2.browser._process_pid = 12346
|
||||||
scraper2.browser.stop = MagicMock()
|
scraper2.browser.stop = MagicMock()
|
||||||
@@ -1814,6 +1856,7 @@ class TestWebScrapingMixinPortRetry:
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Test error handling when browser connection fails."""
|
"""Test error handling when browser connection fails."""
|
||||||
with patch("os.path.exists", return_value = True), \
|
with patch("os.path.exists", return_value = True), \
|
||||||
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||||
@@ -1833,6 +1876,7 @@ class TestWebScrapingMixinPortRetry:
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Test error handling when browser connection fails with non-root error."""
|
"""Test error handling when browser connection fails with non-root error."""
|
||||||
with patch("os.path.exists", return_value = True), \
|
with patch("os.path.exists", return_value = True), \
|
||||||
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||||
@@ -1860,6 +1904,7 @@ class TestWebScrapingMixinPortRetry:
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Test error handling when browser startup fails with root error."""
|
"""Test error handling when browser startup fails with root error."""
|
||||||
with patch("os.path.exists", return_value = True), \
|
with patch("os.path.exists", return_value = True), \
|
||||||
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||||
|
|
||||||
@@ -1878,6 +1923,7 @@ class TestWebScrapingMixinPortRetry:
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Test error handling when browser startup fails with non-root error."""
|
"""Test error handling when browser startup fails with non-root error."""
|
||||||
with patch("os.path.exists", return_value = True), \
|
with patch("os.path.exists", return_value = True), \
|
||||||
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.files.exists", AsyncMock(return_value = True)), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
|
||||||
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user