fix: extend command fails with >25 ads due to pagination (#793)

This commit is contained in:
Jens
2026-01-28 06:08:03 +01:00
committed by GitHub
parent d954e849a2
commit 7098719d5b
7 changed files with 589 additions and 327 deletions

View File

@@ -999,15 +999,23 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
LOG.info("Extending ad '%s' (ID: %s)...", ad_cfg.title, ad_cfg.id)
try:
# Navigate to ad management page
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
# Find and click "Verlängern" (extend) button for this ad
# Navigate to ad management page and find extend button across all pages
extend_button_xpath = f'//li[@data-adid="{ad_cfg.id}"]//button[contains(., "Verlängern")]'
try:
await self.web_click(By.XPATH, extend_button_xpath)
except TimeoutError:
async def find_and_click_extend_button(page_num:int) -> bool:
    """Page callback: click this ad's extend button if it is on the current overview page.

    :param page_num: number of the overview page currently displayed (used for logging only)
    :return: True once the button was found and clicked (stops pagination),
        False if a TimeoutError occurred while locating or clicking it (pagination continues)
    """
    lookup_timeout = self._timeout("quick_dom")
    try:
        button = await self.web_find(By.XPATH, extend_button_xpath, timeout = lookup_timeout)
        LOG.info("Found extend button on page %s", page_num)
        # The click stays inside the try block so that a timeout here is treated like "not found"
        await button.click()
    except TimeoutError:
        LOG.debug("Extend button not found on page %s", page_num)
        return False  # Continue to next page
    return True  # Success - stop pagination
success = await self._navigate_paginated_ad_overview(find_and_click_extend_button, page_url = f"{self.root_url}/m-meine-anzeigen.html")
if not success:
LOG.error(" -> FAILED: Could not find extend button for ad ID %s", ad_cfg.id)
return False

View File

@@ -148,104 +148,34 @@ class AdExtractor(WebScrapingMixin):
:return: the links to your ad pages
"""
# navigate to "your ads" page
await self.web_open("https://www.kleinanzeigen.de/m-meine-anzeigen.html")
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later
# Try to find the main ad list container first
try:
_ = await self.web_find(By.ID, "my-manageitems-adlist")
except TimeoutError:
LOG.warning("Ad list container #my-manageitems-adlist not found. Maybe no ads present?")
return []
# --- Pagination handling ---
multi_page = False
pagination_timeout = self._timeout("pagination_initial")
try:
# Correct selector: Use uppercase '.Pagination'
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = pagination_timeout) # Increased timeout slightly
# Correct selector: Use 'aria-label'
# Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later)
next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
if next_buttons:
# Check if at least one 'Nächste' button is not disabled (optional but good practice)
enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")]
if enabled_next_buttons:
multi_page = True
LOG.info("Multiple ad pages detected.")
else:
LOG.info("Next button found but is disabled. Assuming single effective page.")
else:
LOG.info('No "Naechste" button found within pagination. Assuming single page.')
except TimeoutError:
# This will now correctly trigger only if the '.Pagination' div itself is not found
LOG.info("No pagination controls found. Assuming single page.")
except Exception as e:
LOG.exception("Error during pagination detection: %s", e)
LOG.info("Assuming single page due to error during pagination check.")
# --- End Pagination Handling ---
refs:list[str] = []
current_page = 1
while True: # Loop reference extraction
LOG.info("Extracting ads from page %s...", current_page)
# scroll down to load dynamically if necessary
await self.web_scroll_page_down()
await self.web_sleep(2000, 3000) # Consider replacing with explicit waits
# Re-find the ad list container on the current page/state
async def extract_page_refs(page_num:int) -> bool:
"""Extract ad reference URLs from the current page.
:param page_num: The current page number being processed
:return: True to stop pagination (e.g. ads container disappeared), False to continue to next page
"""
try:
ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist")
list_items = await self.web_find_all(By.CLASS_NAME, "cardbox", parent = ad_list_container)
LOG.info("Found %s ad items on page %s.", len(list_items), current_page)
except TimeoutError:
LOG.warning("Could not find ad list container or items on page %s.", current_page)
break # Stop if ads disappear
LOG.info("Found %s ad items on page %s.", len(list_items), page_num)
# Extract references using the CORRECTED selector
try:
page_refs:list[str] = [str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"]) for li in list_items]
refs.extend(page_refs)
LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), current_page)
except Exception as e:
# Log the error if extraction fails for some items, but try to continue
LOG.exception("Error extracting refs on page %s: %s", current_page, e)
LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), page_num)
return False # Continue to next page
if not multi_page: # only one iteration for single-page overview
break
# --- Navigate to next page ---
follow_up_timeout = self._timeout("pagination_follow_up")
try:
# Find the pagination section again (scope might have changed after scroll/wait)
pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = follow_up_timeout)
# Find the "Next" button using the correct aria-label selector and ensure it's not disabled
next_button_element = None
possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
for btn in possible_next_buttons:
if not btn.attrs.get("disabled"): # Check if the button is enabled
next_button_element = btn
break # Found an enabled next button
if next_button_element:
LOG.info("Navigating to next page...")
await next_button_element.click()
current_page += 1
# Wait for page load - consider waiting for a specific element on the new page instead of fixed sleep
await self.web_sleep(3000, 4000)
else:
LOG.info('Last ad overview page explored (no enabled "Naechste" button found).')
break
except TimeoutError:
# This might happen if pagination disappears on the last page after loading
LOG.info("No pagination controls found after scrolling/waiting. Assuming last page.")
break
LOG.warning("Could not find ad list container or items on page %s.", page_num)
return True # Stop pagination (ads disappeared)
except Exception as e:
LOG.exception("Error during pagination navigation: %s", e)
break
# --- End Navigation ---
# Continue despite error for resilience against transient web scraping issues
# (e.g., DOM structure changes, network glitches). LOG.exception ensures visibility.
LOG.exception("Error extracting refs on page %s: %s", page_num, e)
return False # Continue to next page
await self._navigate_paginated_ad_overview(extract_page_refs)
if not refs:
LOG.warning("No ad URLs were extracted.")

View File

@@ -112,6 +112,9 @@ kleinanzeigen_bot/__init__.py:
" -> FAILED: Timeout while extending ad '%s': %s": " -> FEHLER: Zeitüberschreitung beim Verlängern der Anzeige '%s': %s"
" -> FAILED: Could not persist extension for ad '%s': %s": " -> FEHLER: Verlängerung der Anzeige '%s' konnte nicht gespeichert werden: %s"
find_and_click_extend_button:
"Found extend button on page %s": "'Verlängern'-Button auf Seite %s gefunden"
finalize_installation_mode:
"Config file: %s": "Konfigurationsdatei: %s"
"First run detected, prompting user for installation mode": "Erster Start erkannt, frage Benutzer nach Installationsmodus"
@@ -259,21 +262,11 @@ kleinanzeigen_bot/extract.py:
"Failed to extract ad ID from URL '%s': %s": "Fehler beim Extrahieren der Anzeigen-ID aus der URL '%s': %s"
extract_own_ads_urls:
"Ad list container #my-manageitems-adlist not found. Maybe no ads present?": "Anzeigenlistencontainer #my-manageitems-adlist nicht gefunden. Vielleicht sind keine Anzeigen vorhanden?"
"Multiple ad pages detected.": "Mehrere Anzeigenseiten erkannt."
"Next button found but is disabled. Assuming single effective page.": "Weiter-Button gefunden, aber deaktiviert. Es wird von einer einzelnen effektiven Seite ausgegangen."
"No \"Naechste\" button found within pagination. Assuming single page.": "Kein \"Nächste\"-Button in der Paginierung gefunden. Es wird von einer einzelnen Seite ausgegangen."
"No pagination controls found. Assuming single page.": "Keine Paginierungssteuerung gefunden. Es wird von einer einzelnen Seite ausgegangen."
"Assuming single page due to error during pagination check.": "Es wird von einer einzelnen Seite ausgegangen wegen eines Fehlers bei der Paginierungsprüfung."
"Navigating to next page...": "Navigiere zur nächsten Seite..."
"Last ad overview page explored (no enabled \"Naechste\" button found).": "Letzte Anzeigenübersichtsseite erkundet (kein aktivierter \"Nächste\"-Button gefunden)."
"No pagination controls found after scrolling/waiting. Assuming last page.": "Keine Paginierungssteuerung nach dem Scrollen/Warten gefunden. Es wird von der letzten Seite ausgegangen."
"No ad URLs were extracted.": "Es wurden keine Anzeigen-URLs extrahiert."
extract_page_refs:
"Could not find ad list container or items on page %s.": "Anzeigenlistencontainer oder Elemente auf Seite %s nicht gefunden."
"Error during pagination detection: %s": "Fehler bei der Paginierungserkennung: %s"
"Error during pagination navigation: %s": "Fehler bei der Paginierungsnavigation: %s"
"Error extracting refs on page %s: %s": "Fehler beim Extrahieren der Referenzen auf Seite %s: %s"
"Extracting ads from page %s...": "Extrahiere Anzeigen von Seite %s..."
"Found %s ad items on page %s.": "%s Anzeigen-Elemente auf Seite %s gefunden."
"Successfully extracted %s refs from page %s.": "%s Referenzen von Seite %s erfolgreich extrahiert."
@@ -488,6 +481,18 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
"Combobox missing aria-controls attribute": "Combobox fehlt aria-controls Attribut"
"No matching option found in combobox: '%s'": "Keine passende Option in Combobox gefunden: '%s'"
_navigate_paginated_ad_overview:
"Failed to open ad overview page at %s: timeout": "Fehler beim Öffnen der Anzeigenübersichtsseite unter %s: Zeitüberschreitung"
"Scroll timeout on page %s (non-critical, continuing)": "Zeitüberschreitung beim Scrollen auf Seite %s (nicht kritisch, wird fortgesetzt)"
"Page action timed out on page %s": "Seitenaktion hat auf Seite %s eine Zeitüberschreitung erreicht"
"Ad list container not found. Maybe no ads present?": "Anzeigenlistencontainer nicht gefunden. Vielleicht sind keine Anzeigen vorhanden?"
"Multiple ad pages detected.": "Mehrere Anzeigenseiten erkannt."
"No pagination controls found. Assuming single page.": "Keine Paginierungssteuerung gefunden. Es wird von einer einzelnen Seite ausgegangen."
"Processing page %s...": "Verarbeite Seite %s..."
"Navigating to page %s...": "Navigiere zu Seite %s..."
"Last page reached (no enabled 'Naechste' button found).": "Letzte Seite erreicht (kein aktivierter 'Naechste'-Button gefunden)."
"No pagination controls found. Assuming last page.": "Keine Paginierungssteuerung gefunden. Es wird von der letzten Seite ausgegangen."
close_browser_session:
"Closing Browser session...": "Schließe Browser-Sitzung..."

View File

@@ -969,6 +969,111 @@ class WebScrapingMixin:
)
await self.page.sleep(duration / 1_000)
async def _navigate_paginated_ad_overview(
    self,
    page_action:Callable[[int], Awaitable[bool]],
    page_url:str = "https://www.kleinanzeigen.de/m-meine-anzeigen.html",
    *,
    max_pages:int = 10,
) -> bool:
    """
    Navigate through the paginated ad overview, awaiting page_action once per page.

    TimeoutError raised while opening the page, locating the ad list or the pagination
    controls, scrolling, clicking the "next" button, or running page_action is caught
    and logged here; in all of those cases a boolean is returned instead.

    Important: a TimeoutError escaping from page_action is treated as fatal and ends the
    whole navigation with False. A callback that merely wants to say "not on this page,
    try the next one" must catch TimeoutError itself and return False.

    Args:
        page_action: Async callable that receives the current page number (starting at 1).
            Return True to stop pagination (action succeeded), False to continue.
        page_url: URL of the paginated overview page (default: kleinanzeigen ad management page)
        max_pages: Maximum number of pages to process (safety limit)

    Returns:
        True if page_action returned True on any page, False otherwise (including when the
        overview could not be opened, no ad list was found, page_action timed out, the last
        page was reached, or max_pages pages were processed).

    Example:
        async def find_ad_callback(page_num: int) -> bool:
            try:
                element = await self.web_find(By.XPATH, "//div[@id='my-ad']")
            except TimeoutError:
                return False  # not on this page -> continue with the next page
            await element.click()
            return True

        success = await self._navigate_paginated_ad_overview(find_ad_callback)
    """
    try:
        await self.web_open(page_url)
    except TimeoutError:
        LOG.warning("Failed to open ad overview page at %s: timeout", page_url)
        return False

    await self.web_sleep(2000, 3000)

    # Without the ad list container there is nothing to paginate over
    try:
        await self.web_find(By.ID, "my-manageitems-adlist")
    except TimeoutError:
        LOG.warning("Ad list container not found. Maybe no ads present?")
        return False

    # Decide once, up front, whether more than one page exists
    multi_page = False
    pagination_timeout = self._timeout("pagination_initial")
    try:
        if await self._find_enabled_next_button(pagination_timeout) is not None:
            multi_page = True
            LOG.info("Multiple ad pages detected.")
    except TimeoutError:
        LOG.info("No pagination controls found. Assuming single page.")

    current_page = 1
    # NOTE(review): when max_pages is exhausted the loop ends silently and False is returned;
    # later pages are never visited. Confirm the default is large enough for big accounts.
    while current_page <= max_pages:
        LOG.info("Processing page %s...", current_page)

        # Scrolling is best effort; a timeout here must not abort the page
        try:
            await self.web_scroll_page_down()
        except TimeoutError:
            LOG.debug("Scroll timeout on page %s (non-critical, continuing)", current_page)

        await self.web_sleep(2000, 3000)

        try:
            if await page_action(current_page):
                return True
        except TimeoutError:
            LOG.warning("Page action timed out on page %s", current_page)
            return False

        if not multi_page:
            break

        follow_up_timeout = self._timeout("pagination_follow_up")
        try:
            next_button = await self._find_enabled_next_button(follow_up_timeout)
            if next_button:
                LOG.info("Navigating to page %s...", current_page + 1)
                await next_button.click()
                await self.web_sleep(3000, 4000)
                current_page += 1
            else:
                LOG.info("Last page reached (no enabled 'Naechste' button found).")
                break
        except TimeoutError:
            LOG.info("No pagination controls found. Assuming last page.")
            break

    return False

async def _find_enabled_next_button(self, timeout:Any) -> Any:
    """
    Return the first "Nächste" button inside the pagination section that is not disabled.

    Args:
        timeout: Timeout passed to web_find when locating the ".Pagination" section

    Returns:
        The first button whose "disabled" attribute is falsy, or None if there is none.

    Raises:
        TimeoutError: propagated unchanged from web_find / web_find_all; callers decide
            how to log it.
    """
    pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = timeout)
    candidates = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section)
    # NOTE(review): a button counts as enabled when attrs.get("disabled") is falsy. If attrs
    # exposes a bare boolean attribute as an empty string, a disabled button would pass this
    # check - confirm against the Element.attrs semantics.
    return next((btn for btn in candidates if not btn.attrs.get("disabled")), None)
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200, headers:dict[str, str] | None = None) -> Any:
method = method.upper()
LOG.debug(" -> HTTP %s [%s]...", method, url)

View File

@@ -5,13 +5,14 @@ import json # isort: skip
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock, patch
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from kleinanzeigen_bot import KleinanzeigenBot, misc
from kleinanzeigen_bot.model.ad_model import Ad
from kleinanzeigen_bot.utils import dicts
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element
@pytest.fixture
@@ -34,13 +35,7 @@ def base_ad_config_with_id() -> dict[str, Any]:
"republication_interval": 7,
"created_on": "2024-12-07T10:00:00",
"updated_on": "2024-12-10T15:20:00",
"contact": {
"name": "Test User",
"zipcode": "12345",
"location": "Test City",
"street": "",
"phone": ""
}
"contact": {"name": "Test User", "zipcode": "12345", "location": "Test City", "street": "", "phone": ""},
}
@@ -50,9 +45,7 @@ class TestExtendCommand:
@pytest.mark.asyncio
async def test_run_extend_command_no_ads(self, test_bot:KleinanzeigenBot) -> None:
"""Test running extend command with no ads."""
with patch.object(test_bot, "load_config"), \
patch.object(test_bot, "load_ads", return_value = []), \
patch("kleinanzeigen_bot.UpdateChecker"):
with patch.object(test_bot, "load_config"), patch.object(test_bot, "load_ads", return_value = []), patch("kleinanzeigen_bot.UpdateChecker"):
await test_bot.run(["script.py", "extend"])
assert test_bot.command == "extend"
assert test_bot.ads_selector == "all"
@@ -60,11 +53,13 @@ class TestExtendCommand:
@pytest.mark.asyncio
async def test_run_extend_command_with_specific_ids(self, test_bot:KleinanzeigenBot) -> None:
"""Test running extend command with specific ad IDs."""
with patch.object(test_bot, "load_config"), \
patch.object(test_bot, "load_ads", return_value = []), \
patch.object(test_bot, "create_browser_session", new_callable = AsyncMock), \
patch.object(test_bot, "login", new_callable = AsyncMock), \
patch("kleinanzeigen_bot.UpdateChecker"):
with (
patch.object(test_bot, "load_config"),
patch.object(test_bot, "load_ads", return_value = []),
patch.object(test_bot, "create_browser_session", new_callable = AsyncMock),
patch.object(test_bot, "login", new_callable = AsyncMock),
patch("kleinanzeigen_bot.UpdateChecker"),
):
await test_bot.run(["script.py", "extend", "--ads=12345,67890"])
assert test_bot.command == "extend"
assert test_bot.ads_selector == "12345,67890"
@@ -74,19 +69,14 @@ class TestExtendAdsMethod:
"""Tests for the extend_ads() method."""
@pytest.mark.asyncio
async def test_extend_ads_skips_unpublished_ad(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_skips_unpublished_ad(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads skips ads without an ID (unpublished)."""
# Create ad without ID
ad_config = base_ad_config_with_id.copy()
ad_config["id"] = None
ad_cfg = Ad.model_validate(ad_config)
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock):
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, patch.object(test_bot, "web_sleep", new_callable = AsyncMock):
mock_request.return_value = {"content": '{"ads": []}'}
await test_bot.extend_ads([("test.yaml", ad_cfg, ad_config)])
@@ -95,16 +85,11 @@ class TestExtendAdsMethod:
mock_request.assert_called_once() # Only the API call to get published ads
@pytest.mark.asyncio
async def test_extend_ads_skips_ad_not_in_published_list(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_skips_ad_not_in_published_list(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads skips ads not found in the published ads API response."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock):
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, patch.object(test_bot, "web_sleep", new_callable = AsyncMock):
# Return empty published ads list
mock_request.return_value = {"content": '{"ads": []}'}
@@ -114,11 +99,7 @@ class TestExtendAdsMethod:
mock_request.assert_called_once()
@pytest.mark.asyncio
async def test_extend_ads_skips_inactive_ad(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_skips_inactive_ad(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads skips ads with state != 'active'."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -128,14 +109,16 @@ class TestExtendAdsMethod:
"id": 12345,
"title": "Test Ad Title",
"state": "paused", # Not active
"endDate": "05.02.2026"
"endDate": "05.02.2026",
}
]
}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)])
@@ -144,11 +127,7 @@ class TestExtendAdsMethod:
mock_extend_ad.assert_not_called()
@pytest.mark.asyncio
async def test_extend_ads_skips_ad_without_enddate(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_skips_ad_without_enddate(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads skips ads without endDate in API response."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -157,15 +136,17 @@ class TestExtendAdsMethod:
{
"id": 12345,
"title": "Test Ad Title",
"state": "active"
"state": "active",
# No endDate field
}
]
}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)])
@@ -174,11 +155,7 @@ class TestExtendAdsMethod:
mock_extend_ad.assert_not_called()
@pytest.mark.asyncio
async def test_extend_ads_skips_ad_outside_window(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_skips_ad_outside_window(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads skips ads expiring more than 8 days in the future."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -186,20 +163,13 @@ class TestExtendAdsMethod:
future_date = misc.now() + timedelta(days = 30)
end_date_str = future_date.strftime("%d.%m.%Y")
published_ads_json = {
"ads": [
{
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": end_date_str
}
]
}
published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)])
@@ -208,11 +178,7 @@ class TestExtendAdsMethod:
mock_extend_ad.assert_not_called()
@pytest.mark.asyncio
async def test_extend_ads_extends_ad_within_window(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_extends_ad_within_window(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads extends ads within the 8-day window."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -220,20 +186,13 @@ class TestExtendAdsMethod:
future_date = misc.now() + timedelta(days = 5)
end_date_str = future_date.strftime("%d.%m.%Y")
published_ads_json = {
"ads": [
{
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": end_date_str
}
]
}
published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
mock_extend_ad.return_value = True
@@ -243,11 +202,7 @@ class TestExtendAdsMethod:
mock_extend_ad.assert_called_once()
@pytest.mark.asyncio
async def test_extend_ads_no_eligible_ads(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_no_eligible_ads(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test extend_ads when no ads are eligible for extension."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -255,20 +210,13 @@ class TestExtendAdsMethod:
future_date = misc.now() + timedelta(days = 30)
end_date_str = future_date.strftime("%d.%m.%Y")
published_ads_json = {
"ads": [
{
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": end_date_str
}
]
}
published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)])
@@ -277,11 +225,7 @@ class TestExtendAdsMethod:
mock_extend_ad.assert_not_called()
@pytest.mark.asyncio
async def test_extend_ads_handles_multiple_ads(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_handles_multiple_ads(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads processes multiple ads correctly."""
ad_cfg1 = Ad.model_validate(base_ad_config_with_id)
@@ -297,46 +241,36 @@ class TestExtendAdsMethod:
published_ads_json = {
"ads": [
{
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": within_window.strftime("%d.%m.%Y")
},
{
"id": 67890,
"title": "Second Test Ad",
"state": "active",
"endDate": outside_window.strftime("%d.%m.%Y")
}
{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": within_window.strftime("%d.%m.%Y")},
{"id": 67890, "title": "Second Test Ad", "state": "active", "endDate": outside_window.strftime("%d.%m.%Y")},
]
}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
mock_extend_ad.return_value = True
await test_bot.extend_ads([
("test1.yaml", ad_cfg1, base_ad_config_with_id),
("test2.yaml", ad_cfg2, ad_config2)
])
await test_bot.extend_ads([("test1.yaml", ad_cfg1, base_ad_config_with_id), ("test2.yaml", ad_cfg2, ad_config2)])
# Verify extend_ad was called only once (for the ad within window)
assert mock_extend_ad.call_count == 1
class TestExtendAdMethod:
"""Tests for the extend_ad() method."""
"""Tests for the extend_ad() method.
Note: These tests mock `_navigate_paginated_ad_overview` rather than individual browser methods
(web_find, web_click, etc.) because the pagination helper involves complex multi-step browser
interactions that would require extensive, brittle mock choreography. Mocking at this level
keeps tests focused on extend_ad's own logic (dialog handling, YAML persistence, error paths).
"""
@pytest.mark.asyncio
async def test_extend_ad_success(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any],
tmp_path:Path
) -> None:
async def test_extend_ad_success(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None:
"""Test successful ad extension."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -344,27 +278,27 @@ class TestExtendAdMethod:
ad_file = tmp_path / "test_ad.yaml"
dicts.save_dict(str(ad_file), base_ad_config_with_id)
with patch.object(test_bot, "web_open", new_callable = AsyncMock), \
patch.object(test_bot, "web_click", new_callable = AsyncMock), \
patch("kleinanzeigen_bot.misc.now") as mock_now:
with (
patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate,
patch.object(test_bot, "web_click", new_callable = AsyncMock),
patch("kleinanzeigen_bot.misc.now") as mock_now,
):
# Test mock datetime - timezone not relevant for timestamp formatting test
mock_now.return_value = datetime(2025, 1, 28, 14, 30, 0) # noqa: DTZ001
mock_paginate.return_value = True
result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id)
assert result is True
assert mock_paginate.call_count == 1
# Verify updated_on was updated in the YAML file
updated_config = dicts.load_dict(str(ad_file))
assert updated_config["updated_on"] == "2025-01-28T14:30:00"
@pytest.mark.asyncio
async def test_extend_ad_button_not_found(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any],
tmp_path:Path
) -> None:
async def test_extend_ad_button_not_found(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None:
"""Test extend_ad when the Verlängern button is not found."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -372,22 +306,17 @@ class TestExtendAdMethod:
ad_file = tmp_path / "test_ad.yaml"
dicts.save_dict(str(ad_file), base_ad_config_with_id)
with patch.object(test_bot, "web_open", new_callable = AsyncMock), \
patch.object(test_bot, "web_click", new_callable = AsyncMock) as mock_click:
# Simulate button not found by raising TimeoutError
mock_click.side_effect = TimeoutError("Button not found")
with patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate:
# Simulate button not found by having pagination return False (not found on any page)
mock_paginate.return_value = False
result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id)
assert result is False
assert mock_paginate.call_count == 1
@pytest.mark.asyncio
async def test_extend_ad_dialog_timeout(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any],
tmp_path:Path
) -> None:
async def test_extend_ad_dialog_timeout(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None:
"""Test extend_ad when the confirmation dialog times out (no dialog appears)."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -395,14 +324,18 @@ class TestExtendAdMethod:
ad_file = tmp_path / "test_ad.yaml"
dicts.save_dict(str(ad_file), base_ad_config_with_id)
with patch.object(test_bot, "web_open", new_callable = AsyncMock), \
patch.object(test_bot, "web_click", new_callable = AsyncMock) as mock_click, \
patch("kleinanzeigen_bot.misc.now") as mock_now:
with (
patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate,
patch.object(test_bot, "web_click", new_callable = AsyncMock) as mock_click,
patch("kleinanzeigen_bot.misc.now") as mock_now,
):
# Test mock datetime - timezone not relevant for timestamp formatting test
mock_now.return_value = datetime(2025, 1, 28, 14, 30, 0) # noqa: DTZ001
# First click (Verlängern button) succeeds, second click (dialog close) times out
mock_click.side_effect = [None, TimeoutError("Dialog not found")]
# Pagination succeeds (button found and clicked)
mock_paginate.return_value = True
# Dialog close button times out
mock_click.side_effect = TimeoutError("Dialog not found")
result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id)
@@ -410,12 +343,7 @@ class TestExtendAdMethod:
assert result is True
@pytest.mark.asyncio
async def test_extend_ad_exception_handling(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any],
tmp_path:Path
) -> None:
async def test_extend_ad_exception_handling(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None:
"""Test extend_ad propagates unexpected exceptions."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -423,20 +351,15 @@ class TestExtendAdMethod:
ad_file = tmp_path / "test_ad.yaml"
dicts.save_dict(str(ad_file), base_ad_config_with_id)
with patch.object(test_bot, "web_open", new_callable = AsyncMock) as mock_open:
# Simulate unexpected exception
mock_open.side_effect = Exception("Unexpected error")
with patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate:
# Simulate unexpected exception during pagination
mock_paginate.side_effect = Exception("Unexpected error")
with pytest.raises(Exception, match = "Unexpected error"):
await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id)
@pytest.mark.asyncio
async def test_extend_ad_updates_yaml_file(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any],
tmp_path:Path
) -> None:
async def test_extend_ad_updates_yaml_file(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None:
"""Test that extend_ad correctly updates the YAML file with new timestamp."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -445,12 +368,17 @@ class TestExtendAdMethod:
original_updated_on = base_ad_config_with_id["updated_on"]
dicts.save_dict(str(ad_file), base_ad_config_with_id)
with patch.object(test_bot, "web_open", new_callable = AsyncMock), \
patch.object(test_bot, "web_click", new_callable = AsyncMock), \
patch("kleinanzeigen_bot.misc.now") as mock_now:
with (
patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate,
patch.object(test_bot, "web_click", new_callable = AsyncMock),
patch("kleinanzeigen_bot.misc.now") as mock_now,
):
# Test mock datetime - timezone not relevant for timestamp formatting test
mock_now.return_value = datetime(2025, 1, 28, 14, 30, 0) # noqa: DTZ001
# Pagination succeeds (button found and clicked)
mock_paginate.return_value = True
await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id)
# Load the updated file and verify the timestamp changed
@@ -458,16 +386,67 @@ class TestExtendAdMethod:
assert updated_config["updated_on"] != original_updated_on
assert updated_config["updated_on"] == "2025-01-28T14:30:00"
@pytest.mark.asyncio
async def test_extend_ad_with_web_mocks(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None:
    """Run extend_ad against web-level mocks only, so the real pagination helper and the
    find_and_click_extend_button callback are exercised instead of being patched out.

    Expects extend_ad to return True, the extend button to be clicked exactly once, and
    ``updated_on`` in the YAML file to equal the mocked current time.
    """
    ad_cfg = Ad.model_validate(base_ad_config_with_id)

    # Persist the ad config to a temporary YAML file that extend_ad can rewrite
    ad_file = tmp_path / "test_ad.yaml"
    dicts.save_dict(str(ad_file), base_ad_config_with_id)

    extend_button = AsyncMock()
    extend_button.click = AsyncMock()
    pagination_element = MagicMock()
    web_find_calls = 0

    async def fake_web_find(selector_type:By, selector_value:str, **kwargs:Any) -> Element:
        """Dispatch on the selector; every invocation is counted, whatever the selector."""
        nonlocal web_find_calls
        web_find_calls += 1

        is_ad_list = selector_type == By.ID and selector_value == "my-manageitems-adlist"
        is_pagination = selector_type == By.CSS_SELECTOR and selector_value == ".Pagination"
        is_extend_button = selector_type == By.XPATH and "Verlängern" in selector_value

        if is_ad_list:
            # Ad list container lookup
            return MagicMock()
        if is_pagination:
            # Only when this lookup is the second web_find call overall does it time out,
            # which the test uses to signal "no pagination" (single page)
            if web_find_calls == 2:
                raise TimeoutError("No pagination")
            return pagination_element
        if is_extend_button:
            # Extend button lookup
            return extend_button
        raise TimeoutError(f"Unexpected find: {selector_type} {selector_value}")

    with (
        patch("kleinanzeigen_bot.misc.now") as mock_now,
        patch.object(test_bot, "_timeout", return_value = 10),
        patch.object(test_bot, "web_click", new_callable = AsyncMock),
        patch.object(test_bot, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(test_bot, "web_find_all", new_callable = AsyncMock, return_value = []),
        patch.object(test_bot, "web_find", new_callable = AsyncMock, side_effect = fake_web_find),
        patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
        patch.object(test_bot, "web_open", new_callable = AsyncMock),
    ):
        # Naive datetime is fine here: only the formatted timestamp is checked
        mock_now.return_value = datetime(2025, 1, 28, 15, 0, 0)  # noqa: DTZ001

        result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id)

        assert result is True

        # The button returned by the XPath lookup must have been clicked exactly once
        extend_button.click.assert_awaited_once()

        # The YAML file must carry the mocked timestamp
        reloaded = dicts.load_dict(str(ad_file))
        assert reloaded["updated_on"] == "2025-01-28T15:00:00"
class TestExtendEdgeCases:
"""Tests for edge cases and boundary conditions."""
@pytest.mark.asyncio
async def test_extend_ads_exactly_8_days(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_exactly_8_days(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that ads expiring exactly in 8 days are eligible for extension."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -475,20 +454,13 @@ class TestExtendEdgeCases:
future_date = misc.now() + timedelta(days = 8)
end_date_str = future_date.strftime("%d.%m.%Y")
published_ads_json = {
"ads": [
{
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": end_date_str
}
]
}
published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
mock_extend_ad.return_value = True
@@ -498,11 +470,7 @@ class TestExtendEdgeCases:
mock_extend_ad.assert_called_once()
@pytest.mark.asyncio
async def test_extend_ads_exactly_9_days(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_exactly_9_days(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that ads expiring in exactly 9 days are not eligible for extension."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -510,20 +478,13 @@ class TestExtendEdgeCases:
future_date = misc.now() + timedelta(days = 9)
end_date_str = future_date.strftime("%d.%m.%Y")
published_ads_json = {
"ads": [
{
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": end_date_str
}
]
}
published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
):
mock_request.return_value = {"content": json.dumps(published_ads_json)}
await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)])
@@ -532,11 +493,7 @@ class TestExtendEdgeCases:
mock_extend_ad.assert_not_called()
@pytest.mark.asyncio
async def test_extend_ads_date_parsing_german_format(
self,
test_bot:KleinanzeigenBot,
base_ad_config_with_id:dict[str, Any]
) -> None:
async def test_extend_ads_date_parsing_german_format(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None:
"""Test that extend_ads correctly parses German date format (DD.MM.YYYY)."""
ad_cfg = Ad.model_validate(base_ad_config_with_id)
@@ -547,15 +504,17 @@ class TestExtendEdgeCases:
"id": 12345,
"title": "Test Ad Title",
"state": "active",
"endDate": "05.02.2026" # German format: DD.MM.YYYY
"endDate": "05.02.2026", # German format: DD.MM.YYYY
}
]
}
with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \
patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, \
patch("kleinanzeigen_bot.misc.now") as mock_now:
with (
patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock),
patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad,
patch("kleinanzeigen_bot.misc.now") as mock_now,
):
# Mock now() to return a date where 05.02.2026 would be within 8 days
# Test mock datetime - timezone not relevant for date comparison test
mock_now.return_value = datetime(2026, 1, 28) # noqa: DTZ001

View File

@@ -662,6 +662,80 @@ class TestAdExtractorNavigation:
assert refs == ["/s-anzeige/page-one/111", "/s-anzeige/page-two/222"]
next_button_enabled.click.assert_awaited() # triggered once during navigation
@pytest.mark.asyncio
async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:AdExtractor) -> None:
    """Test that TimeoutError in extract_page_refs callback stops pagination.

    Every ``web_find`` lookup succeeds with the same placeholder element, while every
    ``web_find_all`` lookup raises ``TimeoutError`` (simulating missing ad items).
    ``extract_own_ads_urls`` is expected to return an empty list rather than raise.
    """
    with (
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_sleep", new_callable = AsyncMock),
        # A single shared placeholder stands in for every element lookup
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = MagicMock()),
        # One patch only: an exception instance as side_effect is raised when the mock is awaited
        patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = TimeoutError("Ad items not found")),
        patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(test_extractor, "web_execute", new_callable = AsyncMock),
    ):
        refs = await test_extractor.extract_own_ads_urls()

    # Pagination should stop (TimeoutError in callback returns True)
    assert refs == []
@pytest.mark.asyncio
async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:AdExtractor) -> None:
    """Test that generic Exception in extract_page_refs callback continues pagination.

    ``web_find_all`` always raises ``AttributeError``; the second ``web_find`` call raises
    ``TimeoutError`` and all other ``web_find`` calls return a placeholder container.
    ``extract_own_ads_urls`` is expected to return an empty list rather than raise.
    """
    ad_list_container = MagicMock()
    find_calls = 0

    def fake_web_find(*args:Any, **kwargs:Any) -> Element:
        """Return the container on every call except the second, which times out."""
        nonlocal find_calls
        find_calls += 1
        if find_calls == 2:
            # Second call: pagination check - raise TimeoutError to indicate no pagination
            raise TimeoutError("No pagination")
        # First call (before pagination loop) and any later call (inside callback)
        return ad_list_container

    with (
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_sleep", new_callable = AsyncMock),
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = fake_web_find),
        patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock),
        # Every element-list lookup fails with a non-timeout error
        patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = AttributeError("Unexpected error")),
    ):
        refs = await test_extractor.extract_own_ads_urls()

    # Pagination should continue despite exception (callback returns False)
    # Since it's a single page (no pagination), refs should be empty
    assert refs == []
class TestAdExtractorContent:
"""Tests for content extraction functionality."""

View File

@@ -0,0 +1,181 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""Tests for the _navigate_paginated_ad_overview helper method."""
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element, WebScrapingMixin
class TestNavigatePaginatedAdOverview:
"""Tests for _navigate_paginated_ad_overview method."""
@pytest.mark.asyncio
async def test_single_page_action_succeeds(self) -> None:
    """Single-page run: a page action returning True must yield an overall True result."""
    scraper = WebScrapingMixin()

    # Page action that reports success on its first invocation
    page_action = AsyncMock(return_value = True)

    # Any web_find lookup (including the ad list container) resolves to this placeholder
    ad_list_container = MagicMock()

    with (
        patch.object(scraper, "_timeout", return_value = 10),
        patch.object(scraper, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(scraper, "web_find_all", new_callable = AsyncMock, return_value = []),
        patch.object(scraper, "web_find", new_callable = AsyncMock, return_value = ad_list_container),
        patch.object(scraper, "web_sleep", new_callable = AsyncMock),
        patch.object(scraper, "web_open", new_callable = AsyncMock),
    ):
        outcome = await scraper._navigate_paginated_ad_overview(page_action)

    assert outcome is True
    # Exactly one invocation, with page number 1
    page_action.assert_awaited_once_with(1)
@pytest.mark.asyncio
async def test_single_page_action_returns_false(self) -> None:
    """Single-page run: a page action returning False must yield an overall False result."""
    scraper = WebScrapingMixin()

    # Page action that never finds what it is looking for
    page_action = AsyncMock(return_value = False)

    # Any web_find lookup (including the ad list container) resolves to this placeholder
    ad_list_container = MagicMock()

    with (
        patch.object(scraper, "_timeout", return_value = 10),
        patch.object(scraper, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(scraper, "web_find_all", new_callable = AsyncMock, return_value = []),
        patch.object(scraper, "web_find", new_callable = AsyncMock, return_value = ad_list_container),
        patch.object(scraper, "web_sleep", new_callable = AsyncMock),
        patch.object(scraper, "web_open", new_callable = AsyncMock),
    ):
        outcome = await scraper._navigate_paginated_ad_overview(page_action)

    assert outcome is False
    # Exactly one invocation, with page number 1
    page_action.assert_awaited_once_with(1)
@pytest.mark.asyncio
async def test_multi_page_action_succeeds_on_page_2(self) -> None:
    """Test pagination across multiple pages where action succeeds on page 2.

    The page action returns False on its first await and True on its second. The test
    expects an overall True result, two awaits of the page action, and exactly one click
    on the "next page" button.
    """
    mixin = WebScrapingMixin()

    # Page action misses on the first await and hits on the second
    callback = AsyncMock(side_effect = [False, True])

    pagination_section = MagicMock()
    next_button_enabled = MagicMock()
    next_button_enabled.attrs = {}  # No "disabled" attribute = enabled
    next_button_enabled.click = AsyncMock()

    async def mock_find_side_effect(selector_type:By, selector_value:str, **kwargs:Any) -> Element:
        """Serve the ad list container and the pagination section; time out on anything else."""
        if selector_type == By.ID and selector_value == "my-manageitems-adlist":
            return MagicMock()  # Ad list container
        if selector_type == By.CSS_SELECTOR and selector_value == ".Pagination":
            return pagination_section
        raise TimeoutError("Unexpected find")

    async def mock_find_all_side_effect(selector_type:By, selector_value:str, **kwargs:Any) -> list[Element]:
        """Serve the enabled "next" button for its selector; return no elements otherwise."""
        if selector_type == By.CSS_SELECTOR and 'aria-label="Nächste"' in selector_value:
            # Same enabled button on every lookup (initial detection and navigation)
            return [next_button_enabled]
        return []

    with (
        patch.object(mixin, "web_open", new_callable = AsyncMock),
        patch.object(mixin, "web_sleep", new_callable = AsyncMock),
        patch.object(mixin, "web_find", new_callable = AsyncMock, side_effect = mock_find_side_effect),
        patch.object(mixin, "web_find_all", new_callable = AsyncMock, side_effect = mock_find_all_side_effect),
        patch.object(mixin, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(mixin, "_timeout", return_value = 10),
    ):
        result = await mixin._navigate_paginated_ad_overview(callback)

    assert result is True
    assert callback.await_count == 2
    next_button_enabled.click.assert_awaited_once()
@pytest.mark.asyncio
async def test_web_open_raises_timeout(self) -> None:
    """A TimeoutError raised by web_open must produce False without invoking the page action."""
    scraper = WebScrapingMixin()
    page_action = AsyncMock()

    # Opening the overview page fails right away
    failing_open = patch.object(scraper, "web_open", new_callable = AsyncMock, side_effect = TimeoutError("Page load timeout"))
    with failing_open:
        outcome = await scraper._navigate_paginated_ad_overview(page_action)

    assert outcome is False
    # The page action must never have been awaited
    page_action.assert_not_awaited()
@pytest.mark.asyncio
async def test_ad_list_container_not_found(self) -> None:
    """When every web_find lookup times out, the helper must return False and skip the page action."""
    scraper = WebScrapingMixin()
    page_action = AsyncMock()

    # web_find fails for every selector, so the ad list container cannot be located
    missing_container = TimeoutError("Container not found")

    with (
        patch.object(scraper, "web_find", new_callable = AsyncMock, side_effect = missing_container),
        patch.object(scraper, "web_sleep", new_callable = AsyncMock),
        patch.object(scraper, "web_open", new_callable = AsyncMock),
    ):
        outcome = await scraper._navigate_paginated_ad_overview(page_action)

    assert outcome is False
    page_action.assert_not_awaited()
@pytest.mark.asyncio
async def test_web_scroll_timeout_continues(self) -> None:
    """A TimeoutError from web_scroll_page_down must not prevent the page action from running."""
    scraper = WebScrapingMixin()
    page_action = AsyncMock(return_value = True)

    # Scrolling always times out; all other web helpers behave normally
    scroll_failure = TimeoutError("Scroll timeout")

    with (
        patch.object(scraper, "_timeout", return_value = 10),
        patch.object(scraper, "web_scroll_page_down", new_callable = AsyncMock, side_effect = scroll_failure),
        patch.object(scraper, "web_find_all", new_callable = AsyncMock, return_value = []),
        patch.object(scraper, "web_find", new_callable = AsyncMock, return_value = MagicMock()),
        patch.object(scraper, "web_sleep", new_callable = AsyncMock),
        patch.object(scraper, "web_open", new_callable = AsyncMock),
    ):
        outcome = await scraper._navigate_paginated_ad_overview(page_action)

    # The page action still ran once (for page 1) and its True result was passed through
    assert outcome is True
    page_action.assert_awaited_once_with(1)
@pytest.mark.asyncio
async def test_page_action_raises_timeout(self) -> None:
    """A TimeoutError raised by the page action itself must result in False, not an exception."""
    scraper = WebScrapingMixin()

    # The page action times out as soon as it is awaited
    page_action = AsyncMock(side_effect = TimeoutError("Action timeout"))

    with (
        patch.object(scraper, "_timeout", return_value = 10),
        patch.object(scraper, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(scraper, "web_find_all", new_callable = AsyncMock, return_value = []),
        patch.object(scraper, "web_find", new_callable = AsyncMock, return_value = MagicMock()),
        patch.object(scraper, "web_sleep", new_callable = AsyncMock),
        patch.object(scraper, "web_open", new_callable = AsyncMock),
    ):
        outcome = await scraper._navigate_paginated_ad_overview(page_action)

    assert outcome is False
    # The action was attempted exactly once, for page 1
    page_action.assert_awaited_once_with(1)