From 7098719d5bfbeab9ff4fb48f9172d4c31001cd7f Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Wed, 28 Jan 2026 06:08:03 +0100 Subject: [PATCH] fix: extend command fails with >25 ads due to pagination (#793) --- src/kleinanzeigen_bot/__init__.py | 22 +- src/kleinanzeigen_bot/extract.py | 104 +---- .../resources/translations.de.yaml | 29 +- .../utils/web_scraping_mixin.py | 105 +++++ tests/unit/test_extend_command.py | 401 ++++++++---------- tests/unit/test_extract.py | 74 ++++ tests/unit/test_web_scraping_pagination.py | 181 ++++++++ 7 files changed, 589 insertions(+), 327 deletions(-) create mode 100644 tests/unit/test_web_scraping_pagination.py diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 3b09559..edf6dd9 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -999,15 +999,23 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 LOG.info("Extending ad '%s' (ID: %s)...", ad_cfg.title, ad_cfg.id) try: - # Navigate to ad management page - await self.web_open(f"{self.root_url}/m-meine-anzeigen.html") - - # Find and click "Verlängern" (extend) button for this ad + # Navigate to ad management page and find extend button across all pages extend_button_xpath = f'//li[@data-adid="{ad_cfg.id}"]//button[contains(., "Verlängern")]' - try: - await self.web_click(By.XPATH, extend_button_xpath) - except TimeoutError: + async def find_and_click_extend_button(page_num:int) -> bool: + """Try to find and click extend button on current page.""" + try: + extend_button = await self.web_find(By.XPATH, extend_button_xpath, timeout = self._timeout("quick_dom")) + LOG.info("Found extend button on page %s", page_num) + await extend_button.click() + return True # Success - stop pagination + except TimeoutError: + LOG.debug("Extend button not found on page %s", page_num) + return False # Continue to next page + + success = await self._navigate_paginated_ad_overview(find_and_click_extend_button, page_url = f"{self.root_url}/m-meine-anzeigen.html") + + if not success: LOG.error(" -> FAILED: Could not find extend button for ad ID %s", ad_cfg.id) return False diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 03def4b..30cc653 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -148,104 +148,34 @@ class AdExtractor(WebScrapingMixin): :return: the links to your ad pages """ - # navigate to "your ads" page - await self.web_open("https://www.kleinanzeigen.de/m-meine-anzeigen.html") - await self.web_sleep(2000, 3000) # Consider replacing with explicit waits later - - # Try to find the main ad list container first - try: - _ = await self.web_find(By.ID, "my-manageitems-adlist") - except TimeoutError: - LOG.warning("Ad list container #my-manageitems-adlist not found. Maybe no ads present?") - return [] - - # --- Pagination handling --- - multi_page = False - pagination_timeout = self._timeout("pagination_initial") - try: - # Correct selector: Use uppercase '.Pagination' - pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = pagination_timeout) # Increased timeout slightly - # Correct selector: Use 'aria-label' - # Also check if the button is actually present AND potentially enabled (though enabled check isn't strictly necessary here, only for clicking later) - next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section) - if next_buttons: - # Check if at least one 'Nächste' button is not disabled (optional but good practice) - enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")] - if enabled_next_buttons: - multi_page = True - LOG.info("Multiple ad pages detected.") - else: - LOG.info("Next button found but is disabled. Assuming single effective page.") - - else: - LOG.info('No "Naechste" button found within pagination. Assuming single page.') - except TimeoutError: - # This will now correctly trigger only if the '.Pagination' div itself is not found - LOG.info("No pagination controls found. Assuming single page.") - except Exception as e: - LOG.exception("Error during pagination detection: %s", e) - LOG.info("Assuming single page due to error during pagination check.") - # --- End Pagination Handling --- - refs:list[str] = [] - current_page = 1 - while True: # Loop reference extraction - LOG.info("Extracting ads from page %s...", current_page) - # scroll down to load dynamically if necessary - await self.web_scroll_page_down() - await self.web_sleep(2000, 3000) # Consider replacing with explicit waits - # Re-find the ad list container on the current page/state + async def extract_page_refs(page_num:int) -> bool: + """Extract ad reference URLs from the current page. + + :param page_num: The current page number being processed + :return: True to stop pagination (e.g. ads container disappeared), False to continue to next page + """ try: ad_list_container = await self.web_find(By.ID, "my-manageitems-adlist") list_items = await self.web_find_all(By.CLASS_NAME, "cardbox", parent = ad_list_container) - LOG.info("Found %s ad items on page %s.", len(list_items), current_page) - except TimeoutError: - LOG.warning("Could not find ad list container or items on page %s.", current_page) - break # Stop if ads disappear + LOG.info("Found %s ad items on page %s.", len(list_items), page_num) - # Extract references using the CORRECTED selector - try: page_refs:list[str] = [str((await self.web_find(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = li)).attrs["href"]) for li in list_items] refs.extend(page_refs) - LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), current_page) - except Exception as e: - # Log the error if extraction fails for some items, but try to continue - LOG.exception("Error extracting refs on page %s: %s", current_page, e) + LOG.info("Successfully extracted %s refs from page %s.", len(page_refs), page_num) + return False # Continue to next page - if not multi_page: # only one iteration for single-page overview - break - - # --- Navigate to next page --- - follow_up_timeout = self._timeout("pagination_follow_up") - try: - # Find the pagination section again (scope might have changed after scroll/wait) - pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = follow_up_timeout) - # Find the "Next" button using the correct aria-label selector and ensure it's not disabled - next_button_element = None - possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section) - for btn in possible_next_buttons: - if not btn.attrs.get("disabled"): # Check if the button is enabled - next_button_element = btn - break # Found an enabled next button - - if next_button_element: - LOG.info("Navigating to next page...") - await next_button_element.click() - current_page += 1 - # Wait for page load - consider waiting for a specific element on the new page instead of fixed sleep - await self.web_sleep(3000, 4000) - else: - LOG.info('Last ad overview page explored (no enabled "Naechste" button found).') - break except TimeoutError: - # This might happen if pagination disappears on the last page after loading - LOG.info("No pagination controls found after scrolling/waiting. Assuming last page.") - break + LOG.warning("Could not find ad list container or items on page %s.", page_num) + return True # Stop pagination (ads disappeared) except Exception as e: - LOG.exception("Error during pagination navigation: %s", e) - break - # --- End Navigation --- + # Continue despite error for resilience against transient web scraping issues + # (e.g., DOM structure changes, network glitches). LOG.exception ensures visibility. + LOG.exception("Error extracting refs on page %s: %s", page_num, e) + return False # Continue to next page + + await self._navigate_paginated_ad_overview(extract_page_refs) if not refs: LOG.warning("No ad URLs were extracted.") diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 4d05f75..4a70845 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -112,6 +112,9 @@ kleinanzeigen_bot/__init__.py: " -> FAILED: Timeout while extending ad '%s': %s": " -> FEHLER: Zeitüberschreitung beim Verlängern der Anzeige '%s': %s" " -> FAILED: Could not persist extension for ad '%s': %s": " -> FEHLER: Verlängerung der Anzeige '%s' konnte nicht gespeichert werden: %s" + find_and_click_extend_button: + "Found extend button on page %s": "'Verlängern'-Button auf Seite %s gefunden" + finalize_installation_mode: "Config file: %s": "Konfigurationsdatei: %s" "First run detected, prompting user for installation mode": "Erster Start erkannt, frage Benutzer nach Installationsmodus" @@ -259,21 +262,11 @@ kleinanzeigen_bot/extract.py: "Failed to extract ad ID from URL '%s': %s": "Fehler beim Extrahieren der Anzeigen-ID aus der URL '%s': %s" extract_own_ads_urls: - "Ad list container #my-manageitems-adlist not found. Maybe no ads present?": "Anzeigenlistencontainer #my-manageitems-adlist nicht gefunden. Vielleicht sind keine Anzeigen vorhanden?" - "Multiple ad pages detected.": "Mehrere Anzeigenseiten erkannt." - "Next button found but is disabled. Assuming single effective page.": "Weiter-Button gefunden, aber deaktiviert. Es wird von einer einzelnen effektiven Seite ausgegangen." - "No \"Naechste\" button found within pagination. Assuming single page.": "Kein \"Nächste\"-Button in der Paginierung gefunden. Es wird von einer einzelnen Seite ausgegangen." - "No pagination controls found. Assuming single page.": "Keine Paginierungssteuerung gefunden. Es wird von einer einzelnen Seite ausgegangen." - "Assuming single page due to error during pagination check.": "Es wird von einer einzelnen Seite ausgegangen wegen eines Fehlers bei der Paginierungsprüfung." - "Navigating to next page...": "Navigiere zur nächsten Seite..." - "Last ad overview page explored (no enabled \"Naechste\" button found).": "Letzte Anzeigenübersichtsseite erkundet (kein aktivierter \"Nächste\"-Button gefunden)." - "No pagination controls found after scrolling/waiting. Assuming last page.": "Keine Paginierungssteuerung nach dem Scrollen/Warten gefunden. Es wird von der letzten Seite ausgegangen." "No ad URLs were extracted.": "Es wurden keine Anzeigen-URLs extrahiert." + + extract_page_refs: "Could not find ad list container or items on page %s.": "Anzeigenlistencontainer oder Elemente auf Seite %s nicht gefunden." - "Error during pagination detection: %s": "Fehler bei der Paginierungserkennung: %s" - "Error during pagination navigation: %s": "Fehler bei der Paginierungsnavigation: %s" "Error extracting refs on page %s: %s": "Fehler beim Extrahieren der Referenzen auf Seite %s: %s" - "Extracting ads from page %s...": "Extrahiere Anzeigen von Seite %s..." "Found %s ad items on page %s.": "%s Anzeigen-Elemente auf Seite %s gefunden." "Successfully extracted %s refs from page %s.": "%s Referenzen von Seite %s erfolgreich extrahiert." @@ -488,6 +481,18 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py: "Combobox missing aria-controls attribute": "Combobox fehlt aria-controls Attribut" "No matching option found in combobox: '%s'": "Keine passende Option in Combobox gefunden: '%s'" + _navigate_paginated_ad_overview: + "Failed to open ad overview page at %s: timeout": "Fehler beim Öffnen der Anzeigenübersichtsseite unter %s: Zeitüberschreitung" + "Scroll timeout on page %s (non-critical, continuing)": "Zeitüberschreitung beim Scrollen auf Seite %s (nicht kritisch, wird fortgesetzt)" + "Page action timed out on page %s": "Seitenaktion hat auf Seite %s eine Zeitüberschreitung erreicht" + "Ad list container not found. Maybe no ads present?": "Anzeigenlistencontainer nicht gefunden. Vielleicht sind keine Anzeigen vorhanden?" + "Multiple ad pages detected.": "Mehrere Anzeigenseiten erkannt." + "No pagination controls found. Assuming single page.": "Keine Paginierungssteuerung gefunden. Es wird von einer einzelnen Seite ausgegangen." + "Processing page %s...": "Verarbeite Seite %s..." + "Navigating to page %s...": "Navigiere zu Seite %s..." + "Last page reached (no enabled 'Naechste' button found).": "Letzte Seite erreicht (kein aktivierter 'Naechste'-Button gefunden)." + "No pagination controls found. Assuming last page.": "Keine Paginierungssteuerung gefunden. Es wird von der letzten Seite ausgegangen." + close_browser_session: "Closing Browser session...": "Schließe Browser-Sitzung..." diff --git a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py index fab1ac8..522975a 100644 --- a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py +++ b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py @@ -969,6 +969,111 @@ class WebScrapingMixin: ) await self.page.sleep(duration / 1_000) + async def _navigate_paginated_ad_overview( + self, + page_action:Callable[[int], Awaitable[bool]], + page_url:str = "https://www.kleinanzeigen.de/m-meine-anzeigen.html", + *, + max_pages:int = 10, + ) -> bool: + """ + Navigate through paginated ad overview page, calling page_action on each page. + + This helper guarantees to return a boolean result and never propagates TimeoutError. + All timeout conditions are handled internally and logged appropriately. + + Args: + page_action: Async callable that receives current_page number and returns True if action succeeded/should stop + page_url: URL of the paginated overview page (default: kleinanzeigen ad management page) + max_pages: Maximum number of pages to navigate (safety limit) + + Returns: + True if page_action returned True on any page, False otherwise + + Example: + async def find_ad_callback(page_num: int) -> bool: + element = await self.web_find(By.XPATH, "//div[@id='my-ad']") + if element: + await element.click() + return True + return False + + success = await self._navigate_paginated_ad_overview(find_ad_callback) + """ + try: + await self.web_open(page_url) + except TimeoutError: + LOG.warning("Failed to open ad overview page at %s: timeout", page_url) + return False + + await self.web_sleep(2000, 3000) + + # Check if ad list container exists + try: + _ = await self.web_find(By.ID, "my-manageitems-adlist") + except TimeoutError: + LOG.warning("Ad list container not found. Maybe no ads present?") + return False + + # Check for pagination controls + multi_page = False + pagination_timeout = self._timeout("pagination_initial") + try: + pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = pagination_timeout) + next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section) + if next_buttons: + enabled_next_buttons = [btn for btn in next_buttons if not btn.attrs.get("disabled")] + if enabled_next_buttons: + multi_page = True + LOG.info("Multiple ad pages detected.") + except TimeoutError: + LOG.info("No pagination controls found. Assuming single page.") + + current_page = 1 + while current_page <= max_pages: + LOG.info("Processing page %s...", current_page) + + try: + await self.web_scroll_page_down() + except TimeoutError: + LOG.debug("Scroll timeout on page %s (non-critical, continuing)", current_page) + + await self.web_sleep(2000, 3000) + + try: + if await page_action(current_page): + return True + except TimeoutError: + LOG.warning("Page action timed out on page %s", current_page) + return False + + if not multi_page: + break + + follow_up_timeout = self._timeout("pagination_follow_up") + try: + pagination_section = await self.web_find(By.CSS_SELECTOR, ".Pagination", timeout = follow_up_timeout) + next_button_element = None + possible_next_buttons = await self.web_find_all(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination_section) + for btn in possible_next_buttons: + if not btn.attrs.get("disabled"): + next_button_element = btn + break + + if next_button_element: + LOG.info("Navigating to page %s...", current_page + 1) + await next_button_element.click() + await self.web_sleep(3000, 4000) + current_page += 1 + else: + LOG.info("Last page reached (no enabled 'Naechste' button found).") + break + except TimeoutError: + LOG.info("No pagination controls found. Assuming last page.") + break + + return False + async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200, headers:dict[str, str] | None = None) -> Any: method = method.upper() LOG.debug(" -> HTTP %s [%s]...", method, url) diff --git a/tests/unit/test_extend_command.py b/tests/unit/test_extend_command.py index 6e4e1be..a033b46 100644 --- a/tests/unit/test_extend_command.py +++ b/tests/unit/test_extend_command.py @@ -5,13 +5,14 @@ import json # isort: skip from datetime import datetime, timedelta from pathlib import Path from typing import Any -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from kleinanzeigen_bot import KleinanzeigenBot, misc from kleinanzeigen_bot.model.ad_model import Ad from kleinanzeigen_bot.utils import dicts +from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element @pytest.fixture @@ -34,13 +35,7 @@ def base_ad_config_with_id() -> dict[str, Any]: "republication_interval": 7, "created_on": "2024-12-07T10:00:00", "updated_on": "2024-12-10T15:20:00", - "contact": { - "name": "Test User", - "zipcode": "12345", - "location": "Test City", - "street": "", - "phone": "" - } + "contact": {"name": "Test User", "zipcode": "12345", "location": "Test City", "street": "", "phone": ""}, } @@ -50,9 +45,7 @@ class TestExtendCommand: @pytest.mark.asyncio async def test_run_extend_command_no_ads(self, test_bot:KleinanzeigenBot) -> None: """Test running extend command with no ads.""" - with patch.object(test_bot, "load_config"), \ - patch.object(test_bot, "load_ads", return_value = []), \ - patch("kleinanzeigen_bot.UpdateChecker"): + with patch.object(test_bot, "load_config"), patch.object(test_bot, "load_ads", return_value = []), patch("kleinanzeigen_bot.UpdateChecker"): await test_bot.run(["script.py", "extend"]) assert test_bot.command == "extend" assert test_bot.ads_selector == "all" @@ -60,11 +53,13 @@ class TestExtendCommand: @pytest.mark.asyncio async def test_run_extend_command_with_specific_ids(self, test_bot:KleinanzeigenBot) -> None: """Test running extend command with specific ad IDs.""" - with patch.object(test_bot, "load_config"), \ - patch.object(test_bot, "load_ads", return_value = []), \ - patch.object(test_bot, "create_browser_session", new_callable = AsyncMock), \ - patch.object(test_bot, "login", new_callable = AsyncMock), \ - patch("kleinanzeigen_bot.UpdateChecker"): + with ( + patch.object(test_bot, "load_config"), + patch.object(test_bot, "load_ads", return_value = []), + patch.object(test_bot, "create_browser_session", new_callable = AsyncMock), + patch.object(test_bot, "login", new_callable = AsyncMock), + patch("kleinanzeigen_bot.UpdateChecker"), + ): await test_bot.run(["script.py", "extend", "--ads=12345,67890"]) assert test_bot.command == "extend" assert test_bot.ads_selector == "12345,67890" @@ -74,19 +69,14 @@ class TestExtendAdsMethod: """Tests for the extend_ads() method.""" @pytest.mark.asyncio - async def test_extend_ads_skips_unpublished_ad( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_skips_unpublished_ad(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads skips ads without an ID (unpublished).""" # Create ad without ID ad_config = base_ad_config_with_id.copy() ad_config["id"] = None ad_cfg = Ad.model_validate(ad_config) - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock): + with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, patch.object(test_bot, "web_sleep", new_callable = AsyncMock): mock_request.return_value = {"content": '{"ads": []}'} await test_bot.extend_ads([("test.yaml", ad_cfg, ad_config)]) @@ -95,16 +85,11 @@ class TestExtendAdsMethod: mock_request.assert_called_once() # Only the API call to get published ads @pytest.mark.asyncio - async def test_extend_ads_skips_ad_not_in_published_list( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_skips_ad_not_in_published_list(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads skips ads not found in the published ads API response.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock): + with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, patch.object(test_bot, "web_sleep", new_callable = AsyncMock): # Return empty published ads list mock_request.return_value = {"content": '{"ads": []}'} @@ -114,11 +99,7 @@ class TestExtendAdsMethod: mock_request.assert_called_once() @pytest.mark.asyncio - async def test_extend_ads_skips_inactive_ad( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_skips_inactive_ad(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads skips ads with state != 'active'.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -128,14 +109,16 @@ class TestExtendAdsMethod: "id": 12345, "title": "Test Ad Title", "state": "paused", # Not active - "endDate": "05.02.2026" + "endDate": "05.02.2026", } ] } - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)]) @@ -144,11 +127,7 @@ class TestExtendAdsMethod: mock_extend_ad.assert_not_called() @pytest.mark.asyncio - async def test_extend_ads_skips_ad_without_enddate( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_skips_ad_without_enddate(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads skips ads without endDate in API response.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -157,15 +136,17 @@ class TestExtendAdsMethod: { "id": 12345, "title": "Test Ad Title", - "state": "active" + "state": "active", # No endDate field } ] } - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)]) @@ -174,11 +155,7 @@ class TestExtendAdsMethod: mock_extend_ad.assert_not_called() @pytest.mark.asyncio - async def test_extend_ads_skips_ad_outside_window( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_skips_ad_outside_window(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads skips ads expiring more than 8 days in the future.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -186,20 +163,13 @@ class TestExtendAdsMethod: future_date = misc.now() + timedelta(days = 30) end_date_str = future_date.strftime("%d.%m.%Y") - published_ads_json = { - "ads": [ - { - "id": 12345, - "title": "Test Ad Title", - "state": "active", - "endDate": end_date_str - } - ] - } + published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]} - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)]) @@ -208,11 +178,7 @@ class TestExtendAdsMethod: mock_extend_ad.assert_not_called() @pytest.mark.asyncio - async def test_extend_ads_extends_ad_within_window( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_extends_ad_within_window(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads extends ads within the 8-day window.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -220,20 +186,13 @@ class TestExtendAdsMethod: future_date = misc.now() + timedelta(days = 5) end_date_str = future_date.strftime("%d.%m.%Y") - published_ads_json = { - "ads": [ - { - "id": 12345, - "title": "Test Ad Title", - "state": "active", - "endDate": end_date_str - } - ] - } + published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]} - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} mock_extend_ad.return_value = True @@ -243,11 +202,7 @@ class TestExtendAdsMethod: mock_extend_ad.assert_called_once() @pytest.mark.asyncio - async def test_extend_ads_no_eligible_ads( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_no_eligible_ads(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test extend_ads when no ads are eligible for extension.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -255,20 +210,13 @@ class TestExtendAdsMethod: future_date = misc.now() + timedelta(days = 30) end_date_str = future_date.strftime("%d.%m.%Y") - published_ads_json = { - "ads": [ - { - "id": 12345, - "title": "Test Ad Title", - "state": "active", - "endDate": end_date_str - } - ] - } + published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]} - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)]) @@ -277,11 +225,7 @@ class TestExtendAdsMethod: mock_extend_ad.assert_not_called() @pytest.mark.asyncio - async def test_extend_ads_handles_multiple_ads( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_handles_multiple_ads(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads processes multiple ads correctly.""" ad_cfg1 = Ad.model_validate(base_ad_config_with_id) @@ -297,46 +241,36 @@ class TestExtendAdsMethod: published_ads_json = { "ads": [ - { - "id": 12345, - "title": "Test Ad Title", - "state": "active", - "endDate": within_window.strftime("%d.%m.%Y") - }, - { - "id": 67890, - "title": "Second Test Ad", - "state": "active", - "endDate": outside_window.strftime("%d.%m.%Y") - } + {"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": within_window.strftime("%d.%m.%Y")}, + {"id": 67890, "title": "Second Test Ad", "state": "active", "endDate": outside_window.strftime("%d.%m.%Y")}, ] } - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} mock_extend_ad.return_value = True - await test_bot.extend_ads([ - ("test1.yaml", ad_cfg1, base_ad_config_with_id), - ("test2.yaml", ad_cfg2, ad_config2) - ]) + await test_bot.extend_ads([("test1.yaml", ad_cfg1, base_ad_config_with_id), ("test2.yaml", ad_cfg2, ad_config2)]) # Verify extend_ad was called only once (for the ad within window) assert mock_extend_ad.call_count == 1 class TestExtendAdMethod: - """Tests for the extend_ad() method.""" + """Tests for the extend_ad() method. + + Note: These tests mock `_navigate_paginated_ad_overview` rather than individual browser methods + (web_find, web_click, etc.) because the pagination helper involves complex multi-step browser + interactions that would require extensive, brittle mock choreography. Mocking at this level + keeps tests focused on extend_ad's own logic (dialog handling, YAML persistence, error paths). + """ @pytest.mark.asyncio - async def test_extend_ad_success( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any], - tmp_path:Path - ) -> None: + async def test_extend_ad_success(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None: """Test successful ad extension.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -344,27 +278,27 @@ class TestExtendAdMethod: ad_file = tmp_path / "test_ad.yaml" dicts.save_dict(str(ad_file), base_ad_config_with_id) - with patch.object(test_bot, "web_open", new_callable = AsyncMock), \ - patch.object(test_bot, "web_click", new_callable = AsyncMock), \ - patch("kleinanzeigen_bot.misc.now") as mock_now: + with ( + patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate, + patch.object(test_bot, "web_click", new_callable = AsyncMock), + patch("kleinanzeigen_bot.misc.now") as mock_now, + ): # Test mock datetime - timezone not relevant for timestamp formatting test mock_now.return_value = datetime(2025, 1, 28, 14, 30, 0) # noqa: DTZ001 + mock_paginate.return_value = True + result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id) assert result is True + assert mock_paginate.call_count == 1 # Verify updated_on was updated in the YAML file updated_config = dicts.load_dict(str(ad_file)) assert updated_config["updated_on"] == "2025-01-28T14:30:00" @pytest.mark.asyncio - async def test_extend_ad_button_not_found( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any], - tmp_path:Path - ) -> None: + async def test_extend_ad_button_not_found(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None: """Test extend_ad when the Verlängern button is not found.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -372,22 +306,17 @@ class TestExtendAdMethod: ad_file = tmp_path / "test_ad.yaml" dicts.save_dict(str(ad_file), base_ad_config_with_id) - with patch.object(test_bot, "web_open", new_callable = AsyncMock), \ - patch.object(test_bot, "web_click", new_callable = AsyncMock) as mock_click: - # Simulate button not found by raising TimeoutError - mock_click.side_effect = TimeoutError("Button not found") + with patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate: + # Simulate button not found by having pagination return False (not found on any page) + mock_paginate.return_value = False result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id) assert result is False + assert mock_paginate.call_count == 1 @pytest.mark.asyncio - async def test_extend_ad_dialog_timeout( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any], - tmp_path:Path - ) -> None: + async def test_extend_ad_dialog_timeout(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None: """Test extend_ad when the confirmation dialog times out (no dialog appears).""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -395,14 +324,18 @@ class TestExtendAdMethod: ad_file = tmp_path / "test_ad.yaml" dicts.save_dict(str(ad_file), base_ad_config_with_id) - with patch.object(test_bot, "web_open", new_callable = AsyncMock), \ - patch.object(test_bot, "web_click", new_callable = AsyncMock) as mock_click, \ - patch("kleinanzeigen_bot.misc.now") as mock_now: + with ( + patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate, + patch.object(test_bot, "web_click", new_callable = AsyncMock) as mock_click, + patch("kleinanzeigen_bot.misc.now") as mock_now, + ): # Test mock datetime - timezone not relevant for timestamp formatting test mock_now.return_value = datetime(2025, 1, 28, 14, 30, 0) # noqa: DTZ001 - # First click (Verlängern button) succeeds, second click (dialog close) times out - mock_click.side_effect = [None, TimeoutError("Dialog not found")] + # Pagination succeeds (button found and clicked) + mock_paginate.return_value = True + # Dialog close button times out + mock_click.side_effect = TimeoutError("Dialog not found") result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id) @@ -410,12 +343,7 @@ class TestExtendAdMethod: assert result is True @pytest.mark.asyncio - async def test_extend_ad_exception_handling( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any], - tmp_path:Path - ) -> None: + async def test_extend_ad_exception_handling(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None: """Test extend_ad propagates unexpected exceptions.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -423,20 +351,15 @@ class TestExtendAdMethod: ad_file = tmp_path / "test_ad.yaml" dicts.save_dict(str(ad_file), base_ad_config_with_id) - with patch.object(test_bot, "web_open", new_callable = AsyncMock) as mock_open: - # Simulate unexpected exception - mock_open.side_effect = Exception("Unexpected error") + with patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate: + # Simulate unexpected exception during pagination + mock_paginate.side_effect = Exception("Unexpected error") with pytest.raises(Exception, match = "Unexpected error"): await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id) @pytest.mark.asyncio - async def test_extend_ad_updates_yaml_file( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any], - tmp_path:Path - ) -> None: + async def test_extend_ad_updates_yaml_file(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None: """Test that extend_ad correctly updates the YAML file with new timestamp.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -445,12 +368,17 @@ class TestExtendAdMethod: original_updated_on = base_ad_config_with_id["updated_on"] dicts.save_dict(str(ad_file), base_ad_config_with_id) - with patch.object(test_bot, "web_open", new_callable = AsyncMock), \ - patch.object(test_bot, "web_click", new_callable = AsyncMock), \ - patch("kleinanzeigen_bot.misc.now") as mock_now: + with ( + patch.object(test_bot, "_navigate_paginated_ad_overview", new_callable = AsyncMock) as mock_paginate, + patch.object(test_bot, "web_click", new_callable = AsyncMock), + patch("kleinanzeigen_bot.misc.now") as mock_now, + ): # Test mock datetime - timezone not relevant for timestamp formatting test mock_now.return_value = datetime(2025, 1, 28, 14, 30, 0) # noqa: DTZ001 + # Pagination succeeds (button found and clicked) + mock_paginate.return_value = True + await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id) # Load the updated file and verify the timestamp changed @@ -458,16 +386,67 @@ class TestExtendAdMethod: assert updated_config["updated_on"] != original_updated_on assert updated_config["updated_on"] == "2025-01-28T14:30:00" + @pytest.mark.asyncio + async def test_extend_ad_with_web_mocks(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any], tmp_path:Path) -> None: + """Test extend_ad with web-level mocks to exercise the find_and_click_extend_button callback.""" + ad_cfg = Ad.model_validate(base_ad_config_with_id) + + # Create temporary YAML file + ad_file = tmp_path / "test_ad.yaml" + dicts.save_dict(str(ad_file), base_ad_config_with_id) + + extend_button_mock = AsyncMock() + extend_button_mock.click = AsyncMock() + + pagination_section = MagicMock() + + find_call_count = {"count": 0} + + async def mock_web_find(selector_type:By, selector_value:str, **kwargs:Any) -> Element: + find_call_count["count"] += 1 + # Ad list container (called by pagination helper) + if selector_type == By.ID and selector_value == "my-manageitems-adlist": + return MagicMock() + # Pagination section (called by pagination helper) + if selector_type == By.CSS_SELECTOR and selector_value == ".Pagination": + # Raise TimeoutError on first call (pagination detection) to indicate single page + if find_call_count["count"] == 2: + raise TimeoutError("No pagination") + return pagination_section + # Extend button (called by find_and_click_extend_button callback) + if selector_type == By.XPATH and "Verlängern" in selector_value: + return extend_button_mock + raise TimeoutError(f"Unexpected find: {selector_type} {selector_value}") + + with ( + patch.object(test_bot, "web_open", new_callable = AsyncMock), + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "web_find", new_callable = AsyncMock, side_effect = mock_web_find), + patch.object(test_bot, "web_find_all", new_callable = AsyncMock, return_value = []), + patch.object(test_bot, "web_scroll_page_down", new_callable = AsyncMock), + patch.object(test_bot, "web_click", new_callable = AsyncMock), + patch.object(test_bot, "_timeout", return_value = 10), + patch("kleinanzeigen_bot.misc.now") as mock_now, + ): + # Test mock datetime - timezone not relevant for timestamp formatting test + mock_now.return_value = datetime(2025, 1, 28, 15, 0, 0) # noqa: DTZ001 + + result = await test_bot.extend_ad(str(ad_file), ad_cfg, base_ad_config_with_id) + + assert result is True + # Verify the extend button was found and clicked + extend_button_mock.click.assert_awaited_once() + + # Verify updated_on was updated + updated_config = dicts.load_dict(str(ad_file)) + assert updated_config["updated_on"] == "2025-01-28T15:00:00" + class TestExtendEdgeCases: """Tests for edge cases and boundary conditions.""" @pytest.mark.asyncio - async def test_extend_ads_exactly_8_days( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_exactly_8_days(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that ads expiring exactly in 8 days are eligible for extension.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -475,20 +454,13 @@ class TestExtendEdgeCases: future_date = misc.now() + timedelta(days = 8) end_date_str = future_date.strftime("%d.%m.%Y") - published_ads_json = { - "ads": [ - { - "id": 12345, - "title": "Test Ad Title", - "state": "active", - "endDate": end_date_str - } - ] - } + published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]} - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} mock_extend_ad.return_value = True @@ -498,11 +470,7 @@ class TestExtendEdgeCases: mock_extend_ad.assert_called_once() @pytest.mark.asyncio - async def test_extend_ads_exactly_9_days( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_exactly_9_days(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that ads expiring in exactly 9 days are not eligible for extension.""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -510,20 +478,13 @@ class TestExtendEdgeCases: future_date = misc.now() + timedelta(days = 9) end_date_str = future_date.strftime("%d.%m.%Y") - published_ads_json = { - "ads": [ - { - "id": 12345, - "title": "Test Ad Title", - "state": "active", - "endDate": end_date_str - } - ] - } + published_ads_json = {"ads": [{"id": 12345, "title": "Test Ad Title", "state": "active", "endDate": end_date_str}]} - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + ): mock_request.return_value = {"content": json.dumps(published_ads_json)} await test_bot.extend_ads([("test.yaml", ad_cfg, base_ad_config_with_id)]) @@ -532,11 +493,7 @@ class TestExtendEdgeCases: mock_extend_ad.assert_not_called() @pytest.mark.asyncio - async def test_extend_ads_date_parsing_german_format( - self, - test_bot:KleinanzeigenBot, - base_ad_config_with_id:dict[str, Any] - ) -> None: + async def test_extend_ads_date_parsing_german_format(self, test_bot:KleinanzeigenBot, base_ad_config_with_id:dict[str, Any]) -> None: """Test that extend_ads correctly parses German date format (DD.MM.YYYY).""" ad_cfg = Ad.model_validate(base_ad_config_with_id) @@ -547,15 +504,17 @@ class TestExtendEdgeCases: "id": 12345, "title": "Test Ad Title", "state": "active", - "endDate": "05.02.2026" # German format: DD.MM.YYYY + "endDate": "05.02.2026", # German format: DD.MM.YYYY } ] } - with patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, \ - patch.object(test_bot, "web_sleep", new_callable = AsyncMock), \ - patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, \ - patch("kleinanzeigen_bot.misc.now") as mock_now: + with ( + patch.object(test_bot, "web_request", new_callable = AsyncMock) as mock_request, + patch.object(test_bot, "web_sleep", new_callable = AsyncMock), + patch.object(test_bot, "extend_ad", new_callable = AsyncMock) as mock_extend_ad, + patch("kleinanzeigen_bot.misc.now") as mock_now, + ): # Mock now() to return a date where 05.02.2026 would be within 8 days # Test mock datetime - timezone not relevant for date comparison test mock_now.return_value = datetime(2026, 1, 28) # noqa: DTZ001 diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index 626ab7d..3e977e0 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -662,6 +662,80 @@ class TestAdExtractorNavigation: assert refs == ["/s-anzeige/page-one/111", "/s-anzeige/page-two/222"] next_button_enabled.click.assert_awaited() # triggered once during navigation + @pytest.mark.asyncio + async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:AdExtractor) -> None: + """Test that TimeoutError in extract_page_refs callback stops pagination.""" + with ( + patch.object(test_extractor, "web_open", new_callable = AsyncMock), + patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), + patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, + patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, return_value = []), + patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), + patch.object(test_extractor, "web_execute", new_callable = AsyncMock), + ): + # Setup: ad list container exists, but web_find_all for cardbox raises TimeoutError + ad_list_container_mock = MagicMock() + + call_count = {"count": 0} + + def mock_find_side_effect(*args:Any, **kwargs:Any) -> Element: + call_count["count"] += 1 + if call_count["count"] == 1: + # First call: ad list container (before pagination loop) + return ad_list_container_mock + # Second call: ad list container (inside callback) + return ad_list_container_mock + + mock_web_find.side_effect = mock_find_side_effect + + # Make web_find_all for cardbox raise TimeoutError (simulating missing ad items) + async def mock_find_all_side_effect(*args:Any, **kwargs:Any) -> list[Element]: + raise TimeoutError("Ad items not found") + + with patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = mock_find_all_side_effect): + refs = await test_extractor.extract_own_ads_urls() + + # Pagination should stop (TimeoutError in callback returns True) + assert refs == [] + + @pytest.mark.asyncio + async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:AdExtractor) -> None: + """Test that generic Exception in extract_page_refs callback continues pagination.""" + with ( + patch.object(test_extractor, "web_open", new_callable = AsyncMock), + patch.object(test_extractor, "web_sleep", new_callable = AsyncMock), + patch.object(test_extractor, "web_find", new_callable = AsyncMock) as mock_web_find, + patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock), + ): + # Setup: ad list container exists, but web_find_all raises generic Exception + ad_list_container_mock = MagicMock() + + call_count = {"count": 0} + + def mock_find_side_effect(*args:Any, **kwargs:Any) -> Element: + call_count["count"] += 1 + if call_count["count"] == 1: + # First call: ad list container (before pagination loop) + return ad_list_container_mock + # Second call: pagination check - raise TimeoutError to indicate no pagination + if call_count["count"] == 2: + raise TimeoutError("No pagination") + # Third call: ad list container (inside callback) + return ad_list_container_mock + + mock_web_find.side_effect = mock_find_side_effect + + # Make web_find_all raise a generic exception + async def mock_find_all_side_effect(*args:Any, **kwargs:Any) -> list[Element]: + raise AttributeError("Unexpected error") + + with patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = mock_find_all_side_effect): + refs = await test_extractor.extract_own_ads_urls() + + # Pagination should continue despite exception (callback returns False) + # Since it's a single page (no pagination), refs should be empty + assert refs == [] + class TestAdExtractorContent: """Tests for content extraction functionality.""" diff --git a/tests/unit/test_web_scraping_pagination.py b/tests/unit/test_web_scraping_pagination.py new file mode 100644 index 0000000..0feadf5 --- /dev/null +++ b/tests/unit/test_web_scraping_pagination.py @@ -0,0 +1,181 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +"""Tests for the _navigate_paginated_ad_overview helper method.""" + +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element, WebScrapingMixin + + +class TestNavigatePaginatedAdOverview: + """Tests for _navigate_paginated_ad_overview method.""" + + @pytest.mark.asyncio + async def test_single_page_action_succeeds(self) -> None: + """Test pagination on single page where action succeeds.""" + mixin = WebScrapingMixin() + + # Mock callback that succeeds + callback = AsyncMock(return_value = True) + + with ( + patch.object(mixin, "web_open", new_callable = AsyncMock), + patch.object(mixin, "web_sleep", new_callable = AsyncMock), + patch.object(mixin, "web_find", new_callable = AsyncMock) as mock_find, + patch.object(mixin, "web_find_all", new_callable = AsyncMock, return_value = []), + patch.object(mixin, "web_scroll_page_down", new_callable = AsyncMock), + patch.object(mixin, "_timeout", return_value = 10), + ): + # Ad list container exists + mock_find.return_value = MagicMock() + + result = await mixin._navigate_paginated_ad_overview(callback) + + assert result is True + callback.assert_awaited_once_with(1) + + @pytest.mark.asyncio + async def test_single_page_action_returns_false(self) -> None: + """Test pagination on single page where action returns False.""" + mixin = WebScrapingMixin() + + # Mock callback that returns False (doesn't find what it's looking for) + callback = AsyncMock(return_value = False) + + with ( + patch.object(mixin, "web_open", new_callable = AsyncMock), + patch.object(mixin, "web_sleep", new_callable = AsyncMock), + patch.object(mixin, "web_find", new_callable = AsyncMock) as mock_find, + patch.object(mixin, "web_find_all", new_callable = AsyncMock, return_value = []), + patch.object(mixin, "web_scroll_page_down", new_callable = AsyncMock), + patch.object(mixin, "_timeout", return_value = 10), + ): + # Ad list container exists + mock_find.return_value = MagicMock() + + result = await mixin._navigate_paginated_ad_overview(callback) + + assert result is False + callback.assert_awaited_once_with(1) + + @pytest.mark.asyncio + async def test_multi_page_action_succeeds_on_page_2(self) -> None: + """Test pagination across multiple pages where action succeeds on page 2.""" + mixin = WebScrapingMixin() + + # Mock callback that returns False on page 1, True on page 2 + callback_results = [False, True] + callback = AsyncMock(side_effect = callback_results) + + pagination_section = MagicMock() + next_button_enabled = MagicMock() + next_button_enabled.attrs = {} # No "disabled" attribute = enabled + next_button_enabled.click = AsyncMock() + + find_call_count = {"count": 0} + + async def mock_find_side_effect(selector_type:By, selector_value:str, **kwargs:Any) -> Element: + find_call_count["count"] += 1 + if selector_type == By.ID and selector_value == "my-manageitems-adlist": + return MagicMock() # Ad list container + if selector_type == By.CSS_SELECTOR and selector_value == ".Pagination": + return pagination_section + raise TimeoutError("Unexpected find") + + find_all_call_count = {"count": 0} + + async def mock_find_all_side_effect(selector_type:By, selector_value:str, **kwargs:Any) -> list[Element]: + find_all_call_count["count"] += 1 + if selector_type == By.CSS_SELECTOR and 'aria-label="Nächste"' in selector_value: + # Return enabled next button on both calls (initial detection and navigation) + return [next_button_enabled] + return [] + + with ( + patch.object(mixin, "web_open", new_callable = AsyncMock), + patch.object(mixin, "web_sleep", new_callable = AsyncMock), + patch.object(mixin, "web_find", new_callable = AsyncMock, side_effect = mock_find_side_effect), + patch.object(mixin, "web_find_all", new_callable = AsyncMock, side_effect = mock_find_all_side_effect), + patch.object(mixin, "web_scroll_page_down", new_callable = AsyncMock), + patch.object(mixin, "_timeout", return_value = 10), + ): + result = await mixin._navigate_paginated_ad_overview(callback) + + assert result is True + assert callback.await_count == 2 + next_button_enabled.click.assert_awaited_once() + + @pytest.mark.asyncio + async def test_web_open_raises_timeout(self) -> None: + """Test that TimeoutError on web_open is caught and returns False.""" + mixin = WebScrapingMixin() + + callback = AsyncMock() + + with patch.object(mixin, "web_open", new_callable = AsyncMock, side_effect = TimeoutError("Page load timeout")): + result = await mixin._navigate_paginated_ad_overview(callback) + + assert result is False + callback.assert_not_awaited() # Callback should not be called + + @pytest.mark.asyncio + async def test_ad_list_container_not_found(self) -> None: + """Test that missing ad list container returns False.""" + mixin = WebScrapingMixin() + + callback = AsyncMock() + + with ( + patch.object(mixin, "web_open", new_callable = AsyncMock), + patch.object(mixin, "web_sleep", new_callable = AsyncMock), + patch.object(mixin, "web_find", new_callable = AsyncMock, side_effect = TimeoutError("Container not found")), + ): + result = await mixin._navigate_paginated_ad_overview(callback) + + assert result is False + callback.assert_not_awaited() + + @pytest.mark.asyncio + async def test_web_scroll_timeout_continues(self) -> None: + """Test that TimeoutError on web_scroll_page_down is non-fatal and pagination continues.""" + mixin = WebScrapingMixin() + + callback = AsyncMock(return_value = True) + + with ( + patch.object(mixin, "web_open", new_callable = AsyncMock), + patch.object(mixin, "web_sleep", new_callable = AsyncMock), + patch.object(mixin, "web_find", new_callable = AsyncMock, return_value = MagicMock()), + patch.object(mixin, "web_find_all", new_callable = AsyncMock, return_value = []), + patch.object(mixin, "web_scroll_page_down", new_callable = AsyncMock, side_effect = TimeoutError("Scroll timeout")), + patch.object(mixin, "_timeout", return_value = 10), + ): + result = await mixin._navigate_paginated_ad_overview(callback) + + # Should continue and call callback despite scroll timeout + assert result is True + callback.assert_awaited_once_with(1) + + @pytest.mark.asyncio + async def test_page_action_raises_timeout(self) -> None: + """Test that TimeoutError from page_action is caught and returns False.""" + mixin = WebScrapingMixin() + + callback = AsyncMock(side_effect = TimeoutError("Action timeout")) + + with ( + patch.object(mixin, "web_open", new_callable = AsyncMock), + patch.object(mixin, "web_sleep", new_callable = AsyncMock), + patch.object(mixin, "web_find", new_callable = AsyncMock, return_value = MagicMock()), + patch.object(mixin, "web_find_all", new_callable = AsyncMock, return_value = []), + patch.object(mixin, "web_scroll_page_down", new_callable = AsyncMock), + patch.object(mixin, "_timeout", return_value = 10), + ): + result = await mixin._navigate_paginated_ad_overview(callback) + + assert result is False + callback.assert_awaited_once_with(1)