diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 63891c6..bc3044d 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1047,10 +1047,97 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 LOG.debug("No login detected - DOM elements not found and server probe returned %s", state.name) return False + async def _fetch_published_ads(self) -> list[dict[str, Any]]: + """Fetch all published ads, handling API pagination. + + Returns: + List of all published ads across all pages. + """ + ads:list[dict[str, Any]] = [] + page = 1 + MAX_PAGE_LIMIT:Final[int] = 100 + SNIPPET_LIMIT:Final[int] = 500 + + while True: + # Safety check: don't paginate beyond reasonable limit + if page > MAX_PAGE_LIMIT: + LOG.warning("Stopping pagination after %s pages to avoid infinite loop", MAX_PAGE_LIMIT) + break + + try: + response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}") + except TimeoutError as ex: + LOG.warning("Pagination request timed out on page %s: %s", page, ex) + break + + content = response.get("content", "") + try: + json_data = json.loads(content) + except json.JSONDecodeError as ex: + if not content: + LOG.warning("Empty JSON response content on page %s", page) + break + snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "") + LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet) + break + + if not isinstance(json_data, dict): + snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "") + LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet) + break + + page_ads = json_data.get("ads", []) + if not isinstance(page_ads, list): + preview = str(page_ads) + if len(preview) > SNIPPET_LIMIT: + preview = preview[:SNIPPET_LIMIT] + "..." 
+ LOG.warning("Unexpected 'ads' type on page %s: %s value: %s", page, type(page_ads).__name__, preview) + break + + ads.extend(page_ads) + + paging = json_data.get("paging") + if not isinstance(paging, dict): + LOG.debug("No paging dict found on page %s, assuming single page", page) + break + + # Use only real API fields (confirmed from production data) + current_page_num = misc.coerce_page_number(paging.get("pageNum")) + total_pages = misc.coerce_page_number(paging.get("last")) + + if current_page_num is None: + LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum")) + break + + if total_pages is None: + LOG.debug("No pagination info found, assuming single page") + break + + # Stop if reached last page + if current_page_num >= total_pages: + LOG.info("Reached last page %s of %s, stopping pagination", current_page_num, total_pages) + break + + # Safety: stop if no ads returned + if len(page_ads) == 0: + LOG.info("No ads found on page %s, stopping pagination", page) + break + + LOG.debug("Page %s: fetched %s ads (numFound=%s)", page, len(page_ads), paging.get("numFound")) + + # Use API's next field for navigation (more robust than our counter) + next_page = misc.coerce_page_number(paging.get("next")) + if next_page is None: + LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next")) + break + page = next_page + + return ads + async def delete_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: count = 0 - published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"] + published_ads = await self._fetch_published_ads() for ad_file, ad_cfg, _ad_cfg_orig in ad_cfgs: count += 1 @@ -1094,7 +1181,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 async def extend_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: """Extends ads that are close to expiry.""" # Fetch currently 
published ads from API - published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"] + published_ads = await self._fetch_published_ads() # Filter ads that need extension ads_to_extend = [] @@ -1213,7 +1300,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 failed_count = 0 max_retries = 3 - published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"] + published_ads = await self._fetch_published_ads() for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs: LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg.title, ad_file) @@ -1561,12 +1648,13 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 """ count = 0 - published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"] + published_ads = await self._fetch_published_ads() for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs: ad = next((ad for ad in published_ads if ad["id"] == ad_cfg.id), None) if not ad: + LOG.warning(" -> SKIPPED: ad '%s' (ID: %s) not found in published ads", ad_cfg.title, ad_cfg.id) continue LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg.title, ad_file) diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 30cc653..3b4a444 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -25,6 +25,7 @@ __all__ = [ LOG:Final[loggers.Logger] = loggers.get_logger(__name__) _BREADCRUMB_MIN_DEPTH:Final[int] = 2 +_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100 BREADCRUMB_RE = re.compile(r"/c(\d+)") @@ -525,19 +526,56 @@ class AdExtractor(WebScrapingMixin): LOG.warning("Could not extract ad ID from URL: %s", self.page.url) return None - # Fetch the management JSON data using web_request - response = await 
self.web_request("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") - json_data = json.loads(response["content"]) + # Fetch the management JSON data using web_request with pagination support + page = 1 - # Find the current ad in the ads list - if isinstance(json_data, dict) and "ads" in json_data: - ads_list = json_data["ads"] - if isinstance(ads_list, list): - # Filter ads to find the current ad by ID - current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None) - if current_ad and "buyNowEligible" in current_ad: - buy_now_eligible = current_ad["buyNowEligible"] - return buy_now_eligible if isinstance(buy_now_eligible, bool) else None + while True: + # Safety check: don't paginate beyond reasonable limit + if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT: + LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT) + break + + response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}") + + try: + json_data = json.loads(response["content"]) + except json.JSONDecodeError as ex: + LOG.debug("Failed to parse JSON response on page %s: %s", page, ex) + break + + # Find the current ad in the ads list + if isinstance(json_data, dict) and "ads" in json_data: + ads_list = json_data["ads"] + if isinstance(ads_list, list): + # Filter ads to find the current ad by ID + current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None) + if current_ad and "buyNowEligible" in current_ad: + buy_now_eligible = current_ad["buyNowEligible"] + return buy_now_eligible if isinstance(buy_now_eligible, bool) else None + + # Check if we need to fetch more pages + paging = json_data.get("paging") if isinstance(json_data, dict) else None + if not isinstance(paging, dict): + break + + # Parse pagination info using real API fields + current_page_num = misc.coerce_page_number(paging.get("pageNum")) + total_pages = 
misc.coerce_page_number(paging.get("last")) + + if current_page_num is None: + LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum")) + break + + # Stop if we've reached the last page + if total_pages is None or current_page_num >= total_pages: + break + + # Use API's next field for navigation (more robust than our counter) + next_page = misc.coerce_page_number(paging.get("next")) + if next_page is None: + LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next")) + break + page = next_page # If the key doesn't exist or ad not found, return None (unknown) return None diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index 9dde85c..1f155b6 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -31,6 +31,18 @@ kleinanzeigen_bot/__init__.py: "App version: %s": "App Version: %s" "Python version: %s": "Python Version: %s" + _fetch_published_ads: + "Empty JSON response content on page %s": "Leerer JSON-Antwortinhalt auf Seite %s" + "Failed to parse JSON response on page %s: %s (content: %s)": "Fehler beim Parsen der JSON-Antwort auf Seite %s: %s (Inhalt: %s)" + "Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Paginierung nach %s Seiten, um eine Endlosschleife zu vermeiden" + "Pagination request timed out on page %s: %s": "Zeitüberschreitung bei der Seitenabfrage auf Seite %s: %s" + "Unexpected JSON payload on page %s (content: %s)": "Unerwartete JSON-Antwort auf Seite %s (Inhalt: %s)" + "Unexpected 'ads' type on page %s: %s value: %s": "Unerwarteter 'ads'-Typ auf Seite %s: %s Wert: %s" + "Reached last page %s of %s, stopping pagination": "Letzte Seite %s von %s erreicht, beende Paginierung" + "No ads found on page %s, stopping pagination": "Keine Anzeigen auf Seite %s gefunden, beende Paginierung" + "Invalid 'next' 
page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung" + "Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung" + __check_ad_changed: "Hash comparison for [%s]:": "Hash-Vergleich für [%s]:" " Stored hash: %s": " Gespeicherter Hash: %s" @@ -162,6 +174,7 @@ kleinanzeigen_bot/__init__.py: update_ads: "Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..." "Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist" + " -> SKIPPED: ad '%s' (ID: %s) not found in published ads": " -> ÜBERSPRUNGEN: Anzeige '%s' (ID: %s) nicht in veröffentlichten Anzeigen gefunden" "DONE: updated %s": "FERTIG: %s aktualisiert" "ad": "Anzeige" @@ -299,6 +312,9 @@ kleinanzeigen_bot/extract.py: _extract_sell_directly_from_ad_page: "Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s" + "Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Paginierung nach %s Seiten, um eine Endlosschleife zu vermeiden" + "Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung" + "Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung" ################################################# kleinanzeigen_bot/utils/i18n.py: diff --git a/src/kleinanzeigen_bot/utils/misc.py b/src/kleinanzeigen_bot/utils/misc.py index 767d738..4645d5d 100644 --- a/src/kleinanzeigen_bot/utils/misc.py +++ b/src/kleinanzeigen_bot/utils/misc.py @@ -16,12 +16,55 @@ from . import i18n T = TypeVar("T") +def coerce_page_number(value:Any) -> int | None: + """Safely coerce a value to int or return None if conversion fails. + + Whole-number floats are accepted; non-integer floats are rejected. 
+ + Args: + value: Value to coerce to int (can be int, str, float, or any type) + + Returns: + int if value can be safely coerced, None otherwise + + Examples: + >>> coerce_page_number(1) + 1 + >>> coerce_page_number("2") + 2 + >>> coerce_page_number(3.0) + 3 + >>> coerce_page_number(3.5) is None + True + >>> coerce_page_number(True) is None # Not 1! + True + >>> coerce_page_number(None) is None + True + >>> coerce_page_number("invalid") is None + True + >>> coerce_page_number([1, 2, 3]) is None + True + """ + if value is None: + return None + if isinstance(value, bool): + return None + if isinstance(value, float): + if value.is_integer(): + return int(value) + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + def ensure( - condition:Any | bool | Callable[[], bool], # noqa: FBT001 Boolean-typed positional argument in function definition - error_message:str, - timeout:float = 5, - poll_frequency:float = 0.5 - ) -> None: + condition:Any | bool | Callable[[], bool], # noqa: FBT001 Boolean-typed positional argument in function definition + error_message:str, + timeout:float = 5, + poll_frequency:float = 0.5, +) -> None: """ Ensure a condition is true, retrying until timeout. @@ -152,12 +195,7 @@ def parse_decimal(number:float | int | str) -> decimal.Decimal: raise decimal.DecimalException(f"Invalid number format: {number}") from ex -def parse_datetime( - date:datetime | str | None, - *, - add_timezone_if_missing:bool = True, - use_local_timezone:bool = True -) -> datetime | None: +def parse_datetime(date:datetime | str | None, *, add_timezone_if_missing:bool = True, use_local_timezone:bool = True) -> datetime | None: """ Parses a datetime object or ISO-formatted string. 
@@ -184,10 +222,7 @@ def parse_datetime( dt = date if isinstance(date, datetime) else datetime.fromisoformat(date) if dt.tzinfo is None and add_timezone_if_missing: - dt = ( - dt.astimezone() if use_local_timezone - else dt.replace(tzinfo = timezone.utc) - ) + dt = dt.astimezone() if use_local_timezone else dt.replace(tzinfo = timezone.utc) return dt diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index 3e977e0..26c7bc7 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -10,7 +10,7 @@ from urllib.error import URLError import pytest -from kleinanzeigen_bot.extract import AdExtractor +import kleinanzeigen_bot.extract as extract_module from kleinanzeigen_bot.model.ad_model import AdPartial, ContactPartial from kleinanzeigen_bot.model.config_model import Config, DownloadConfig from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element @@ -39,22 +39,22 @@ class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never @pytest.fixture -def test_extractor(browser_mock:MagicMock, test_bot_config:Config) -> AdExtractor: - """Provides a fresh AdExtractor instance for testing. +def test_extractor(browser_mock:MagicMock, test_bot_config:Config) -> extract_module.AdExtractor: + """Provides a fresh extract_module.AdExtractor instance for testing. 
Dependencies: - browser_mock: Used to mock browser interactions - test_bot_config: Used to initialize the extractor with a valid configuration """ - return AdExtractor(browser_mock, test_bot_config) + return extract_module.AdExtractor(browser_mock, test_bot_config) class TestAdExtractorBasics: - """Basic synchronous tests for AdExtractor.""" + """Basic synchronous tests for extract_module.AdExtractor.""" def test_constructor(self, browser_mock:MagicMock, test_bot_config:Config) -> None: - """Test the constructor of AdExtractor""" - extractor = AdExtractor(browser_mock, test_bot_config) + """Test the constructor of extract_module.AdExtractor""" + extractor = extract_module.AdExtractor(browser_mock, test_bot_config) assert extractor.browser == browser_mock assert extractor.config == test_bot_config @@ -67,7 +67,7 @@ class TestAdExtractorBasics: ("https://www.kleinanzeigen.de/invalid-url", -1), ], ) - def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None: + def test_extract_ad_id_from_ad_url(self, test_extractor:extract_module.AdExtractor, url:str, expected_id:int) -> None: """Test extraction of ad ID from different URL formats.""" assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id @@ -167,7 +167,7 @@ class TestAdExtractorBasics: patch("kleinanzeigen_bot.extract.open", mock_open()), patch("kleinanzeigen_bot.extract.shutil.copyfileobj"), ): - result = AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(test_dir), "test_", 1) + result = extract_module.AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(test_dir), "test_", 1) assert result is not None assert result.endswith((".jpe", ".jpeg", ".jpg")) @@ -176,7 +176,7 @@ class TestAdExtractorBasics: def test_download_and_save_image_sync_failure(self, tmp_path:Path) -> None: """Test _download_and_save_image_sync with download failure.""" with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", 
side_effect = URLError("Network error")): - result = AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(tmp_path), "test_", 1) + result = extract_module.AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(tmp_path), "test_", 1) assert result is None @@ -196,7 +196,9 @@ class TestAdExtractorPricing: ) @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_pricing_info(self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str) -> None: + async def test_extract_pricing_info( + self, test_extractor:extract_module.AdExtractor, price_text:str, expected_price:int | None, expected_type:str + ) -> None: """Test price extraction with different formats""" with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text): price, price_type = await test_extractor._extract_pricing_info_from_ad_page() @@ -205,7 +207,7 @@ class TestAdExtractorPricing: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None: + async def test_extract_pricing_info_timeout(self, test_extractor:extract_module.AdExtractor) -> None: """Test price extraction when element is not found""" with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError): price, price_type = await test_extractor._extract_pricing_info_from_ad_page() @@ -226,7 +228,9 @@ class TestAdExtractorShipping: ) @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info(self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None) -> None: + async def test_extract_shipping_info( + self, test_extractor:extract_module.AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None + ) -> None: """Test shipping info extraction with different text formats.""" with ( 
patch.object(test_extractor, "page", MagicMock()), @@ -250,7 +254,7 @@ class TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_with_options(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping info extraction with shipping options.""" shipping_response = { "content": json.dumps({"data": {"shippingOptionsResponse": {"options": [{"id": "DHL_001", "priceInEuroCent": 549, "packageSize": "SMALL"}]}}}) @@ -269,7 +273,7 @@ class TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_with_all_matching_options(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_with_all_matching_options(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping info extraction with all matching options enabled.""" shipping_response = { "content": json.dumps( @@ -306,7 +310,7 @@ class TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_with_all_matching_options_no_match(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_with_all_matching_options_no_match(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping extraction when include-all is enabled but no option matches the price.""" shipping_response = { "content": json.dumps( @@ -338,7 +342,7 @@ class TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_with_excluded_options(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_with_excluded_options(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping info extraction with excluded options.""" shipping_response = { "content": json.dumps( @@ -375,7 +379,7 @@ class 
TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping info extraction when the matching option is excluded.""" shipping_response = { "content": json.dumps( @@ -408,7 +412,7 @@ class TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_with_no_matching_option(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_with_no_matching_option(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping info extraction when price exists but NO matching option in API response.""" shipping_response = { "content": json.dumps( @@ -438,7 +442,7 @@ class TestAdExtractorShipping: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_shipping_info_timeout(self, test_extractor:AdExtractor) -> None: + async def test_extract_shipping_info_timeout(self, test_extractor:extract_module.AdExtractor) -> None: """Test shipping info extraction when shipping element is missing (TimeoutError).""" with ( patch.object(test_extractor, "page", MagicMock()), @@ -455,7 +459,7 @@ class TestAdExtractorNavigation: """Tests for navigation related functionality.""" @pytest.mark.asyncio - async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None: + async def test_navigate_to_ad_page_with_url(self, test_extractor:extract_module.AdExtractor) -> None: """Test navigation to ad page using a URL.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" @@ -470,7 +474,7 @@ class TestAdExtractorNavigation: mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345") @pytest.mark.asyncio - async def 
test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None: + async def test_navigate_to_ad_page_with_id(self, test_extractor:extract_module.AdExtractor) -> None: """Test navigation to ad page using an ID.""" ad_id = 12345 page_mock = AsyncMock() @@ -496,7 +500,7 @@ class TestAdExtractorNavigation: popup_close_mock.click.assert_awaited_once() @pytest.mark.asyncio - async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None: + async def test_navigate_to_ad_page_with_popup(self, test_extractor:extract_module.AdExtractor) -> None: """Test navigation to ad page with popup handling.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345" @@ -518,7 +522,7 @@ class TestAdExtractorNavigation: mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close") @pytest.mark.asyncio - async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None: + async def test_navigate_to_ad_page_invalid_id(self, test_extractor:extract_module.AdExtractor) -> None: """Test navigation to ad page with invalid ID.""" page_mock = AsyncMock() page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0" @@ -538,7 +542,7 @@ class TestAdExtractorNavigation: assert result is False @pytest.mark.asyncio - async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None: + async def test_extract_own_ads_urls(self, test_extractor:extract_module.AdExtractor) -> None: """Test extraction of own ads URLs - basic test.""" with ( patch.object(test_extractor, "web_open", new_callable = AsyncMock), @@ -608,7 +612,7 @@ class TestAdExtractorNavigation: ) @pytest.mark.asyncio - async def test_extract_own_ads_urls_paginates_with_enabled_next_button(self, test_extractor:AdExtractor) -> None: + async def test_extract_own_ads_urls_paginates_with_enabled_next_button(self, test_extractor:extract_module.AdExtractor) -> None: """Ensure the paginator clicks the first enabled next button and advances.""" 
ad_list_container_mock = MagicMock() pagination_section_mock = MagicMock() @@ -663,7 +667,7 @@ class TestAdExtractorNavigation: next_button_enabled.click.assert_awaited() # triggered once during navigation @pytest.mark.asyncio - async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:AdExtractor) -> None: + async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:extract_module.AdExtractor) -> None: """Test that TimeoutError in extract_page_refs callback stops pagination.""" with ( patch.object(test_extractor, "web_open", new_callable = AsyncMock), @@ -699,7 +703,7 @@ class TestAdExtractorNavigation: assert refs == [] @pytest.mark.asyncio - async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:AdExtractor) -> None: + async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:extract_module.AdExtractor) -> None: """Test that generic Exception in extract_page_refs callback continues pagination.""" with ( patch.object(test_extractor, "web_open", new_callable = AsyncMock), @@ -742,15 +746,9 @@ class TestAdExtractorContent: # pylint: disable=protected-access - @pytest.fixture - def extractor_with_config(self) -> AdExtractor: - """Create extractor with specific config for testing prefix/suffix handling.""" - browser_mock = MagicMock(spec = Browser) - return AdExtractor(browser_mock, Config()) # Empty config, will be overridden in tests - @pytest.mark.asyncio async def test_extract_description_with_affixes( - self, test_extractor:AdExtractor, description_test_cases:list[tuple[dict[str, Any], str, str]], test_bot_config:Config + self, test_extractor:extract_module.AdExtractor, description_test_cases:list[tuple[dict[str, Any], str, str]], test_bot_config:Config ) -> None: """Test extraction of description with various prefix/suffix configurations.""" # Mock the page @@ -783,7 +781,7 @@ class TestAdExtractorContent: assert info.description == raw_description 
@pytest.mark.asyncio - async def test_extract_description_with_affixes_timeout(self, test_extractor:AdExtractor) -> None: + async def test_extract_description_with_affixes_timeout(self, test_extractor:extract_module.AdExtractor) -> None: """Test handling of timeout when extracting description.""" # Mock the page page_mock = MagicMock() @@ -816,7 +814,7 @@ class TestAdExtractorContent: pass @pytest.mark.asyncio - async def test_extract_description_with_affixes_no_affixes(self, test_extractor:AdExtractor) -> None: + async def test_extract_description_with_affixes_no_affixes(self, test_extractor:extract_module.AdExtractor) -> None: """Test extraction of description without any affixes in config.""" # Mock the page page_mock = MagicMock() @@ -846,7 +844,7 @@ class TestAdExtractorContent: assert info.description == raw_description @pytest.mark.asyncio - async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None: + async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None: """Test extraction of sell directly option.""" # Mock the page URL to extract the ad ID test_extractor.page = MagicMock() @@ -856,6 +854,8 @@ class TestAdExtractorContent: test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url" with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: result = await test_extractor._extract_sell_directly_from_ad_page() + # The page URL set above is invalid, so no ad ID can be extracted and the method is expected to + # return None early, without making any web_request call. 
assert result is None # Verify web_request was NOT called when URL is invalid @@ -873,8 +873,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is True - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test successful extraction with buyNowEligible = false with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -885,8 +885,35 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is False - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + + # Test pagination: ad found on second page + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.side_effect = [ + { + "content": json.dumps( + { + "ads": [{"id": 987654321, "buyNowEligible": False}], + "paging": {"pageNum": 1, "last": 2, "next": 2}, + } + ) + }, + { + "content": json.dumps( + { + "ads": [{"id": 123456789, "buyNowEligible": True}], + "paging": {"pageNum": 2, "last": 2}, + } + ) + }, + ] + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is True + + mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + 
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2") # Test when buyNowEligible is missing from the current ad with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -904,8 +931,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when current ad is not found in the ads list with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -914,16 +941,16 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test timeout error with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + 
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test JSON decode error with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -932,8 +959,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when ads list is empty with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -942,8 +969,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when buyNowEligible is a non-boolean value (string "true") with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -954,8 +981,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + 
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when buyNowEligible is a non-boolean value (integer 1) with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -966,8 +993,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when json_data is not a dict (covers line 622) with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -976,8 +1003,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when json_data is a dict but doesn't have "ads" key (covers line 622) with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -986,8 +1013,8 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL 
(now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") # Test when ads_list is not a list (covers line 624) with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: @@ -996,22 +1023,119 @@ class TestAdExtractorContent: result = await test_extractor._extract_sell_directly_from_ad_page() assert result is None - # Verify web_request was called with the correct URL - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json") + # Verify web_request was called with the correct URL (now includes pagination) + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + + @pytest.mark.asyncio + async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0) + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + mock_web_request.assert_not_awaited() + + @pytest.mark.asyncio + async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.return_value = { + "content": json.dumps( + { + "ads": [{"id": 987654321, "buyNowEligible": True}], + "paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0}, + } + ) + } 
+ + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + + @pytest.mark.asyncio + async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.side_effect = [ + {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}})}, + {"content": json.dumps({"ads": []})}, + ] + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2") + + @pytest.mark.asyncio + async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}})} + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + + @pytest.mark.asyncio + async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.return_value = 
{"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1.5, "last": 0}})} + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 2.0, "last": 1}})} + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + + @pytest.mark.asyncio + async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1) + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}})} + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None + mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + + @pytest.mark.asyncio + async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None: + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: + mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {}})} + + result = await test_extractor._extract_sell_directly_from_ad_page() + assert result is None class TestAdExtractorCategory: """Tests for category extraction functionality.""" @pytest.fixture - def 
extractor(self, test_bot_config:Config) -> AdExtractor: + def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor: browser_mock = MagicMock(spec = Browser) config = test_bot_config.with_values({"ad_defaults": {"description": {"prefix": "Test Prefix", "suffix": "Test Suffix"}}}) - return AdExtractor(browser_mock, config) + return extract_module.AdExtractor(browser_mock, config) @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_category(self, extractor:AdExtractor) -> None: + async def test_extract_category(self, extractor:extract_module.AdExtractor) -> None: """Test category extraction from breadcrumb.""" category_line = MagicMock() first_part = MagicMock() @@ -1031,7 +1155,7 @@ class TestAdExtractorCategory: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_category_single_identifier(self, extractor:AdExtractor) -> None: + async def test_extract_category_single_identifier(self, extractor:extract_module.AdExtractor) -> None: """Test category extraction when only a single breadcrumb code exists.""" category_line = MagicMock() first_part = MagicMock() @@ -1049,7 +1173,7 @@ class TestAdExtractorCategory: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_category_fallback_to_legacy_selectors(self, extractor:AdExtractor, caplog:pytest.LogCaptureFixture) -> None: + async def test_extract_category_fallback_to_legacy_selectors(self, extractor:extract_module.AdExtractor, caplog:pytest.LogCaptureFixture) -> None: """Test category extraction when breadcrumb links are not available and legacy selectors are used.""" category_line = MagicMock() first_part = MagicMock() @@ -1075,7 +1199,7 @@ class TestAdExtractorCategory: mock_web_find_all.assert_awaited_once_with(By.CSS_SELECTOR, "a", parent = category_line) @pytest.mark.asyncio - async def test_extract_category_legacy_selectors_timeout(self, extractor:AdExtractor, caplog:pytest.LogCaptureFixture) -> None: + async def 
test_extract_category_legacy_selectors_timeout(self, extractor:extract_module.AdExtractor, caplog:pytest.LogCaptureFixture) -> None: """Ensure fallback timeout logs the error and re-raises with translated message.""" category_line = MagicMock() @@ -1096,7 +1220,7 @@ class TestAdExtractorCategory: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None: + async def test_extract_special_attributes_empty(self, extractor:extract_module.AdExtractor) -> None: """Test extraction of special attributes when empty.""" with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute: mock_web_execute.return_value = {"universalAnalyticsOpts": {"dimensions": {"ad_attributes": ""}}} @@ -1105,7 +1229,7 @@ class TestAdExtractorCategory: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_special_attributes_not_empty(self, extractor:AdExtractor) -> None: + async def test_extract_special_attributes_not_empty(self, extractor:extract_module.AdExtractor) -> None: """Test extraction of special attributes when not empty.""" special_atts = { @@ -1129,7 +1253,7 @@ class TestAdExtractorCategory: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_special_attributes_missing_ad_attributes(self, extractor:AdExtractor) -> None: + async def test_extract_special_attributes_missing_ad_attributes(self, extractor:extract_module.AdExtractor) -> None: """Test extraction of special attributes when ad_attributes key is missing.""" belen_conf:dict[str, Any] = { "universalAnalyticsOpts": { @@ -1146,14 +1270,14 @@ class TestAdExtractorContact: """Tests for contact information extraction.""" @pytest.fixture - def extractor(self, test_bot_config:Config) -> AdExtractor: + def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor: browser_mock = MagicMock(spec = Browser) config = test_bot_config.with_values({"ad_defaults": 
{"description": {"prefix": "Test Prefix", "suffix": "Test Suffix"}}}) - return AdExtractor(browser_mock, config) + return extract_module.AdExtractor(browser_mock, config) @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_contact_info(self, extractor:AdExtractor) -> None: + async def test_extract_contact_info(self, extractor:extract_module.AdExtractor) -> None: """Test extraction of contact information.""" with ( patch.object(extractor, "page", MagicMock()), @@ -1181,7 +1305,7 @@ class TestAdExtractorContact: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None: + async def test_extract_contact_info_timeout(self, extractor:extract_module.AdExtractor) -> None: """Test contact info extraction when elements are not found.""" with ( patch.object(extractor, "page", MagicMock()), @@ -1193,7 +1317,7 @@ class TestAdExtractorContact: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None: + async def test_extract_contact_info_with_phone(self, extractor:extract_module.AdExtractor) -> None: """Test extraction of contact information including phone number.""" with ( patch.object(extractor, "page", MagicMock()), @@ -1217,13 +1341,13 @@ class TestAdExtractorDownload: """Tests for download functionality.""" @pytest.fixture - def extractor(self, test_bot_config:Config) -> AdExtractor: + def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor: browser_mock = MagicMock(spec = Browser) config = test_bot_config.with_values({"ad_defaults": {"description": {"prefix": "Test Prefix", "suffix": "Test Suffix"}}}) - return AdExtractor(browser_mock, config) + return extract_module.AdExtractor(browser_mock, config) @pytest.mark.asyncio - async def test_download_ad(self, extractor:AdExtractor, tmp_path:Path) -> None: + async def test_download_ad(self, 
extractor:extract_module.AdExtractor, tmp_path:Path) -> None: """Test downloading an ad - directory creation and saving ad data.""" # Use tmp_path for OS-agnostic path handling download_base = tmp_path / "downloaded-ads" @@ -1263,7 +1387,7 @@ class TestAdExtractorDownload: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_download_images_no_images(self, extractor:AdExtractor) -> None: + async def test_download_images_no_images(self, extractor:extract_module.AdExtractor) -> None: """Test image download when no images are found.""" with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError): image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345) @@ -1271,7 +1395,7 @@ class TestAdExtractorDownload: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_download_images_with_none_url(self, extractor:AdExtractor) -> None: + async def test_download_images_with_none_url(self, extractor:extract_module.AdExtractor) -> None: """Test image download when some images have None as src attribute.""" image_box_mock = MagicMock() @@ -1285,7 +1409,7 @@ class TestAdExtractorDownload: with ( patch.object(extractor, "web_find", new_callable = AsyncMock, return_value = image_box_mock), patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = [img_with_url, img_without_url]), - patch.object(AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"), + patch.object(extract_module.AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"), ): image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345) @@ -1295,7 +1419,7 @@ class TestAdExtractorDownload: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_ad_page_info_with_directory_handling_final_dir_exists(self, extractor:AdExtractor, tmp_path:Path) -> None: + async def 
test_extract_ad_page_info_with_directory_handling_final_dir_exists(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None: """Test directory handling when final_dir already exists - it should be deleted.""" base_dir = tmp_path / "downloaded-ads" base_dir.mkdir() @@ -1356,7 +1480,7 @@ class TestAdExtractorDownload: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_ad_page_info_with_directory_handling_rename_enabled(self, extractor:AdExtractor, tmp_path:Path) -> None: + async def test_extract_ad_page_info_with_directory_handling_rename_enabled(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None: """Test directory handling when temp_dir exists and rename_existing_folders is True.""" base_dir = tmp_path / "downloaded-ads" base_dir.mkdir() @@ -1422,7 +1546,7 @@ class TestAdExtractorDownload: @pytest.mark.asyncio # pylint: disable=protected-access - async def test_extract_ad_page_info_with_directory_handling_use_existing(self, extractor:AdExtractor, tmp_path:Path) -> None: + async def test_extract_ad_page_info_with_directory_handling_use_existing(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None: """Test directory handling when temp_dir exists and rename_existing_folders is False (default).""" base_dir = tmp_path / "downloaded-ads" base_dir.mkdir() @@ -1485,7 +1609,7 @@ class TestAdExtractorDownload: assert ad_cfg.title == "Test Title" @pytest.mark.asyncio - async def test_download_ad_with_umlauts_in_title(self, extractor:AdExtractor, tmp_path:Path) -> None: + async def test_download_ad_with_umlauts_in_title(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None: """Test cross-platform Unicode handling for ad titles with umlauts (issue #728). 
Verifies that: diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index 6cfedb8..9ffaf76 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -676,7 +676,8 @@ class TestKleinanzeigenBotBasics: ): await test_bot.publish_ads(ad_cfgs) - web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT") + # With pagination, the URL now includes the pageNum parameter + web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE) web_await_mock.assert_awaited_once() delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False) diff --git a/tests/unit/test_json_pagination.py b/tests/unit/test_json_pagination.py new file mode 100644 index 0000000..bfe967f --- /dev/null +++ b/tests/unit/test_json_pagination.py @@ -0,0 +1,231 @@ +# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ +"""Tests for JSON API pagination helper methods.""" + +import json +from unittest.mock import AsyncMock, patch + +import pytest + +from kleinanzeigen_bot import KleinanzeigenBot +from kleinanzeigen_bot.utils import misc + + +@pytest.mark.unit +class TestJSONPagination: + """Tests for misc.coerce_page_number and KleinanzeigenBot._fetch_published_ads.""" + + @pytest.fixture + def bot(self) -> KleinanzeigenBot: + return KleinanzeigenBot() + + def test_coerce_page_number_with_valid_int(self) -> None: + """Test that valid integers are returned as-is.""" + result = misc.coerce_page_number(1) + if result != 1: + pytest.fail(f"_coerce_page_number(1) expected 1, got {result}") + + result = misc.coerce_page_number(0) + if result != 0: + pytest.fail(f"_coerce_page_number(0) expected 0, got {result}") + + result = 
misc.coerce_page_number(42) + if result != 42: + pytest.fail(f"_coerce_page_number(42) expected 42, got {result}") + + def test_coerce_page_number_with_string_int(self) -> None: + """Test that string integers are converted to int.""" + result = misc.coerce_page_number("1") + if result != 1: + pytest.fail(f"_coerce_page_number('1') expected 1, got {result}") + + result = misc.coerce_page_number("0") + if result != 0: + pytest.fail(f"_coerce_page_number('0') expected 0, got {result}") + + result = misc.coerce_page_number("42") + if result != 42: + pytest.fail(f"_coerce_page_number('42') expected 42, got {result}") + + def test_coerce_page_number_with_none(self) -> None: + """Test that None returns None.""" + result = misc.coerce_page_number(None) + if result is not None: + pytest.fail(f"_coerce_page_number(None) expected None, got {result}") + + def test_coerce_page_number_with_invalid_types(self) -> None: + """Test that invalid types return None.""" + result = misc.coerce_page_number("invalid") + if result is not None: + pytest.fail(f'_coerce_page_number("invalid") expected None, got {result}') + + result = misc.coerce_page_number("") + if result is not None: + pytest.fail(f'_coerce_page_number("") expected None, got {result}') + + result = misc.coerce_page_number([]) + if result is not None: + pytest.fail(f"_coerce_page_number([]) expected None, got {result}") + + result = misc.coerce_page_number({}) + if result is not None: + pytest.fail(f"_coerce_page_number({{}}) expected None, got {result}") + + result = misc.coerce_page_number(3.14) + if result is not None: + pytest.fail(f"_coerce_page_number(3.14) expected None, got {result}") + + def test_coerce_page_number_with_whole_number_float(self) -> None: + """Test that whole-number floats are accepted and converted to int.""" + result = misc.coerce_page_number(2.0) + if result != 2: + pytest.fail(f"_coerce_page_number(2.0) expected 2, got {result}") + + result = misc.coerce_page_number(0.0) + if result != 0: + 
pytest.fail(f"_coerce_page_number(0.0) expected 0, got {result}") + + result = misc.coerce_page_number(42.0) + if result != 42: + pytest.fail(f"_coerce_page_number(42.0) expected 42, got {result}") + + @pytest.mark.asyncio + async def test_fetch_published_ads_single_page_no_paging(self, bot:KleinanzeigenBot) -> None: + """Test fetching ads from single page with no paging info.""" + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": '{"ads": [{"id": 1, "title": "Ad 1"}, {"id": 2, "title": "Ad 2"}]}'} + + result = await bot._fetch_published_ads() + + if len(result) != 2: + pytest.fail(f"Expected 2 results, got {len(result)}") + if result[0]["id"] != 1: + pytest.fail(f"Expected result[0]['id'] == 1, got {result[0]['id']}") + if result[1]["id"] != 2: + pytest.fail(f"Expected result[1]['id'] == 2, got {result[1]['id']}") + mock_request.assert_awaited_once_with(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + + @pytest.mark.asyncio + async def test_fetch_published_ads_single_page_with_paging(self, bot:KleinanzeigenBot) -> None: + """Test fetching ads from single page with paging info showing 1/1.""" + response_data = {"ads": [{"id": 1, "title": "Ad 1"}], "paging": {"pageNum": 1, "last": 1}} + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": json.dumps(response_data)} + + result = await bot._fetch_published_ads() + + if len(result) != 1: + pytest.fail(f"Expected 1 ad, got {len(result)}") + if result[0].get("id") != 1: + pytest.fail(f"Expected ad id 1, got {result[0].get('id')}") + mock_request.assert_awaited_once_with(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + + @pytest.mark.asyncio + async def test_fetch_published_ads_multi_page(self, bot:KleinanzeigenBot) -> None: + """Test fetching ads from multiple pages (3 pages, 2 ads each).""" + page1_data = {"ads": [{"id": 1}, 
{"id": 2}], "paging": {"pageNum": 1, "last": 3, "next": 2}} + page2_data = {"ads": [{"id": 3}, {"id": 4}], "paging": {"pageNum": 2, "last": 3, "next": 3}} + page3_data = {"ads": [{"id": 5}, {"id": 6}], "paging": {"pageNum": 3, "last": 3}} + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.side_effect = [ + {"content": json.dumps(page1_data)}, + {"content": json.dumps(page2_data)}, + {"content": json.dumps(page3_data)}, + ] + + result = await bot._fetch_published_ads() + + if len(result) != 6: + pytest.fail(f"Expected 6 ads but got {len(result)}") + if [ad["id"] for ad in result] != [1, 2, 3, 4, 5, 6]: + pytest.fail(f"Expected ids [1, 2, 3, 4, 5, 6] but got {[ad['id'] for ad in result]}") + if mock_request.call_count != 3: + pytest.fail(f"Expected 3 web_request calls but got {mock_request.call_count}") + mock_request.assert_any_await(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + mock_request.assert_any_await(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2") + mock_request.assert_any_await(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=3") + + @pytest.mark.asyncio + async def test_fetch_published_ads_empty_list(self, bot:KleinanzeigenBot) -> None: + """Test handling of empty ads list.""" + response_data = {"ads": [], "paging": {"pageNum": 1, "last": 1}} + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": json.dumps(response_data)} + + result = await bot._fetch_published_ads() + + if not isinstance(result, list): + pytest.fail(f"expected result to be list, got {type(result).__name__}") + if len(result) != 0: + pytest.fail(f"expected empty list from _fetch_published_ads, got {len(result)} items") + + @pytest.mark.asyncio + async def test_fetch_published_ads_invalid_json(self, bot:KleinanzeigenBot) -> None: + """Test handling of invalid JSON response.""" + with 
patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": "invalid json"} + + result = await bot._fetch_published_ads() + if result != []: + pytest.fail(f"Expected empty list on invalid JSON, got {result}") + + @pytest.mark.asyncio + async def test_fetch_published_ads_missing_paging_dict(self, bot:KleinanzeigenBot) -> None: + """Test handling of missing paging dict.""" + response_data = {"ads": [{"id": 1}, {"id": 2}]} + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": json.dumps(response_data)} + + result = await bot._fetch_published_ads() + + if len(result) != 2: + pytest.fail(f"expected 2 ads, got {len(result)}") + mock_request.assert_awaited_once() + + @pytest.mark.asyncio + async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None: + """Test handling of non-integer paging values.""" + response_data = {"ads": [{"id": 1}], "paging": {"pageNum": "invalid", "last": "also-invalid"}} + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": json.dumps(response_data)} + + result = await bot._fetch_published_ads() + + # Should return ads from first page and stop due to invalid paging + if len(result) != 1: + pytest.fail(f"Expected 1 ad, got {len(result)}") + if result[0].get("id") != 1: + pytest.fail(f"Expected ad id 1, got {result[0].get('id')}") + + @pytest.mark.asyncio + async def test_fetch_published_ads_non_list_ads(self, bot:KleinanzeigenBot) -> None: + """Test handling of non-list ads field.""" + response_data = {"ads": "not a list", "paging": {"pageNum": 1, "last": 1}} + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": json.dumps(response_data)} + + result = await bot._fetch_published_ads() + + # Should return empty list when ads is not a list + 
if not isinstance(result, list): + pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}") + if len(result) != 0: + pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}") + + @pytest.mark.asyncio + async def test_fetch_published_ads_timeout(self, bot:KleinanzeigenBot) -> None: + """Test handling of timeout during pagination.""" + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.side_effect = TimeoutError("timeout") + + result = await bot._fetch_published_ads() + + if result != []: + pytest.fail(f"Expected empty list on timeout, got {result}")