From a8051c381446b3da06ecb3f0a0912a410df3e98e Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Tue, 3 Feb 2026 14:51:59 +0100 Subject: [PATCH] feat: cache published ads data to avoid repetitive API calls during ad download (#809) --- src/kleinanzeigen_bot/__init__.py | 21 +- src/kleinanzeigen_bot/extract.py | 94 ++--- .../resources/translations.de.yaml | 6 +- tests/unit/test_extract.py | 325 ++++-------------- tests/unit/test_init.py | 16 +- 5 files changed, 136 insertions(+), 326 deletions(-) diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 2283a62..4140314 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -581,11 +581,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 dicts.save_commented_model( self.config_file_path, default_config, - header=( - "# yaml-language-server: $schema=" - "https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot" - "/main/schemas/config.schema.json" - ), + header = ("# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json"), exclude = {"ad_defaults": {"description"}}, ) @@ -2020,8 +2016,21 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 Determines which download mode was chosen with the arguments, and calls the specified download routine. This downloads either all, only unsaved (new), or specific ads given by ID. """ + # Fetch published ads once from manage-ads JSON to avoid repetitive API calls during extraction + # Build lookup dict inline and pass directly to extractor (no cache abstraction needed) + LOG.info("Fetching published ads...") + published_ads = await self._fetch_published_ads() + published_ads_by_id:dict[int, dict[str, Any]] = {} + for published_ad in published_ads: + try: + ad_id = published_ad.get("id") + if ad_id is not None: + published_ads_by_id[int(ad_id)] = published_ad + except (ValueError, TypeError): + LOG.warning("Skipping ad with non-numeric id: %s", published_ad.get("id")) + LOG.info("Loaded %s published ads.", len(published_ads_by_id)) - ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable) + ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable, published_ads_by_id = published_ads_by_id) # use relevant download routine if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes diff --git a/src/kleinanzeigen_bot/extract.py b/src/kleinanzeigen_bot/extract.py index 3b4a444..a41175b 100644 --- a/src/kleinanzeigen_bot/extract.py +++ b/src/kleinanzeigen_bot/extract.py @@ -25,7 +25,6 @@ __all__ = [ LOG:Final[loggers.Logger] = loggers.get_logger(__name__) _BREADCRUMB_MIN_DEPTH:Final[int] = 2 -_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100 BREADCRUMB_RE = re.compile(r"/c(\d+)") @@ -34,13 +33,20 @@ class AdExtractor(WebScrapingMixin): Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page. """ - def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None: + def __init__( + self, + browser:Browser, + config:Config, + installation_mode:xdg_paths.InstallationMode = "portable", + published_ads_by_id:dict[int, dict[str, Any]] | None = None, + ) -> None: super().__init__() self.browser = browser self.config:Config = config if installation_mode not in {"portable", "xdg"}: raise ValueError(f"Unsupported installation mode: {installation_mode}") self.installation_mode:xdg_paths.InstallationMode = installation_mode + self.published_ads_by_id:dict[int, dict[str, Any]] = published_ads_by_id or {} async def download_ad(self, ad_id:int) -> None: """ @@ -231,14 +237,19 @@ class AdExtractor(WebScrapingMixin): """ info:dict[str, Any] = {"active": True} - # extract basic info - info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED" - - # Extract title + # Extract title first (needed for directory creation) title = await self._extract_title_from_ad_page() + # Get BelenConf data which contains accurate ad_type information belen_conf = await self.web_execute("window.BelenConf") + # Extract ad type from BelenConf - more reliable than URL pattern matching + # BelenConf contains "ad_type":"WANTED" or "ad_type":"OFFER" in dimensions + ad_type_from_conf = None + if isinstance(belen_conf, dict): + ad_type_from_conf = belen_conf.get("universalAnalyticsOpts", {}).get("dimensions", {}).get("ad_type") + info["type"] = ad_type_from_conf if ad_type_from_conf in {"OFFER", "WANTED"} else ("OFFER" if "s-anzeige" in self.page.url else "WANTED") + info["category"] = await self._extract_category_from_ad_page() # append subcategory and change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer" @@ -515,72 +526,35 @@ class AdExtractor(WebScrapingMixin): async def _extract_sell_directly_from_ad_page(self) -> bool | None: """ - Extracts the sell directly option from an ad page using the JSON API. + Extracts the sell directly option from an ad page using the published ads data. + + Uses data passed at construction time (from the manage-ads JSON) to avoid + repetitive API calls that create a bot detection signature. :return: bool | None - True if buyNowEligible, False if not eligible, None if unknown """ try: - # Extract current ad ID from the page URL first + # Extract current ad ID from the page URL current_ad_id = self.extract_ad_id_from_ad_url(self.page.url) if current_ad_id == -1: LOG.warning("Could not extract ad ID from URL: %s", self.page.url) return None - # Fetch the management JSON data using web_request with pagination support - page = 1 + # Direct dict lookup (O(1) instead of O(pages) API calls) + cached_ad = self.published_ads_by_id.get(current_ad_id) + if cached_ad is not None: + buy_now_eligible = cached_ad.get("buyNowEligible") + if isinstance(buy_now_eligible, bool): + LOG.debug("sell_directly from data for ad %s: %s", current_ad_id, buy_now_eligible) + return buy_now_eligible + LOG.debug("buyNowEligible not a bool for ad %s: %s", current_ad_id, buy_now_eligible) + return None - while True: - # Safety check: don't paginate beyond reasonable limit - if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT: - LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT) - break - - response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}") - - try: - json_data = json.loads(response["content"]) - except json.JSONDecodeError as ex: - LOG.debug("Failed to parse JSON response on page %s: %s", page, ex) - break - - # Find the current ad in the ads list - if isinstance(json_data, dict) and "ads" in json_data: - ads_list = json_data["ads"] - if isinstance(ads_list, list): - # Filter ads to find the current ad by ID - current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None) - if current_ad and "buyNowEligible" in current_ad: - buy_now_eligible = current_ad["buyNowEligible"] - return buy_now_eligible if isinstance(buy_now_eligible, bool) else None - - # Check if we need to fetch more pages - paging = json_data.get("paging") if isinstance(json_data, dict) else None - if not isinstance(paging, dict): - break - - # Parse pagination info using real API fields - current_page_num = misc.coerce_page_number(paging.get("pageNum")) - total_pages = misc.coerce_page_number(paging.get("last")) - - if current_page_num is None: - LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum")) - break - - # Stop if we've reached the last page - if total_pages is None or current_page_num >= total_pages: - break - - # Use API's next field for navigation (more robust than our counter) - next_page = misc.coerce_page_number(paging.get("next")) - if next_page is None: - LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next")) - break - page = next_page - - # If the key doesn't exist or ad not found, return None (unknown) + # Ad not in user's published ads (may be someone else's ad) + LOG.debug("No data for ad %s, returning None for sell_directly", current_ad_id) return None - except (TimeoutError, json.JSONDecodeError, KeyError, TypeError) as e: + except (KeyError, TypeError) as e: LOG.debug("Could not determine sell_directly status: %s", e) return None diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index a7aa538..ba31d94 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -225,12 +225,15 @@ kleinanzeigen_bot/__init__.py: "Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...": "Attributfeld '%s' scheint eine Combobox zu sein (d.h. Texteingabefeld mit Dropdown-Filter)..." download_ads: + "Fetching published ads...": "Lade veröffentlichte Anzeigen..." + "Loaded %s published ads.": "%s veröffentlichte Anzeigen geladen." "Scanning your ad overview...": "Scanne Anzeigenübersicht..." "%s found.": "%s gefunden." "ad": "Anzeige" "Starting download of all ads...": "Starte den Download aller Anzeigen..." "%d of %d ads were downloaded from your profile.": "%d von %d Anzeigen wurden aus Ihrem Profil heruntergeladen." "Starting download of not yet downloaded ads...": "Starte den Download noch nicht heruntergeladener Anzeigen..." + "Skipping ad with non-numeric id: %s": "Überspringe Anzeige mit nicht-numerischer ID: %s" "The ad with id %d has already been saved.": "Die Anzeige mit der ID %d wurde bereits gespeichert." "%s were downloaded from your profile.": "%s wurden aus Ihrem Profil heruntergeladen." "new ad": "neue Anzeige" @@ -317,9 +320,6 @@ kleinanzeigen_bot/extract.py: _extract_sell_directly_from_ad_page: "Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s" - "Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Seitenaufschaltung nach %s Seiten, um eine Endlosschleife zu vermeiden" - "Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung" - "Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung" ################################################# kleinanzeigen_bot/utils/i18n.py: diff --git a/tests/unit/test_extract.py b/tests/unit/test_extract.py index 26c7bc7..1c76214 100644 --- a/tests/unit/test_extract.py +++ b/tests/unit/test_extract.py @@ -844,284 +844,103 @@ class TestAdExtractorContent: assert info.description == raw_description @pytest.mark.asyncio - async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None: - """Test extraction of sell directly option.""" - # Mock the page URL to extract the ad ID + async def test_extract_sell_directly_data_hit_true(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction with data hit - buyNowEligible=True.""" + # Setup extractor with published ads data + test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}} + + # Setup page URL test_extractor.page = MagicMock() test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - # Test when extract_ad_id_from_ad_url returns -1 (invalid URL) + result = await test_extractor._extract_sell_directly_from_ad_page() + + assert result is True + + @pytest.mark.asyncio + async def test_extract_sell_directly_data_hit_false(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction with data hit - buyNowEligible=False.""" + test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": False}} + + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + result = await test_extractor._extract_sell_directly_from_ad_page() + + assert result is False + + @pytest.mark.asyncio + async def test_extract_sell_directly_data_miss(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction with data miss - ad ID not in cache returns None.""" + # Cache has a different ad ID than the one in the URL - true data miss + test_extractor.published_ads_by_id = {987654321: {"id": 987654321, "buyNowEligible": True}} + + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + result = await test_extractor._extract_sell_directly_from_ad_page() + + assert result is None + + @pytest.mark.asyncio + async def test_extract_sell_directly_empty_published_ads(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction with empty published_ads_by_id - returns None.""" + test_extractor.published_ads_by_id = {} + + test_extractor.page = MagicMock() + test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" + + result = await test_extractor._extract_sell_directly_from_ad_page() + + assert result is None + + @pytest.mark.asyncio + async def test_extract_sell_directly_invalid_url(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction with invalid URL - returns None.""" + test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}} + + test_extractor.page = MagicMock() test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url" - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - result = await test_extractor._extract_sell_directly_from_ad_page() - # When pageNum is missing from the API response, coerce_page_number() returns None, - # causing the pagination loop to break and return None without making a web_request call. - assert result is None - # Verify web_request was NOT called when URL is invalid - mock_web_request.assert_not_awaited() + result = await test_extractor._extract_sell_directly_from_ad_page() - # Reset to valid URL for subsequent tests - test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - - # Test successful extraction with buyNowEligible = true - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = { - "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]}) - } - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is True - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test successful extraction with buyNowEligible = false - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = { - "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]}) - } - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is False - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test pagination: ad found on second page - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.side_effect = [ - { - "content": json.dumps( - { - "ads": [{"id": 987654321, "buyNowEligible": False}], - "paging": {"pageNum": 1, "last": 2, "next": 2}, - } - ) - }, - { - "content": json.dumps( - { - "ads": [{"id": 123456789, "buyNowEligible": True}], - "paging": {"pageNum": 2, "last": 2}, - } - ) - }, - ] - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is True - - mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2") - - # Test when buyNowEligible is missing from the current ad - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = { - "content": json.dumps( - { - "ads": [ - {"id": 123456789}, # No buyNowEligible field - {"id": 987654321, "buyNowEligible": True}, - ] - } - ) - } - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when current ad is not found in the ads list - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321, "buyNowEligible": True}]})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test timeout error - with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request: - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test JSON decode error - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": "invalid json"} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when ads list is empty - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": []})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when buyNowEligible is a non-boolean value (string "true") - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = { - "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]}) - } - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when buyNowEligible is a non-boolean value (integer 1) - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = { - "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]}) - } - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when json_data is not a dict (covers line 622) - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps(["not", "a", "dict"])} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when json_data is a dict but doesn't have "ads" key (covers line 622) - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"other_key": "value"})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - # Test when ads_list is not a list (covers line 624) - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": "not a list"})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - # Verify web_request was called with the correct URL (now includes pagination) - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") + assert result is None @pytest.mark.asyncio - async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None: - test_extractor.page = MagicMock() - test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0) + async def test_extract_sell_directly_non_boolean_value(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction when buyNowEligible is not a boolean.""" + test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": "true"}} # String, not bool - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - mock_web_request.assert_not_awaited() - - @pytest.mark.asyncio - async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None: test_extractor.page = MagicMock() test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = { - "content": json.dumps( - { - "ads": [{"id": 987654321, "buyNowEligible": True}], - "paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0}, - } - ) - } + result = await test_extractor._extract_sell_directly_from_ad_page() - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None + assert result is None @pytest.mark.asyncio - async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None: + async def test_extract_sell_directly_missing_buy_now_field(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction when buyNowEligible field is missing.""" + test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "state": "active"}} # No buyNowEligible + test_extractor.page = MagicMock() test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.side_effect = [ - {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}})}, - {"content": json.dumps({"ads": []})}, - ] + result = await test_extractor._extract_sell_directly_from_ad_page() - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2") + assert result is None @pytest.mark.asyncio - async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None: + async def test_extract_sell_directly_integer_value(self, test_extractor:extract_module.AdExtractor) -> None: + """Test sell_directly extraction when buyNowEligible is an integer (not bool).""" + test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": 1}} # Integer, not bool + test_extractor.page = MagicMock() test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}})} + result = await test_extractor._extract_sell_directly_from_ad_page() - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - @pytest.mark.asyncio - async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None: - test_extractor.page = MagicMock() - test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1.5, "last": 0}})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 2.0, "last": 1}})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - - @pytest.mark.asyncio - async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None: - test_extractor.page = MagicMock() - test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1) - - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None - mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") - - @pytest.mark.asyncio - async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None: - test_extractor.page = MagicMock() - test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789" - - with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request: - mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {}})} - - result = await test_extractor._extract_sell_directly_from_ad_page() - assert result is None + assert result is None class TestAdExtractorCategory: diff --git a/tests/unit/test_init.py b/tests/unit/test_init.py index f16eb43..3fbc38d 100644 --- a/tests/unit/test_init.py +++ b/tests/unit/test_init.py @@ -165,8 +165,8 @@ class TestKleinanzeigenBotInitialization: assert update_checker_calls == [(test_bot.config, "xdg")] @pytest.mark.asyncio - async def test_download_ads_passes_installation_mode(self, test_bot:KleinanzeigenBot) -> None: - """Ensure download_ads wires installation mode into AdExtractor.""" + async def test_download_ads_passes_installation_mode_and_published_ads(self, test_bot:KleinanzeigenBot) -> None: + """Ensure download_ads wires installation mode and published_ads_by_id into AdExtractor.""" test_bot.installation_mode = "xdg" test_bot.ads_selector = "all" test_bot.browser = MagicMock() @@ -174,10 +174,18 @@ class TestKleinanzeigenBotInitialization: extractor_mock = MagicMock() extractor_mock.extract_own_ads_urls = AsyncMock(return_value = []) - with patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor: + mock_published_ads = [{"id": 123, "buyNowEligible": True}, {"id": 456, "buyNowEligible": False}] + + with ( + patch.object(test_bot, "_fetch_published_ads", new_callable = AsyncMock, return_value = mock_published_ads), + patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor, + ): await test_bot.download_ads() - mock_extractor.assert_called_once_with(test_bot.browser, test_bot.config, "xdg") + # Verify published_ads_by_id is built correctly and passed to extractor + mock_extractor.assert_called_once_with( + test_bot.browser, test_bot.config, "xdg", published_ads_by_id = {123: mock_published_ads[0], 456: mock_published_ads[1]} + ) class TestKleinanzeigenBotLogging: