feat: cache published ads data to avoid repetitive API calls during ad download (#809)

2026-03-12 02:31:45 +01:00 · 2026-02-03 14:51:59 +01:00
parent e994ce1b1f
commit a8051c3814
5 changed files with 136 additions and 326 deletions
--- a/src/kleinanzeigen_bot/init.py
+++ b/src/kleinanzeigen_bot/init.py
@@ -581,11 +581,7 @@ class KleinanzeigenBot(WebScrapingMixin):  # noqa: PLR0904
        dicts.save_commented_model(
            self.config_file_path,
            default_config,
-            header=(
-                "# yaml-language-server: $schema="
-                "https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot"
-                "/main/schemas/config.schema.json"
-            ),
+            header = ("# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json"),
            exclude = {"ad_defaults": {"description"}},
        )

@@ -2020,8 +2016,21 @@ class KleinanzeigenBot(WebScrapingMixin):  # noqa: PLR0904
        Determines which download mode was chosen with the arguments, and calls the specified download routine.
        This downloads either all, only unsaved (new), or specific ads given by ID.
        """
+        # Fetch published ads once from manage-ads JSON to avoid repetitive API calls during extraction
+        # Build lookup dict inline and pass directly to extractor (no cache abstraction needed)
+        LOG.info("Fetching published ads...")
+        published_ads = await self._fetch_published_ads()
+        published_ads_by_id:dict[int, dict[str, Any]] = {}
+        for published_ad in published_ads:
+            try:
+                ad_id = published_ad.get("id")
+                if ad_id is not None:
+                    published_ads_by_id[int(ad_id)] = published_ad
+            except (ValueError, TypeError):
+                LOG.warning("Skipping ad with non-numeric id: %s", published_ad.get("id"))
+        LOG.info("Loaded %s published ads.", len(published_ads_by_id))

-        ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable)
+        ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable, published_ads_by_id = published_ads_by_id)

        # use relevant download routine
        if self.ads_selector in {"all", "new"}:  # explore ads overview for these two modes
--- a/src/kleinanzeigen_bot/extract.py
+++ b/src/kleinanzeigen_bot/extract.py
@@ -25,7 +25,6 @@ __all__ = [
 LOG:Final[loggers.Logger] = loggers.get_logger(__name__)

 _BREADCRUMB_MIN_DEPTH:Final[int] = 2
-_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100
 BREADCRUMB_RE = re.compile(r"/c(\d+)")


@@ -34,13 +33,20 @@ class AdExtractor(WebScrapingMixin):
    Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
    """

-    def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None:
+    def __init__(
+        self,
+        browser:Browser,
+        config:Config,
+        installation_mode:xdg_paths.InstallationMode = "portable",
+        published_ads_by_id:dict[int, dict[str, Any]] | None = None,
+    ) -> None:
        super().__init__()
        self.browser = browser
        self.config:Config = config
        if installation_mode not in {"portable", "xdg"}:
            raise ValueError(f"Unsupported installation mode: {installation_mode}")
        self.installation_mode:xdg_paths.InstallationMode = installation_mode
+        self.published_ads_by_id:dict[int, dict[str, Any]] = published_ads_by_id or {}

    async def download_ad(self, ad_id:int) -> None:
        """
@@ -231,14 +237,19 @@ class AdExtractor(WebScrapingMixin):
        """
        info:dict[str, Any] = {"active": True}

-        # extract basic info
-        info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
-
-        # Extract title
+        # Extract title first (needed for directory creation)
        title = await self._extract_title_from_ad_page()

+        # Get BelenConf data which contains accurate ad_type information
        belen_conf = await self.web_execute("window.BelenConf")

+        # Extract ad type from BelenConf - more reliable than URL pattern matching, which is kept only as a fallback
+        # BelenConf contains "ad_type":"WANTED" or "ad_type":"OFFER" under universalAnalyticsOpts.dimensions
+        ad_type_from_conf = None
+        if isinstance(belen_conf, dict):
+            ad_type_from_conf = belen_conf.get("universalAnalyticsOpts", {}).get("dimensions", {}).get("ad_type")
+        info["type"] = ad_type_from_conf if ad_type_from_conf in {"OFFER", "WANTED"} else ("OFFER" if "s-anzeige" in self.page.url else "WANTED")
+
        info["category"] = await self._extract_category_from_ad_page()

        # append subcategory and change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
@@ -515,72 +526,35 @@ class AdExtractor(WebScrapingMixin):

    async def _extract_sell_directly_from_ad_page(self) -> bool | None:
        """
-        Extracts the sell directly option from an ad page using the JSON API.
+        Extracts the sell directly option from an ad page using the published ads data.
+
+        Uses data passed at construction time (from the manage-ads JSON) to avoid
+        repetitive API calls that create a bot detection signature.

        :return: bool | None - True if buyNowEligible, False if not eligible, None if unknown
        """
        try:
-            # Extract current ad ID from the page URL first
+            # Extract current ad ID from the page URL
            current_ad_id = self.extract_ad_id_from_ad_url(self.page.url)
            if current_ad_id == -1:
                LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
                return None

-            # Fetch the management JSON data using web_request with pagination support
-            page = 1
+            # Direct O(1) dict lookup; replaces the former paginated API calls (one request per page)
+            cached_ad = self.published_ads_by_id.get(current_ad_id)
+            if cached_ad is not None:
+                buy_now_eligible = cached_ad.get("buyNowEligible")
+                if isinstance(buy_now_eligible, bool):
+                    LOG.debug("sell_directly from data for ad %s: %s", current_ad_id, buy_now_eligible)
+                    return buy_now_eligible
+                LOG.debug("buyNowEligible not a bool for ad %s: %s", current_ad_id, buy_now_eligible)
+                return None

-            while True:
-                # Safety check: don't paginate beyond reasonable limit
-                if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT:
-                    LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT)
-                    break
-
-                response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
-
-                try:
-                    json_data = json.loads(response["content"])
-                except json.JSONDecodeError as ex:
-                    LOG.debug("Failed to parse JSON response on page %s: %s", page, ex)
-                    break
-
-                # Find the current ad in the ads list
-                if isinstance(json_data, dict) and "ads" in json_data:
-                    ads_list = json_data["ads"]
-                    if isinstance(ads_list, list):
-                        # Filter ads to find the current ad by ID
-                        current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
-                        if current_ad and "buyNowEligible" in current_ad:
-                            buy_now_eligible = current_ad["buyNowEligible"]
-                            return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
-
-                # Check if we need to fetch more pages
-                paging = json_data.get("paging") if isinstance(json_data, dict) else None
-                if not isinstance(paging, dict):
-                    break
-
-                # Parse pagination info using real API fields
-                current_page_num = misc.coerce_page_number(paging.get("pageNum"))
-                total_pages = misc.coerce_page_number(paging.get("last"))
-
-                if current_page_num is None:
-                    LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
-                    break
-
-                # Stop if we've reached the last page
-                if total_pages is None or current_page_num >= total_pages:
-                    break
-
-                # Use API's next field for navigation (more robust than our counter)
-                next_page = misc.coerce_page_number(paging.get("next"))
-                if next_page is None:
-                    LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
-                    break
-                page = next_page
-
-            # If the key doesn't exist or ad not found, return None (unknown)
+            # Ad not in user's published ads (may be someone else's ad)
+            LOG.debug("No data for ad %s, returning None for sell_directly", current_ad_id)
            return None

-        except (TimeoutError, json.JSONDecodeError, KeyError, TypeError) as e:
+        except (KeyError, TypeError) as e:
            LOG.debug("Could not determine sell_directly status: %s", e)
            return None

--- a/src/kleinanzeigen_bot/resources/translations.de.yaml
+++ b/src/kleinanzeigen_bot/resources/translations.de.yaml
@@ -225,12 +225,15 @@ kleinanzeigen_bot/__init__.py:
    "Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...": "Attributfeld '%s' scheint eine Combobox zu sein (d.h. Texteingabefeld mit Dropdown-Filter)..."

  download_ads:
+    "Fetching published ads...": "Lade veröffentlichte Anzeigen..."
+    "Loaded %s published ads.": "%s veröffentlichte Anzeigen geladen."
    "Scanning your ad overview...": "Scanne Anzeigenübersicht..."
    "%s found.": "%s gefunden."
    "ad": "Anzeige"
    "Starting download of all ads...": "Starte den Download aller Anzeigen..."
    "%d of %d ads were downloaded from your profile.": "%d von %d Anzeigen wurden aus Ihrem Profil heruntergeladen."
    "Starting download of not yet downloaded ads...": "Starte den Download noch nicht heruntergeladener Anzeigen..."
+    "Skipping ad with non-numeric id: %s": "Überspringe Anzeige mit nicht-numerischer ID: %s"
    "The ad with id %d has already been saved.": "Die Anzeige mit der ID %d wurde bereits gespeichert."
    "%s were downloaded from your profile.": "%s wurden aus Ihrem Profil heruntergeladen."
    "new ad": "neue Anzeige"
@@ -317,9 +320,6 @@ kleinanzeigen_bot/extract.py:

  _extract_sell_directly_from_ad_page:
    "Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s"
-    "Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Seitenaufschaltung nach %s Seiten, um eine Endlosschleife zu vermeiden"
-    "Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung"
-    "Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung"

 #################################################
 kleinanzeigen_bot/utils/i18n.py:
--- a/tests/unit/test_extract.py
+++ b/tests/unit/test_extract.py
@@ -844,284 +844,103 @@ class TestAdExtractorContent:
            assert info.description == raw_description

    @pytest.mark.asyncio
-    async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None:
-        """Test extraction of sell directly option."""
-        # Mock the page URL to extract the ad ID
+    async def test_extract_sell_directly_data_hit_true(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction with data hit - buyNowEligible=True."""
+        # Set up extractor with published ads data
+        test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}}
+
+        # Set up page URL
        test_extractor.page = MagicMock()
        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"

-        # Test when extract_ad_id_from_ad_url returns -1 (invalid URL)
+        result = await test_extractor._extract_sell_directly_from_ad_page()
+
+        assert result is True
+
+    @pytest.mark.asyncio
+    async def test_extract_sell_directly_data_hit_false(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction with data hit - buyNowEligible=False."""
+        test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": False}}
+
+        test_extractor.page = MagicMock()
+        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
+
+        result = await test_extractor._extract_sell_directly_from_ad_page()
+
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_extract_sell_directly_data_miss(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction with data miss - ad ID not in published_ads_by_id returns None."""
+        # published_ads_by_id holds a different ad ID than the one in the URL - true data miss
+        test_extractor.published_ads_by_id = {987654321: {"id": 987654321, "buyNowEligible": True}}
+
+        test_extractor.page = MagicMock()
+        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
+
+        result = await test_extractor._extract_sell_directly_from_ad_page()
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_extract_sell_directly_empty_published_ads(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction with empty published_ads_by_id - returns None."""
+        test_extractor.published_ads_by_id = {}
+
+        test_extractor.page = MagicMock()
+        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
+
+        result = await test_extractor._extract_sell_directly_from_ad_page()
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_extract_sell_directly_invalid_url(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction with invalid URL - returns None."""
+        test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}}
+
+        test_extractor.page = MagicMock()
        test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url"
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            # When pageNum is missing from the API response, coerce_page_number() returns None,
-            # causing the pagination loop to break and return None without making a web_request call.
-            assert result is None

-            # Verify web_request was NOT called when URL is invalid
-            mock_web_request.assert_not_awaited()
+        result = await test_extractor._extract_sell_directly_from_ad_page()

-        # Reset to valid URL for subsequent tests
-        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
-
-        # Test successful extraction with buyNowEligible = true
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {
-                "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]})
-            }
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is True
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test successful extraction with buyNowEligible = false
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {
-                "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]})
-            }
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is False
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test pagination: ad found on second page
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.side_effect = [
-                {
-                    "content": json.dumps(
-                        {
-                            "ads": [{"id": 987654321, "buyNowEligible": False}],
-                            "paging": {"pageNum": 1, "last": 2, "next": 2},
-                        }
-                    )
-                },
-                {
-                    "content": json.dumps(
-                        {
-                            "ads": [{"id": 123456789, "buyNowEligible": True}],
-                            "paging": {"pageNum": 2, "last": 2},
-                        }
-                    )
-                },
-            ]
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is True
-
-            mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-            mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
-
-        # Test when buyNowEligible is missing from the current ad
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {
-                "content": json.dumps(
-                    {
-                        "ads": [
-                            {"id": 123456789},  # No buyNowEligible field
-                            {"id": 987654321, "buyNowEligible": True},
-                        ]
-                    }
-                )
-            }
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when current ad is not found in the ads list
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321, "buyNowEligible": True}]})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test timeout error
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request:
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test JSON decode error
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": "invalid json"}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when ads list is empty
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": []})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when buyNowEligible is a non-boolean value (string "true")
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {
-                "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]})
-            }
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when buyNowEligible is a non-boolean value (integer 1)
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {
-                "content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]})
-            }
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when json_data is not a dict (covers line 622)
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps(["not", "a", "dict"])}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when json_data is a dict but doesn't have "ads" key (covers line 622)
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"other_key": "value"})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-        # Test when ads_list is not a list (covers line 624)
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": "not a list"})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-            # Verify web_request was called with the correct URL (now includes pagination)
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
+        assert result is None

    @pytest.mark.asyncio
-    async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
-        test_extractor.page = MagicMock()
-        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
-        monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0)
+    async def test_extract_sell_directly_non_boolean_value(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction when buyNowEligible is not a boolean."""
+        test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": "true"}}  # String, not bool

-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-            mock_web_request.assert_not_awaited()
-
-    @pytest.mark.asyncio
-    async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None:
        test_extractor.page = MagicMock()
        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"

-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {
-                "content": json.dumps(
-                    {
-                        "ads": [{"id": 987654321, "buyNowEligible": True}],
-                        "paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0},
-                    }
-                )
-            }
+        result = await test_extractor._extract_sell_directly_from_ad_page()

-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
+        assert result is None

    @pytest.mark.asyncio
-    async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None:
+    async def test_extract_sell_directly_missing_buy_now_field(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction when buyNowEligible field is missing."""
+        test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "state": "active"}}  # No buyNowEligible
+
        test_extractor.page = MagicMock()
        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"

-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.side_effect = [
-                {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}})},
-                {"content": json.dumps({"ads": []})},
-            ]
+        result = await test_extractor._extract_sell_directly_from_ad_page()

-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-            mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-            mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
+        assert result is None

    @pytest.mark.asyncio
-    async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None:
+    async def test_extract_sell_directly_integer_value(self, test_extractor:extract_module.AdExtractor) -> None:
+        """Test sell_directly extraction when buyNowEligible is an integer (not bool)."""
+        test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": 1}}  # Integer, not bool
+
        test_extractor.page = MagicMock()
        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"

-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}})}
+        result = await test_extractor._extract_sell_directly_from_ad_page()

-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-    @pytest.mark.asyncio
-    async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None:
-        test_extractor.page = MagicMock()
-        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
-
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1.5, "last": 0}})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 2.0, "last": 1}})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-
-    @pytest.mark.asyncio
-    async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
-        test_extractor.page = MagicMock()
-        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
-        monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1)
-
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
-            mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
-
-    @pytest.mark.asyncio
-    async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None:
-        test_extractor.page = MagicMock()
-        test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
-
-        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
-            mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {}})}
-
-            result = await test_extractor._extract_sell_directly_from_ad_page()
-            assert result is None
+        assert result is None


 class TestAdExtractorCategory:
--- a/tests/unit/test_init.py
+++ b/tests/unit/test_init.py
@@ -165,8 +165,8 @@ class TestKleinanzeigenBotInitialization:
        assert update_checker_calls == [(test_bot.config, "xdg")]

    @pytest.mark.asyncio
-    async def test_download_ads_passes_installation_mode(self, test_bot:KleinanzeigenBot) -> None:
-        """Ensure download_ads wires installation mode into AdExtractor."""
+    async def test_download_ads_passes_installation_mode_and_published_ads(self, test_bot:KleinanzeigenBot) -> None:
+        """Ensure download_ads wires installation mode and published_ads_by_id into AdExtractor."""
        test_bot.installation_mode = "xdg"
        test_bot.ads_selector = "all"
        test_bot.browser = MagicMock()
@@ -174,10 +174,18 @@ class TestKleinanzeigenBotInitialization:
        extractor_mock = MagicMock()
        extractor_mock.extract_own_ads_urls = AsyncMock(return_value = [])

-        with patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor:
+        mock_published_ads = [{"id": 123, "buyNowEligible": True}, {"id": 456, "buyNowEligible": False}]
+
+        with (
+            patch.object(test_bot, "_fetch_published_ads", new_callable = AsyncMock, return_value = mock_published_ads),
+            patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor,
+        ):
            await test_bot.download_ads()

-        mock_extractor.assert_called_once_with(test_bot.browser, test_bot.config, "xdg")
+        # Verify published_ads_by_id is built correctly and passed to extractor
+        mock_extractor.assert_called_once_with(
+            test_bot.browser, test_bot.config, "xdg", published_ads_by_id = {123: mock_published_ads[0], 456: mock_published_ads[1]}
+        )


 class TestKleinanzeigenBotLogging: