feat: cache published ads data to avoid repetitive API calls during ad download (#809)

This commit is contained in:
Jens
2026-02-03 14:51:59 +01:00
committed by GitHub
parent e994ce1b1f
commit a8051c3814
5 changed files with 136 additions and 326 deletions

View File

@@ -581,11 +581,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
dicts.save_commented_model(
self.config_file_path,
default_config,
header=(
"# yaml-language-server: $schema="
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot"
"/main/schemas/config.schema.json"
),
header = ("# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json"),
exclude = {"ad_defaults": {"description"}},
)
@@ -2020,8 +2016,21 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
Determines which download mode was chosen with the arguments, and calls the specified download routine.
This downloads either all, only unsaved (new), or specific ads given by ID.
"""
# Fetch published ads once from manage-ads JSON to avoid repetitive API calls during extraction
# Build lookup dict inline and pass directly to extractor (no cache abstraction needed)
LOG.info("Fetching published ads...")
published_ads = await self._fetch_published_ads()
published_ads_by_id:dict[int, dict[str, Any]] = {}
for published_ad in published_ads:
try:
ad_id = published_ad.get("id")
if ad_id is not None:
published_ads_by_id[int(ad_id)] = published_ad
except (ValueError, TypeError):
LOG.warning("Skipping ad with non-numeric id: %s", published_ad.get("id"))
LOG.info("Loaded %s published ads.", len(published_ads_by_id))
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable)
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable, published_ads_by_id = published_ads_by_id)
# use relevant download routine
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes

View File

@@ -25,7 +25,6 @@ __all__ = [
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
_BREADCRUMB_MIN_DEPTH:Final[int] = 2
_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100
BREADCRUMB_RE = re.compile(r"/c(\d+)")
@@ -34,13 +33,20 @@ class AdExtractor(WebScrapingMixin):
Wrapper class for ad extraction that uses an active bot's browser session to extract specific elements from an ad page.
"""
def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None:
def __init__(
self,
browser:Browser,
config:Config,
installation_mode:xdg_paths.InstallationMode = "portable",
published_ads_by_id:dict[int, dict[str, Any]] | None = None,
) -> None:
super().__init__()
self.browser = browser
self.config:Config = config
if installation_mode not in {"portable", "xdg"}:
raise ValueError(f"Unsupported installation mode: {installation_mode}")
self.installation_mode:xdg_paths.InstallationMode = installation_mode
self.published_ads_by_id:dict[int, dict[str, Any]] = published_ads_by_id or {}
async def download_ad(self, ad_id:int) -> None:
"""
@@ -231,14 +237,19 @@ class AdExtractor(WebScrapingMixin):
"""
info:dict[str, Any] = {"active": True}
# extract basic info
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
# Extract title
# Extract title first (needed for directory creation)
title = await self._extract_title_from_ad_page()
# Get BelenConf data which contains accurate ad_type information
belen_conf = await self.web_execute("window.BelenConf")
# Extract ad type from BelenConf - more reliable than URL pattern matching
# BelenConf contains "ad_type":"WANTED" or "ad_type":"OFFER" in dimensions
ad_type_from_conf = None
if isinstance(belen_conf, dict):
ad_type_from_conf = belen_conf.get("universalAnalyticsOpts", {}).get("dimensions", {}).get("ad_type")
info["type"] = ad_type_from_conf if ad_type_from_conf in {"OFFER", "WANTED"} else ("OFFER" if "s-anzeige" in self.page.url else "WANTED")
info["category"] = await self._extract_category_from_ad_page()
# append subcategory and change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
@@ -515,72 +526,35 @@ class AdExtractor(WebScrapingMixin):
async def _extract_sell_directly_from_ad_page(self) -> bool | None:
"""
Extracts the sell directly option from an ad page using the JSON API.
Extracts the sell directly option from an ad page using the published ads data.
Uses data passed at construction time (from the manage-ads JSON) to avoid
repetitive API calls that create a bot detection signature.
:return: bool | None - True if buyNowEligible, False if not eligible, None if unknown
"""
try:
# Extract current ad ID from the page URL first
# Extract current ad ID from the page URL
current_ad_id = self.extract_ad_id_from_ad_url(self.page.url)
if current_ad_id == -1:
LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
return None
# Fetch the management JSON data using web_request with pagination support
page = 1
# Direct dict lookup (O(1) instead of O(pages) API calls)
cached_ad = self.published_ads_by_id.get(current_ad_id)
if cached_ad is not None:
buy_now_eligible = cached_ad.get("buyNowEligible")
if isinstance(buy_now_eligible, bool):
LOG.debug("sell_directly from data for ad %s: %s", current_ad_id, buy_now_eligible)
return buy_now_eligible
LOG.debug("buyNowEligible not a bool for ad %s: %s", current_ad_id, buy_now_eligible)
return None
while True:
# Safety check: don't paginate beyond reasonable limit
if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT:
LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT)
break
response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
try:
json_data = json.loads(response["content"])
except json.JSONDecodeError as ex:
LOG.debug("Failed to parse JSON response on page %s: %s", page, ex)
break
# Find the current ad in the ads list
if isinstance(json_data, dict) and "ads" in json_data:
ads_list = json_data["ads"]
if isinstance(ads_list, list):
# Filter ads to find the current ad by ID
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
if current_ad and "buyNowEligible" in current_ad:
buy_now_eligible = current_ad["buyNowEligible"]
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
# Check if we need to fetch more pages
paging = json_data.get("paging") if isinstance(json_data, dict) else None
if not isinstance(paging, dict):
break
# Parse pagination info using real API fields
current_page_num = misc.coerce_page_number(paging.get("pageNum"))
total_pages = misc.coerce_page_number(paging.get("last"))
if current_page_num is None:
LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
break
# Stop if we've reached the last page
if total_pages is None or current_page_num >= total_pages:
break
# Use API's next field for navigation (more robust than our counter)
next_page = misc.coerce_page_number(paging.get("next"))
if next_page is None:
LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
break
page = next_page
# If the key doesn't exist or ad not found, return None (unknown)
# Ad not in user's published ads (may be someone else's ad)
LOG.debug("No data for ad %s, returning None for sell_directly", current_ad_id)
return None
except (TimeoutError, json.JSONDecodeError, KeyError, TypeError) as e:
except (KeyError, TypeError) as e:
LOG.debug("Could not determine sell_directly status: %s", e)
return None

View File

@@ -225,12 +225,15 @@ kleinanzeigen_bot/__init__.py:
"Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...": "Attributfeld '%s' scheint eine Combobox zu sein (d.h. Texteingabefeld mit Dropdown-Filter)..."
download_ads:
"Fetching published ads...": "Lade veröffentlichte Anzeigen..."
"Loaded %s published ads.": "%s veröffentlichte Anzeigen geladen."
"Scanning your ad overview...": "Scanne Anzeigenübersicht..."
"%s found.": "%s gefunden."
"ad": "Anzeige"
"Starting download of all ads...": "Starte den Download aller Anzeigen..."
"%d of %d ads were downloaded from your profile.": "%d von %d Anzeigen wurden aus Ihrem Profil heruntergeladen."
"Starting download of not yet downloaded ads...": "Starte den Download noch nicht heruntergeladener Anzeigen..."
"Skipping ad with non-numeric id: %s": "Überspringe Anzeige mit nicht-numerischer ID: %s"
"The ad with id %d has already been saved.": "Die Anzeige mit der ID %d wurde bereits gespeichert."
"%s were downloaded from your profile.": "%s wurden aus Ihrem Profil heruntergeladen."
"new ad": "neue Anzeige"
@@ -317,9 +320,6 @@ kleinanzeigen_bot/extract.py:
_extract_sell_directly_from_ad_page:
"Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s"
"Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Seitenaufschaltung nach %s Seiten, um eine Endlosschleife zu vermeiden"
"Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung"
"Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung"
#################################################
kleinanzeigen_bot/utils/i18n.py:

View File

@@ -844,284 +844,103 @@ class TestAdExtractorContent:
assert info.description == raw_description
@pytest.mark.asyncio
async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test extraction of sell directly option."""
# Mock the page URL to extract the ad ID
async def test_extract_sell_directly_data_hit_true(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction with data hit - buyNowEligible=True."""
# Setup extractor with published ads data
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}}
# Setup page URL
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
# Test when extract_ad_id_from_ad_url returns -1 (invalid URL)
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is True
@pytest.mark.asyncio
async def test_extract_sell_directly_data_hit_false(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction with data hit - buyNowEligible=False."""
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": False}}
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is False
@pytest.mark.asyncio
async def test_extract_sell_directly_data_miss(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction with data miss - ad ID not in cache returns None."""
# Cache has a different ad ID than the one in the URL - true data miss
test_extractor.published_ads_by_id = {987654321: {"id": 987654321, "buyNowEligible": True}}
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_empty_published_ads(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction with empty published_ads_by_id - returns None."""
test_extractor.published_ads_by_id = {}
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_invalid_url(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction with invalid URL - returns None."""
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}}
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
result = await test_extractor._extract_sell_directly_from_ad_page()
# When pageNum is missing from the API response, coerce_page_number() returns None,
# causing the pagination loop to break and return None without making a web_request call.
assert result is None
# Verify web_request was NOT called when URL is invalid
mock_web_request.assert_not_awaited()
result = await test_extractor._extract_sell_directly_from_ad_page()
# Reset to valid URL for subsequent tests
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
# Test successful extraction with buyNowEligible = true
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]})
}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is True
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test successful extraction with buyNowEligible = false
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]})
}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is False
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test pagination: ad found on second page
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.side_effect = [
{
"content": json.dumps(
{
"ads": [{"id": 987654321, "buyNowEligible": False}],
"paging": {"pageNum": 1, "last": 2, "next": 2},
}
)
},
{
"content": json.dumps(
{
"ads": [{"id": 123456789, "buyNowEligible": True}],
"paging": {"pageNum": 2, "last": 2},
}
)
},
]
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is True
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
# Test when buyNowEligible is missing from the current ad
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {
"content": json.dumps(
{
"ads": [
{"id": 123456789}, # No buyNowEligible field
{"id": 987654321, "buyNowEligible": True},
]
}
)
}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when current ad is not found in the ads list
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321, "buyNowEligible": True}]})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test timeout error
with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test JSON decode error
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": "invalid json"}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when ads list is empty
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": []})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when buyNowEligible is a non-boolean value (string "true")
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]})
}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when buyNowEligible is a non-boolean value (integer 1)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]})
}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when json_data is not a dict (covers line 622)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps(["not", "a", "dict"])}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when json_data is a dict but doesn't have "ads" key (covers line 622)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"other_key": "value"})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when ads_list is not a list (covers line 624)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": "not a list"})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0)
async def test_extract_sell_directly_non_boolean_value(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction when buyNowEligible is not a boolean."""
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": "true"}} # String, not bool
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
mock_web_request.assert_not_awaited()
@pytest.mark.asyncio
async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None:
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {
"content": json.dumps(
{
"ads": [{"id": 987654321, "buyNowEligible": True}],
"paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0},
}
)
}
result = await test_extractor._extract_sell_directly_from_ad_page()
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None:
async def test_extract_sell_directly_missing_buy_now_field(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction when buyNowEligible field is missing."""
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "state": "active"}} # No buyNowEligible
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.side_effect = [
{"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}})},
{"content": json.dumps({"ads": []})},
]
result = await test_extractor._extract_sell_directly_from_ad_page()
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None:
async def test_extract_sell_directly_integer_value(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test sell_directly extraction when buyNowEligible is an integer (not bool)."""
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": 1}} # Integer, not bool
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}})}
result = await test_extractor._extract_sell_directly_from_ad_page()
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None:
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1.5, "last": 0}})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 2.0, "last": 1}})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
@pytest.mark.asyncio
async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
@pytest.mark.asyncio
async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None:
test_extractor.page = MagicMock()
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {}})}
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
assert result is None
class TestAdExtractorCategory:

View File

@@ -165,8 +165,8 @@ class TestKleinanzeigenBotInitialization:
assert update_checker_calls == [(test_bot.config, "xdg")]
@pytest.mark.asyncio
async def test_download_ads_passes_installation_mode(self, test_bot:KleinanzeigenBot) -> None:
"""Ensure download_ads wires installation mode into AdExtractor."""
async def test_download_ads_passes_installation_mode_and_published_ads(self, test_bot:KleinanzeigenBot) -> None:
"""Ensure download_ads wires installation mode and published_ads_by_id into AdExtractor."""
test_bot.installation_mode = "xdg"
test_bot.ads_selector = "all"
test_bot.browser = MagicMock()
@@ -174,10 +174,18 @@ class TestKleinanzeigenBotInitialization:
extractor_mock = MagicMock()
extractor_mock.extract_own_ads_urls = AsyncMock(return_value = [])
with patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor:
mock_published_ads = [{"id": 123, "buyNowEligible": True}, {"id": 456, "buyNowEligible": False}]
with (
patch.object(test_bot, "_fetch_published_ads", new_callable = AsyncMock, return_value = mock_published_ads),
patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor,
):
await test_bot.download_ads()
mock_extractor.assert_called_once_with(test_bot.browser, test_bot.config, "xdg")
# Verify published_ads_by_id is built correctly and passed to extractor
mock_extractor.assert_called_once_with(
test_bot.browser, test_bot.config, "xdg", published_ads_by_id = {123: mock_published_ads[0], 456: mock_published_ads[1]}
)
class TestKleinanzeigenBotLogging: