mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
feat: cache published ads data to avoid repetitive API calls during ad download (#809)
This commit is contained in:
@@ -581,11 +581,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
dicts.save_commented_model(
|
||||
self.config_file_path,
|
||||
default_config,
|
||||
header=(
|
||||
"# yaml-language-server: $schema="
|
||||
"https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot"
|
||||
"/main/schemas/config.schema.json"
|
||||
),
|
||||
header = ("# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json"),
|
||||
exclude = {"ad_defaults": {"description"}},
|
||||
)
|
||||
|
||||
@@ -2020,8 +2016,21 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
Determines which download mode was chosen with the arguments, and calls the specified download routine.
|
||||
This downloads either all, only unsaved (new), or specific ads given by ID.
|
||||
"""
|
||||
# Fetch published ads once from manage-ads JSON to avoid repetitive API calls during extraction
|
||||
# Build lookup dict inline and pass directly to extractor (no cache abstraction needed)
|
||||
LOG.info("Fetching published ads...")
|
||||
published_ads = await self._fetch_published_ads()
|
||||
published_ads_by_id:dict[int, dict[str, Any]] = {}
|
||||
for published_ad in published_ads:
|
||||
try:
|
||||
ad_id = published_ad.get("id")
|
||||
if ad_id is not None:
|
||||
published_ads_by_id[int(ad_id)] = published_ad
|
||||
except (ValueError, TypeError):
|
||||
LOG.warning("Skipping ad with non-numeric id: %s", published_ad.get("id"))
|
||||
LOG.info("Loaded %s published ads.", len(published_ads_by_id))
|
||||
|
||||
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable)
|
||||
ad_extractor = extract.AdExtractor(self.browser, self.config, self.installation_mode_or_portable, published_ads_by_id = published_ads_by_id)
|
||||
|
||||
# use relevant download routine
|
||||
if self.ads_selector in {"all", "new"}: # explore ads overview for these two modes
|
||||
|
||||
@@ -25,7 +25,6 @@ __all__ = [
|
||||
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||
|
||||
_BREADCRUMB_MIN_DEPTH:Final[int] = 2
|
||||
_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100
|
||||
BREADCRUMB_RE = re.compile(r"/c(\d+)")
|
||||
|
||||
|
||||
@@ -34,13 +33,20 @@ class AdExtractor(WebScrapingMixin):
|
||||
Wrapper class for ad extraction that uses an active bot's browser session to extract specific elements from an ad page.
|
||||
"""
|
||||
|
||||
def __init__(self, browser:Browser, config:Config, installation_mode:xdg_paths.InstallationMode = "portable") -> None:
|
||||
def __init__(
|
||||
self,
|
||||
browser:Browser,
|
||||
config:Config,
|
||||
installation_mode:xdg_paths.InstallationMode = "portable",
|
||||
published_ads_by_id:dict[int, dict[str, Any]] | None = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.browser = browser
|
||||
self.config:Config = config
|
||||
if installation_mode not in {"portable", "xdg"}:
|
||||
raise ValueError(f"Unsupported installation mode: {installation_mode}")
|
||||
self.installation_mode:xdg_paths.InstallationMode = installation_mode
|
||||
self.published_ads_by_id:dict[int, dict[str, Any]] = published_ads_by_id or {}
|
||||
|
||||
async def download_ad(self, ad_id:int) -> None:
|
||||
"""
|
||||
@@ -231,14 +237,19 @@ class AdExtractor(WebScrapingMixin):
|
||||
"""
|
||||
info:dict[str, Any] = {"active": True}
|
||||
|
||||
# extract basic info
|
||||
info["type"] = "OFFER" if "s-anzeige" in self.page.url else "WANTED"
|
||||
|
||||
# Extract title
|
||||
# Extract title first (needed for directory creation)
|
||||
title = await self._extract_title_from_ad_page()
|
||||
|
||||
# Get BelenConf data which contains accurate ad_type information
|
||||
belen_conf = await self.web_execute("window.BelenConf")
|
||||
|
||||
# Extract ad type from BelenConf - more reliable than URL pattern matching
|
||||
# BelenConf contains "ad_type":"WANTED" or "ad_type":"OFFER" in dimensions
|
||||
ad_type_from_conf = None
|
||||
if isinstance(belen_conf, dict):
|
||||
ad_type_from_conf = belen_conf.get("universalAnalyticsOpts", {}).get("dimensions", {}).get("ad_type")
|
||||
info["type"] = ad_type_from_conf if ad_type_from_conf in {"OFFER", "WANTED"} else ("OFFER" if "s-anzeige" in self.page.url else "WANTED")
|
||||
|
||||
info["category"] = await self._extract_category_from_ad_page()
|
||||
|
||||
# append subcategory and change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
||||
@@ -515,72 +526,35 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
async def _extract_sell_directly_from_ad_page(self) -> bool | None:
|
||||
"""
|
||||
Extracts the sell directly option from an ad page using the JSON API.
|
||||
Extracts the sell directly option from an ad page using the published ads data.
|
||||
|
||||
Uses data passed at construction time (from the manage-ads JSON) to avoid
|
||||
repetitive API calls that create a bot detection signature.
|
||||
|
||||
:return: bool | None - True if buyNowEligible, False if not eligible, None if unknown
|
||||
"""
|
||||
try:
|
||||
# Extract current ad ID from the page URL first
|
||||
# Extract current ad ID from the page URL
|
||||
current_ad_id = self.extract_ad_id_from_ad_url(self.page.url)
|
||||
if current_ad_id == -1:
|
||||
LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
|
||||
return None
|
||||
|
||||
# Fetch the management JSON data using web_request with pagination support
|
||||
page = 1
|
||||
# Direct dict lookup (O(1) instead of O(pages) API calls)
|
||||
cached_ad = self.published_ads_by_id.get(current_ad_id)
|
||||
if cached_ad is not None:
|
||||
buy_now_eligible = cached_ad.get("buyNowEligible")
|
||||
if isinstance(buy_now_eligible, bool):
|
||||
LOG.debug("sell_directly from data for ad %s: %s", current_ad_id, buy_now_eligible)
|
||||
return buy_now_eligible
|
||||
LOG.debug("buyNowEligible not a bool for ad %s: %s", current_ad_id, buy_now_eligible)
|
||||
return None
|
||||
|
||||
while True:
|
||||
# Safety check: don't paginate beyond reasonable limit
|
||||
if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT:
|
||||
LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT)
|
||||
break
|
||||
|
||||
response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
|
||||
|
||||
try:
|
||||
json_data = json.loads(response["content"])
|
||||
except json.JSONDecodeError as ex:
|
||||
LOG.debug("Failed to parse JSON response on page %s: %s", page, ex)
|
||||
break
|
||||
|
||||
# Find the current ad in the ads list
|
||||
if isinstance(json_data, dict) and "ads" in json_data:
|
||||
ads_list = json_data["ads"]
|
||||
if isinstance(ads_list, list):
|
||||
# Filter ads to find the current ad by ID
|
||||
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
|
||||
if current_ad and "buyNowEligible" in current_ad:
|
||||
buy_now_eligible = current_ad["buyNowEligible"]
|
||||
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
|
||||
|
||||
# Check if we need to fetch more pages
|
||||
paging = json_data.get("paging") if isinstance(json_data, dict) else None
|
||||
if not isinstance(paging, dict):
|
||||
break
|
||||
|
||||
# Parse pagination info using real API fields
|
||||
current_page_num = misc.coerce_page_number(paging.get("pageNum"))
|
||||
total_pages = misc.coerce_page_number(paging.get("last"))
|
||||
|
||||
if current_page_num is None:
|
||||
LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
|
||||
break
|
||||
|
||||
# Stop if we've reached the last page
|
||||
if total_pages is None or current_page_num >= total_pages:
|
||||
break
|
||||
|
||||
# Use API's next field for navigation (more robust than our counter)
|
||||
next_page = misc.coerce_page_number(paging.get("next"))
|
||||
if next_page is None:
|
||||
LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
|
||||
break
|
||||
page = next_page
|
||||
|
||||
# If the key doesn't exist or ad not found, return None (unknown)
|
||||
# Ad not in user's published ads (may be someone else's ad)
|
||||
LOG.debug("No data for ad %s, returning None for sell_directly", current_ad_id)
|
||||
return None
|
||||
|
||||
except (TimeoutError, json.JSONDecodeError, KeyError, TypeError) as e:
|
||||
except (KeyError, TypeError) as e:
|
||||
LOG.debug("Could not determine sell_directly status: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
@@ -225,12 +225,15 @@ kleinanzeigen_bot/__init__.py:
|
||||
"Attribute field '%s' seems to be a Combobox (i.e. text input with filtering dropdown)...": "Attributfeld '%s' scheint eine Combobox zu sein (d.h. Texteingabefeld mit Dropdown-Filter)..."
|
||||
|
||||
download_ads:
|
||||
"Fetching published ads...": "Lade veröffentlichte Anzeigen..."
|
||||
"Loaded %s published ads.": "%s veröffentlichte Anzeigen geladen."
|
||||
"Scanning your ad overview...": "Scanne Anzeigenübersicht..."
|
||||
"%s found.": "%s gefunden."
|
||||
"ad": "Anzeige"
|
||||
"Starting download of all ads...": "Starte den Download aller Anzeigen..."
|
||||
"%d of %d ads were downloaded from your profile.": "%d von %d Anzeigen wurden aus Ihrem Profil heruntergeladen."
|
||||
"Starting download of not yet downloaded ads...": "Starte den Download noch nicht heruntergeladener Anzeigen..."
|
||||
"Skipping ad with non-numeric id: %s": "Überspringe Anzeige mit nicht-numerischer ID: %s"
|
||||
"The ad with id %d has already been saved.": "Die Anzeige mit der ID %d wurde bereits gespeichert."
|
||||
"%s were downloaded from your profile.": "%s wurden aus Ihrem Profil heruntergeladen."
|
||||
"new ad": "neue Anzeige"
|
||||
@@ -317,9 +320,6 @@ kleinanzeigen_bot/extract.py:
|
||||
|
||||
_extract_sell_directly_from_ad_page:
|
||||
"Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s"
|
||||
"Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Seitenaufschaltung nach %s Seiten, um eine Endlosschleife zu vermeiden"
|
||||
"Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung"
|
||||
"Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung"
|
||||
|
||||
#################################################
|
||||
kleinanzeigen_bot/utils/i18n.py:
|
||||
|
||||
@@ -844,284 +844,103 @@ class TestAdExtractorContent:
|
||||
assert info.description == raw_description
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test extraction of sell directly option."""
|
||||
# Mock the page URL to extract the ad ID
|
||||
async def test_extract_sell_directly_data_hit_true(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction with data hit - buyNowEligible=True."""
|
||||
# Setup extractor with published ads data
|
||||
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}}
|
||||
|
||||
# Setup page URL
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
# Test when extract_ad_id_from_ad_url returns -1 (invalid URL)
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
assert result is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_data_hit_false(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction with data hit - buyNowEligible=False."""
|
||||
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": False}}
|
||||
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
assert result is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_data_miss(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction with data miss - ad ID not in cache returns None."""
|
||||
# Cache has a different ad ID than the one in the URL - true data miss
|
||||
test_extractor.published_ads_by_id = {987654321: {"id": 987654321, "buyNowEligible": True}}
|
||||
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_empty_published_ads(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction with empty published_ads_by_id - returns None."""
|
||||
test_extractor.published_ads_by_id = {}
|
||||
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_invalid_url(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction with invalid URL - returns None."""
|
||||
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": True}}
|
||||
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url"
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
# When pageNum is missing from the API response, coerce_page_number() returns None,
|
||||
# causing the pagination loop to break and return None without making a web_request call.
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was NOT called when URL is invalid
|
||||
mock_web_request.assert_not_awaited()
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
# Reset to valid URL for subsequent tests
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
# Test successful extraction with buyNowEligible = true
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {
|
||||
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]})
|
||||
}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is True
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test successful extraction with buyNowEligible = false
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {
|
||||
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]})
|
||||
}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is False
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test pagination: ad found on second page
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.side_effect = [
|
||||
{
|
||||
"content": json.dumps(
|
||||
{
|
||||
"ads": [{"id": 987654321, "buyNowEligible": False}],
|
||||
"paging": {"pageNum": 1, "last": 2, "next": 2},
|
||||
}
|
||||
)
|
||||
},
|
||||
{
|
||||
"content": json.dumps(
|
||||
{
|
||||
"ads": [{"id": 123456789, "buyNowEligible": True}],
|
||||
"paging": {"pageNum": 2, "last": 2},
|
||||
}
|
||||
)
|
||||
},
|
||||
]
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is True
|
||||
|
||||
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
|
||||
|
||||
# Test when buyNowEligible is missing from the current ad
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {
|
||||
"content": json.dumps(
|
||||
{
|
||||
"ads": [
|
||||
{"id": 123456789}, # No buyNowEligible field
|
||||
{"id": 987654321, "buyNowEligible": True},
|
||||
]
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when current ad is not found in the ads list
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321, "buyNowEligible": True}]})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test timeout error
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request:
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test JSON decode error
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": "invalid json"}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when ads list is empty
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": []})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when buyNowEligible is a non-boolean value (string "true")
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {
|
||||
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]})
|
||||
}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when buyNowEligible is a non-boolean value (integer 1)
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {
|
||||
"content": json.dumps({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]})
|
||||
}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when json_data is not a dict (covers line 622)
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps(["not", "a", "dict"])}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when json_data is a dict but doesn't have "ads" key (covers line 622)
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"other_key": "value"})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
# Test when ads_list is not a list (covers line 624)
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": "not a list"})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
# Verify web_request was called with the correct URL (now includes pagination)
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0)
|
||||
async def test_extract_sell_directly_non_boolean_value(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction when buyNowEligible is not a boolean."""
|
||||
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": "true"}} # String, not bool
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
mock_web_request.assert_not_awaited()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {
|
||||
"content": json.dumps(
|
||||
{
|
||||
"ads": [{"id": 987654321, "buyNowEligible": True}],
|
||||
"paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0},
|
||||
}
|
||||
)
|
||||
}
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
async def test_extract_sell_directly_missing_buy_now_field(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction when buyNowEligible field is missing."""
|
||||
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "state": "active"}} # No buyNowEligible
|
||||
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.side_effect = [
|
||||
{"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}})},
|
||||
{"content": json.dumps({"ads": []})},
|
||||
]
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
async def test_extract_sell_directly_integer_value(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
"""Test sell_directly extraction when buyNowEligible is an integer (not bool)."""
|
||||
test_extractor.published_ads_by_id = {123456789: {"id": 123456789, "buyNowEligible": 1}} # Integer, not bool
|
||||
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}})}
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1.5, "last": 0}})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 2.0, "last": 1}})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1)
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None:
|
||||
test_extractor.page = MagicMock()
|
||||
test_extractor.page.url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
|
||||
|
||||
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
|
||||
mock_web_request.return_value = {"content": json.dumps({"ads": [{"id": 987654321}], "paging": {}})}
|
||||
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
assert result is None
|
||||
|
||||
|
||||
class TestAdExtractorCategory:
|
||||
|
||||
@@ -165,8 +165,8 @@ class TestKleinanzeigenBotInitialization:
|
||||
assert update_checker_calls == [(test_bot.config, "xdg")]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_download_ads_passes_installation_mode(self, test_bot:KleinanzeigenBot) -> None:
|
||||
"""Ensure download_ads wires installation mode into AdExtractor."""
|
||||
async def test_download_ads_passes_installation_mode_and_published_ads(self, test_bot:KleinanzeigenBot) -> None:
|
||||
"""Ensure download_ads wires installation mode and published_ads_by_id into AdExtractor."""
|
||||
test_bot.installation_mode = "xdg"
|
||||
test_bot.ads_selector = "all"
|
||||
test_bot.browser = MagicMock()
|
||||
@@ -174,10 +174,18 @@ class TestKleinanzeigenBotInitialization:
|
||||
extractor_mock = MagicMock()
|
||||
extractor_mock.extract_own_ads_urls = AsyncMock(return_value = [])
|
||||
|
||||
with patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor:
|
||||
mock_published_ads = [{"id": 123, "buyNowEligible": True}, {"id": 456, "buyNowEligible": False}]
|
||||
|
||||
with (
|
||||
patch.object(test_bot, "_fetch_published_ads", new_callable = AsyncMock, return_value = mock_published_ads),
|
||||
patch("kleinanzeigen_bot.extract.AdExtractor", return_value = extractor_mock) as mock_extractor,
|
||||
):
|
||||
await test_bot.download_ads()
|
||||
|
||||
mock_extractor.assert_called_once_with(test_bot.browser, test_bot.config, "xdg")
|
||||
# Verify published_ads_by_id is built correctly and passed to extractor
|
||||
mock_extractor.assert_called_once_with(
|
||||
test_bot.browser, test_bot.config, "xdg", published_ads_by_id = {123: mock_published_ads[0], 456: mock_published_ads[1]}
|
||||
)
|
||||
|
||||
|
||||
class TestKleinanzeigenBotLogging:
|
||||
|
||||
Reference in New Issue
Block a user