fix: JSON API Pagination for >25 Ads (#797)

## ℹ️ Description
*Provide a concise summary of the changes introduced in this pull
request.*

- Link to the related issue(s): Closes #789 (completes the fix started
in #793)
- **Motivation**: Fix JSON API pagination for accounts with >25 ads.
Aligns pagination logic with weidi’s approach (starts at page 1), while
hardening error handling and tests. Based on
https://github.com/weidi/kleinanzeigen-bot/pull/1.

## 📋 Changes Summary

- Added pagination helper to fetch all published ads and use it in
delete/extend/publish/update flows
- Added robust handling for malformed JSON payloads and unexpected ads
types (with translated warnings)
- Improved sell_directly extraction with pagination, bounds checks, and
shared coercion helper
- Added/updated tests for pagination and edge cases; updated assertions
to pytest.fail style

### ⚙️ Type of Change
Select the type(s) of change(s) included in this pull request:
- [x] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ] ✨ New feature (adds new functionality without breaking existing
usage)
- [ ] 💥 Breaking change (changes that might break existing user setups,
scripts, or configurations)


## ✅ Checklist
Before requesting a review, confirm the following:
- [x] I have reviewed my changes to ensure they meet the project's
standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run
test:cov:unified`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* Reliable multi-page fetching for published ads and buy-now eligibility
checks.

* **Bug Fixes**
* Safer pagination with per-page JSON handling, limits and improved
termination diagnostics; ensures pageNum is used when needed.

* **Tests**
* New comprehensive pagination tests and updates to existing tests to
reflect multi-page behavior.

* **Chores**
* Added a utility to safely coerce page numbers; minor utility signature
cleanup.

<sub>✏️ Tip: You can customize this high-level summary in your review
settings.</sub>
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Jens
2026-01-31 22:17:37 +01:00
committed by GitHub
parent 51a8042cda
commit 96f465d5bc
7 changed files with 651 additions and 118 deletions

View File

@@ -1047,10 +1047,97 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
LOG.debug("No login detected - DOM elements not found and server probe returned %s", state.name)
return False
async def _fetch_published_ads(self) -> list[dict[str, Any]]:
    """Fetch all published ads, handling API pagination.

    Requests ``m-meine-anzeigen-verwalten.json`` page by page, starting at page 1,
    and collects the entries of each page's ``ads`` list. Fetching is best-effort:
    on a timeout, an unparsable or unexpected payload, or unusable paging data the
    loop stops and the ads collected so far are returned.

    Returns:
        List of all published ads collected across the fetched pages.
    """
    ads:list[dict[str, Any]] = []
    page = 1
    MAX_PAGE_LIMIT:Final[int] = 100
    SNIPPET_LIMIT:Final[int] = 500

    while True:
        # Safety check: don't paginate beyond reasonable limit
        if page > MAX_PAGE_LIMIT:
            LOG.warning("Stopping pagination after %s pages to avoid infinite loop", MAX_PAGE_LIMIT)
            break

        try:
            response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
        except TimeoutError as ex:
            LOG.warning("Pagination request timed out on page %s: %s", page, ex)
            break

        content = response.get("content", "")
        try:
            json_data = json.loads(content)
        except json.JSONDecodeError as ex:
            if not content:
                LOG.warning("Empty JSON response content on page %s", page)
                break
            # Log only a bounded excerpt so a large error page does not flood the log
            snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
            LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet)
            break

        if not isinstance(json_data, dict):
            snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
            LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet)
            break

        page_ads = json_data.get("ads", [])
        if not isinstance(page_ads, list):
            preview = str(page_ads)
            if len(preview) > SNIPPET_LIMIT:
                preview = preview[:SNIPPET_LIMIT] + "..."
            LOG.warning("Unexpected 'ads' type on page %s: %s value: %s", page, type(page_ads).__name__, preview)
            break

        ads.extend(page_ads)

        paging = json_data.get("paging")
        if not isinstance(paging, dict):
            LOG.debug("No paging dict found on page %s, assuming single page", page)
            break

        # Use only real API fields (confirmed from production data)
        current_page_num = misc.coerce_page_number(paging.get("pageNum"))
        total_pages = misc.coerce_page_number(paging.get("last"))

        if current_page_num is None:
            LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
            break

        if total_pages is None:
            LOG.debug("No pagination info found, assuming single page")
            break

        # Stop if reached last page
        if current_page_num >= total_pages:
            LOG.info("Reached last page %s of %s, stopping pagination", current_page_num, total_pages)
            break

        # Safety: stop if no ads returned
        if not page_ads:
            LOG.info("No ads found on page %s, stopping pagination", page)
            break

        LOG.debug("Page %s: fetched %s ads (numFound=%s)", page, len(page_ads), paging.get("numFound"))

        # Use API's next field for navigation (more robust than our counter)
        next_page = misc.coerce_page_number(paging.get("next"))
        # The requested page number must strictly increase. MAX_PAGE_LIMIT only caps the
        # page *number*, so a repeated or backwards 'next' would otherwise re-request the
        # same pages forever and append duplicate ads.
        if next_page is None or next_page <= page:
            LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
            break
        page = next_page

    return ads
async def delete_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = await self._fetch_published_ads()
for ad_file, ad_cfg, _ad_cfg_orig in ad_cfgs:
count += 1
@@ -1094,7 +1181,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
async def extend_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
"""Extends ads that are close to expiry."""
# Fetch currently published ads from API
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = await self._fetch_published_ads()
# Filter ads that need extension
ads_to_extend = []
@@ -1213,7 +1300,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
failed_count = 0
max_retries = 3
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = await self._fetch_published_ads()
for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg.title, ad_file)
@@ -1561,12 +1648,13 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
"""
count = 0
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
published_ads = await self._fetch_published_ads()
for ad_file, ad_cfg, ad_cfg_orig in ad_cfgs:
ad = next((ad for ad in published_ads if ad["id"] == ad_cfg.id), None)
if not ad:
LOG.warning(" -> SKIPPED: ad '%s' (ID: %s) not found in published ads", ad_cfg.title, ad_cfg.id)
continue
LOG.info("Processing %s/%s: '%s' from [%s]...", count + 1, len(ad_cfgs), ad_cfg.title, ad_file)

View File

@@ -25,6 +25,7 @@ __all__ = [
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
_BREADCRUMB_MIN_DEPTH:Final[int] = 2
_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100
BREADCRUMB_RE = re.compile(r"/c(\d+)")
@@ -525,19 +526,56 @@ class AdExtractor(WebScrapingMixin):
LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
return None
# Fetch the management JSON data using web_request
response = await self.web_request("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
json_data = json.loads(response["content"])
# Fetch the management JSON data using web_request with pagination support
page = 1
# Find the current ad in the ads list
if isinstance(json_data, dict) and "ads" in json_data:
ads_list = json_data["ads"]
if isinstance(ads_list, list):
# Filter ads to find the current ad by ID
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
if current_ad and "buyNowEligible" in current_ad:
buy_now_eligible = current_ad["buyNowEligible"]
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
while True:
# Safety check: don't paginate beyond reasonable limit
if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT:
LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT)
break
response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
try:
json_data = json.loads(response["content"])
except json.JSONDecodeError as ex:
LOG.debug("Failed to parse JSON response on page %s: %s", page, ex)
break
# Find the current ad in the ads list
if isinstance(json_data, dict) and "ads" in json_data:
ads_list = json_data["ads"]
if isinstance(ads_list, list):
# Filter ads to find the current ad by ID
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
if current_ad and "buyNowEligible" in current_ad:
buy_now_eligible = current_ad["buyNowEligible"]
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
# Check if we need to fetch more pages
paging = json_data.get("paging") if isinstance(json_data, dict) else None
if not isinstance(paging, dict):
break
# Parse pagination info using real API fields
current_page_num = misc.coerce_page_number(paging.get("pageNum"))
total_pages = misc.coerce_page_number(paging.get("last"))
if current_page_num is None:
LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
break
# Stop if we've reached the last page
if total_pages is None or current_page_num >= total_pages:
break
# Use API's next field for navigation (more robust than our counter)
next_page = misc.coerce_page_number(paging.get("next"))
if next_page is None:
LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
break
page = next_page
# If the key doesn't exist or ad not found, return None (unknown)
return None

View File

@@ -31,6 +31,18 @@ kleinanzeigen_bot/__init__.py:
"App version: %s": "App Version: %s"
"Python version: %s": "Python Version: %s"
_fetch_published_ads:
"Empty JSON response content on page %s": "Leerer JSON-Antwortinhalt auf Seite %s"
"Failed to parse JSON response on page %s: %s (content: %s)": "Fehler beim Parsen der JSON-Antwort auf Seite %s: %s (Inhalt: %s)"
"Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Paginierung nach %s Seiten, um eine Endlosschleife zu vermeiden"
"Pagination request timed out on page %s: %s": "Zeitüberschreitung bei der Seitenabfrage auf Seite %s: %s"
"Unexpected JSON payload on page %s (content: %s)": "Unerwartete JSON-Antwort auf Seite %s (Inhalt: %s)"
"Unexpected 'ads' type on page %s: %s value: %s": "Unerwarteter 'ads'-Typ auf Seite %s: %s Wert: %s"
"Reached last page %s of %s, stopping pagination": "Letzte Seite %s von %s erreicht, beende Paginierung"
"No ads found on page %s, stopping pagination": "Keine Anzeigen auf Seite %s gefunden, beende Paginierung"
"Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung"
"Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung"
__check_ad_changed:
"Hash comparison for [%s]:": "Hash-Vergleich für [%s]:"
" Stored hash: %s": " Gespeicherter Hash: %s"
@@ -162,6 +174,7 @@ kleinanzeigen_bot/__init__.py:
update_ads:
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
" -> SKIPPED: ad '%s' (ID: %s) not found in published ads": " -> ÜBERSPRUNGEN: Anzeige '%s' (ID: %s) nicht in veröffentlichten Anzeigen gefunden"
"DONE: updated %s": "FERTIG: %s aktualisiert"
"ad": "Anzeige"
@@ -299,6 +312,9 @@ kleinanzeigen_bot/extract.py:
_extract_sell_directly_from_ad_page:
"Could not extract ad ID from URL: %s": "Konnte Anzeigen-ID nicht aus der URL extrahieren: %s"
"Stopping pagination after %s pages to avoid infinite loop": "Stoppe die Paginierung nach %s Seiten, um eine Endlosschleife zu vermeiden"
"Invalid 'next' page value in paging info: %s, stopping pagination": "Ungültiger 'next'-Seitenwert in Paginierungsinfo: %s, beende Paginierung"
"Invalid 'pageNum' in paging info: %s, stopping pagination": "Ungültiger 'pageNum'-Wert in Paginierungsinfo: %s, beende Paginierung"
#################################################
kleinanzeigen_bot/utils/i18n.py:

View File

@@ -16,12 +16,55 @@ from . import i18n
T = TypeVar("T")
def coerce_page_number(value:Any) -> int | None:
"""Safely coerce a value to int or return None if conversion fails.
Whole-number floats are accepted; non-integer floats are rejected.
Args:
value: Value to coerce to int (can be int, str, float, or any type)
Returns:
int if value can be safely coerced, None otherwise
Examples:
>>> coerce_page_number(1)
1
>>> coerce_page_number("2")
2
>>> coerce_page_number(3.0)
3
>>> coerce_page_number(3.5) is None
True
>>> coerce_page_number(True) is None # Not 1!
True
>>> coerce_page_number(None) is None
True
>>> coerce_page_number("invalid") is None
True
>>> coerce_page_number([1, 2, 3]) is None
True
"""
if value is None:
return None
if isinstance(value, bool):
return None
if isinstance(value, float):
if value.is_integer():
return int(value)
return None
try:
return int(value)
except (TypeError, ValueError):
return None
def ensure(
condition:Any | bool | Callable[[], bool], # noqa: FBT001 Boolean-typed positional argument in function definition
error_message:str,
timeout:float = 5,
poll_frequency:float = 0.5
) -> None:
condition:Any | bool | Callable[[], bool], # noqa: FBT001 Boolean-typed positional argument in function definition
error_message:str,
timeout:float = 5,
poll_frequency:float = 0.5,
) -> None:
"""
Ensure a condition is true, retrying until timeout.
@@ -152,12 +195,7 @@ def parse_decimal(number:float | int | str) -> decimal.Decimal:
raise decimal.DecimalException(f"Invalid number format: {number}") from ex
def parse_datetime(
date:datetime | str | None,
*,
add_timezone_if_missing:bool = True,
use_local_timezone:bool = True
) -> datetime | None:
def parse_datetime(date:datetime | str | None, *, add_timezone_if_missing:bool = True, use_local_timezone:bool = True) -> datetime | None:
"""
Parses a datetime object or ISO-formatted string.
@@ -184,10 +222,7 @@ def parse_datetime(
dt = date if isinstance(date, datetime) else datetime.fromisoformat(date)
if dt.tzinfo is None and add_timezone_if_missing:
dt = (
dt.astimezone() if use_local_timezone
else dt.replace(tzinfo = timezone.utc)
)
dt = dt.astimezone() if use_local_timezone else dt.replace(tzinfo = timezone.utc)
return dt

View File

@@ -10,7 +10,7 @@ from urllib.error import URLError
import pytest
from kleinanzeigen_bot.extract import AdExtractor
import kleinanzeigen_bot.extract as extract_module
from kleinanzeigen_bot.model.ad_model import AdPartial, ContactPartial
from kleinanzeigen_bot.model.config_model import Config, DownloadConfig
from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
@@ -39,22 +39,22 @@ class _TestCaseDict(TypedDict): # noqa: PYI049 Private TypedDict `...` is never
@pytest.fixture
def test_extractor(browser_mock:MagicMock, test_bot_config:Config) -> AdExtractor:
"""Provides a fresh AdExtractor instance for testing.
def test_extractor(browser_mock:MagicMock, test_bot_config:Config) -> extract_module.AdExtractor:
"""Provides a fresh extract_module.AdExtractor instance for testing.
Dependencies:
- browser_mock: Used to mock browser interactions
- test_bot_config: Used to initialize the extractor with a valid configuration
"""
return AdExtractor(browser_mock, test_bot_config)
return extract_module.AdExtractor(browser_mock, test_bot_config)
class TestAdExtractorBasics:
"""Basic synchronous tests for AdExtractor."""
"""Basic synchronous tests for extract_module.AdExtractor."""
def test_constructor(self, browser_mock:MagicMock, test_bot_config:Config) -> None:
"""Test the constructor of AdExtractor"""
extractor = AdExtractor(browser_mock, test_bot_config)
"""Test the constructor of extract_module.AdExtractor"""
extractor = extract_module.AdExtractor(browser_mock, test_bot_config)
assert extractor.browser == browser_mock
assert extractor.config == test_bot_config
@@ -67,7 +67,7 @@ class TestAdExtractorBasics:
("https://www.kleinanzeigen.de/invalid-url", -1),
],
)
def test_extract_ad_id_from_ad_url(self, test_extractor:AdExtractor, url:str, expected_id:int) -> None:
def test_extract_ad_id_from_ad_url(self, test_extractor:extract_module.AdExtractor, url:str, expected_id:int) -> None:
"""Test extraction of ad ID from different URL formats."""
assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id
@@ -167,7 +167,7 @@ class TestAdExtractorBasics:
patch("kleinanzeigen_bot.extract.open", mock_open()),
patch("kleinanzeigen_bot.extract.shutil.copyfileobj"),
):
result = AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(test_dir), "test_", 1)
result = extract_module.AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(test_dir), "test_", 1)
assert result is not None
assert result.endswith((".jpe", ".jpeg", ".jpg"))
@@ -176,7 +176,7 @@ class TestAdExtractorBasics:
def test_download_and_save_image_sync_failure(self, tmp_path:Path) -> None:
"""Test _download_and_save_image_sync with download failure."""
with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", side_effect = URLError("Network error")):
result = AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(tmp_path), "test_", 1)
result = extract_module.AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(tmp_path), "test_", 1)
assert result is None
@@ -196,7 +196,9 @@ class TestAdExtractorPricing:
)
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_pricing_info(self, test_extractor:AdExtractor, price_text:str, expected_price:int | None, expected_type:str) -> None:
async def test_extract_pricing_info(
self, test_extractor:extract_module.AdExtractor, price_text:str, expected_price:int | None, expected_type:str
) -> None:
"""Test price extraction with different formats"""
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text):
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
@@ -205,7 +207,7 @@ class TestAdExtractorPricing:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_pricing_info_timeout(self, test_extractor:AdExtractor) -> None:
async def test_extract_pricing_info_timeout(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test price extraction when element is not found"""
with patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError):
price, price_type = await test_extractor._extract_pricing_info_from_ad_page()
@@ -226,7 +228,9 @@ class TestAdExtractorShipping:
)
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info(self, test_extractor:AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None) -> None:
async def test_extract_shipping_info(
self, test_extractor:extract_module.AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None
) -> None:
"""Test shipping info extraction with different text formats."""
with (
patch.object(test_extractor, "page", MagicMock()),
@@ -250,7 +254,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_options(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_with_options(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping info extraction with shipping options."""
shipping_response = {
"content": json.dumps({"data": {"shippingOptionsResponse": {"options": [{"id": "DHL_001", "priceInEuroCent": 549, "packageSize": "SMALL"}]}}})
@@ -269,7 +273,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_all_matching_options(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_with_all_matching_options(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping info extraction with all matching options enabled."""
shipping_response = {
"content": json.dumps(
@@ -306,7 +310,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_all_matching_options_no_match(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_with_all_matching_options_no_match(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping extraction when include-all is enabled but no option matches the price."""
shipping_response = {
"content": json.dumps(
@@ -338,7 +342,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_excluded_options(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_with_excluded_options(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping info extraction with excluded options."""
shipping_response = {
"content": json.dumps(
@@ -375,7 +379,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping info extraction when the matching option is excluded."""
shipping_response = {
"content": json.dumps(
@@ -408,7 +412,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_with_no_matching_option(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_with_no_matching_option(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping info extraction when price exists but NO matching option in API response."""
shipping_response = {
"content": json.dumps(
@@ -438,7 +442,7 @@ class TestAdExtractorShipping:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_shipping_info_timeout(self, test_extractor:AdExtractor) -> None:
async def test_extract_shipping_info_timeout(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test shipping info extraction when shipping element is missing (TimeoutError)."""
with (
patch.object(test_extractor, "page", MagicMock()),
@@ -455,7 +459,7 @@ class TestAdExtractorNavigation:
"""Tests for navigation related functionality."""
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_url(self, test_extractor:AdExtractor) -> None:
async def test_navigate_to_ad_page_with_url(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test navigation to ad page using a URL."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
@@ -470,7 +474,7 @@ class TestAdExtractorNavigation:
mock_web_open.assert_called_with("https://www.kleinanzeigen.de/s-anzeige/test/12345")
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_id(self, test_extractor:AdExtractor) -> None:
async def test_navigate_to_ad_page_with_id(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test navigation to ad page using an ID."""
ad_id = 12345
page_mock = AsyncMock()
@@ -496,7 +500,7 @@ class TestAdExtractorNavigation:
popup_close_mock.click.assert_awaited_once()
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_popup(self, test_extractor:AdExtractor) -> None:
async def test_navigate_to_ad_page_with_popup(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test navigation to ad page with popup handling."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
@@ -518,7 +522,7 @@ class TestAdExtractorNavigation:
mock_web_click.assert_called_with(By.CLASS_NAME, "mfp-close")
@pytest.mark.asyncio
async def test_navigate_to_ad_page_invalid_id(self, test_extractor:AdExtractor) -> None:
async def test_navigate_to_ad_page_invalid_id(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test navigation to ad page with invalid ID."""
page_mock = AsyncMock()
page_mock.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"
@@ -538,7 +542,7 @@ class TestAdExtractorNavigation:
assert result is False
@pytest.mark.asyncio
async def test_extract_own_ads_urls(self, test_extractor:AdExtractor) -> None:
async def test_extract_own_ads_urls(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test extraction of own ads URLs - basic test."""
with (
patch.object(test_extractor, "web_open", new_callable = AsyncMock),
@@ -608,7 +612,7 @@ class TestAdExtractorNavigation:
)
@pytest.mark.asyncio
async def test_extract_own_ads_urls_paginates_with_enabled_next_button(self, test_extractor:AdExtractor) -> None:
async def test_extract_own_ads_urls_paginates_with_enabled_next_button(self, test_extractor:extract_module.AdExtractor) -> None:
"""Ensure the paginator clicks the first enabled next button and advances."""
ad_list_container_mock = MagicMock()
pagination_section_mock = MagicMock()
@@ -663,7 +667,7 @@ class TestAdExtractorNavigation:
next_button_enabled.click.assert_awaited() # triggered once during navigation
@pytest.mark.asyncio
async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:AdExtractor) -> None:
async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test that TimeoutError in extract_page_refs callback stops pagination."""
with (
patch.object(test_extractor, "web_open", new_callable = AsyncMock),
@@ -699,7 +703,7 @@ class TestAdExtractorNavigation:
assert refs == []
@pytest.mark.asyncio
async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:AdExtractor) -> None:
async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test that generic Exception in extract_page_refs callback continues pagination."""
with (
patch.object(test_extractor, "web_open", new_callable = AsyncMock),
@@ -742,15 +746,9 @@ class TestAdExtractorContent:
# pylint: disable=protected-access
@pytest.fixture
def extractor_with_config(self) -> AdExtractor:
"""Create extractor with specific config for testing prefix/suffix handling."""
browser_mock = MagicMock(spec = Browser)
return AdExtractor(browser_mock, Config()) # Empty config, will be overridden in tests
@pytest.mark.asyncio
async def test_extract_description_with_affixes(
self, test_extractor:AdExtractor, description_test_cases:list[tuple[dict[str, Any], str, str]], test_bot_config:Config
self, test_extractor:extract_module.AdExtractor, description_test_cases:list[tuple[dict[str, Any], str, str]], test_bot_config:Config
) -> None:
"""Test extraction of description with various prefix/suffix configurations."""
# Mock the page
@@ -783,7 +781,7 @@ class TestAdExtractorContent:
assert info.description == raw_description
@pytest.mark.asyncio
async def test_extract_description_with_affixes_timeout(self, test_extractor:AdExtractor) -> None:
async def test_extract_description_with_affixes_timeout(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test handling of timeout when extracting description."""
# Mock the page
page_mock = MagicMock()
@@ -816,7 +814,7 @@ class TestAdExtractorContent:
pass
@pytest.mark.asyncio
async def test_extract_description_with_affixes_no_affixes(self, test_extractor:AdExtractor) -> None:
async def test_extract_description_with_affixes_no_affixes(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test extraction of description without any affixes in config."""
# Mock the page
page_mock = MagicMock()
@@ -846,7 +844,7 @@ class TestAdExtractorContent:
assert info.description == raw_description
@pytest.mark.asyncio
async def test_extract_sell_directly(self, test_extractor:AdExtractor) -> None:
async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None:
"""Test extraction of sell directly option."""
# Mock the page URL to extract the ad ID
test_extractor.page = MagicMock()
@@ -856,6 +854,8 @@ class TestAdExtractorContent:
test_extractor.page.url = "https://www.kleinanzeigen.de/invalid-url"
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
result = await test_extractor._extract_sell_directly_from_ad_page()
# With an invalid URL no ad ID can be extracted, so the method returns None
# before the pagination loop starts and web_request is never called.
assert result is None
# Verify web_request was NOT called when URL is invalid
@@ -873,8 +873,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is True
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test successful extraction with buyNowEligible = false
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -885,8 +885,35 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is False
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test pagination: ad found on second page
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
mock_web_request.side_effect = [
{
"content": json.dumps(
{
"ads": [{"id": 987654321, "buyNowEligible": False}],
"paging": {"pageNum": 1, "last": 2, "next": 2},
}
)
},
{
"content": json.dumps(
{
"ads": [{"id": 123456789, "buyNowEligible": True}],
"paging": {"pageNum": 2, "last": 2},
}
)
},
]
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is True
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
mock_web_request.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
# Test when buyNowEligible is missing from the current ad
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -904,8 +931,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when current ad is not found in the ads list
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -914,16 +941,16 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test timeout error
with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test JSON decode error
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -932,8 +959,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when ads list is empty
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -942,8 +969,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when buyNowEligible is a non-boolean value (string "true")
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -954,8 +981,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when buyNowEligible is a non-boolean value (integer 1)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -966,8 +993,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when json_data is not a dict (covers line 622)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -976,8 +1003,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when json_data is a dict but doesn't have "ads" key (covers line 622)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -986,8 +1013,8 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
# Test when ads_list is not a list (covers line 624)
with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
@@ -996,22 +1023,119 @@ class TestAdExtractorContent:
result = await test_extractor._extract_sell_directly_from_ad_page()
assert result is None
# Verify web_request was called with the correct URL
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
# Verify web_request was called with the correct URL (now includes pagination)
mock_web_request.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
@pytest.mark.asyncio
async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
    """With the module page limit patched to 0, expect None and no awaited web_request."""
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0)

    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
        outcome = await test_extractor._extract_sell_directly_from_ad_page()

    assert outcome is None
    # A zero page budget must not trigger a single request.
    request_mock.assert_not_awaited()
@pytest.mark.asyncio
async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None:
    """Expect None when the only listed ad has a different id and paging uses mixed keys.

    The paging dict carries `pageNum` and `currentPage` as null, `page` as the
    string "1" and `last` as 0; the ad on the open page (123456789) is not listed.
    """
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    payload = {
        "ads": [{"id": 987654321, "buyNowEligible": True}],
        "paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0},
    }

    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        outcome = await test_extractor._extract_sell_directly_from_ad_page()

    assert outcome is None
@pytest.mark.asyncio
async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None:
    """Expect None after two requests when page 2 comes back empty and without paging info."""
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    first_page = {"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}}
    second_page = {"ads": []}

    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.side_effect = [
            {"content": json.dumps(first_page)},
            {"content": json.dumps(second_page)},
        ]
        outcome = await test_extractor._extract_sell_directly_from_ad_page()

    assert outcome is None
    # Both pages must have been requested.
    request_mock.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
    request_mock.assert_any_await("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
@pytest.mark.asyncio
async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None:
    """Expect None when paging values have unusable types (`pageNum` a list, `last` a word)."""
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    payload = {"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}}

    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        outcome = await test_extractor._extract_sell_directly_from_ad_page()

    assert outcome is None
@pytest.mark.asyncio
async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None:
    """Expect None for float page numbers: a fractional one (1.5) and a whole one (2.0)."""
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    paging_variants = (
        {"pageNum": 1.5, "last": 0},
        {"pageNum": 2.0, "last": 1},
    )

    for paging in paging_variants:
        payload = {"ads": [{"id": 987654321}], "paging": paging}
        # Fresh mock per variant so the scenarios stay independent.
        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
            request_mock.return_value = {"content": json.dumps(payload)}
            outcome = await test_extractor._extract_sell_directly_from_ad_page()
        assert outcome is None
@pytest.mark.asyncio
async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
    """With the page limit patched to 1, expect None and exactly one request for page 1."""
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1)
    # Paging announces a second page that the limit must keep from being fetched.
    payload = {"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}}

    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        outcome = await test_extractor._extract_sell_directly_from_ad_page()

    assert outcome is None
    request_mock.assert_awaited_once_with("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
@pytest.mark.asyncio
async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None:
    """Expect None when the paging dict is present but empty and the ad is not listed."""
    test_extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789")
    payload = {"ads": [{"id": 987654321}], "paging": {}}

    with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        outcome = await test_extractor._extract_sell_directly_from_ad_page()

    assert outcome is None
class TestAdExtractorCategory:
"""Tests for category extraction functionality."""
@pytest.fixture
def extractor(self, test_bot_config:Config) -> AdExtractor:
def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor:
browser_mock = MagicMock(spec = Browser)
config = test_bot_config.with_values({"ad_defaults": {"description": {"prefix": "Test Prefix", "suffix": "Test Suffix"}}})
return AdExtractor(browser_mock, config)
return extract_module.AdExtractor(browser_mock, config)
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_category(self, extractor:AdExtractor) -> None:
async def test_extract_category(self, extractor:extract_module.AdExtractor) -> None:
"""Test category extraction from breadcrumb."""
category_line = MagicMock()
first_part = MagicMock()
@@ -1031,7 +1155,7 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_category_single_identifier(self, extractor:AdExtractor) -> None:
async def test_extract_category_single_identifier(self, extractor:extract_module.AdExtractor) -> None:
"""Test category extraction when only a single breadcrumb code exists."""
category_line = MagicMock()
first_part = MagicMock()
@@ -1049,7 +1173,7 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_category_fallback_to_legacy_selectors(self, extractor:AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
async def test_extract_category_fallback_to_legacy_selectors(self, extractor:extract_module.AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
"""Test category extraction when breadcrumb links are not available and legacy selectors are used."""
category_line = MagicMock()
first_part = MagicMock()
@@ -1075,7 +1199,7 @@ class TestAdExtractorCategory:
mock_web_find_all.assert_awaited_once_with(By.CSS_SELECTOR, "a", parent = category_line)
@pytest.mark.asyncio
async def test_extract_category_legacy_selectors_timeout(self, extractor:AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
async def test_extract_category_legacy_selectors_timeout(self, extractor:extract_module.AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
"""Ensure fallback timeout logs the error and re-raises with translated message."""
category_line = MagicMock()
@@ -1096,7 +1220,7 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_special_attributes_empty(self, extractor:AdExtractor) -> None:
async def test_extract_special_attributes_empty(self, extractor:extract_module.AdExtractor) -> None:
"""Test extraction of special attributes when empty."""
with patch.object(extractor, "web_execute", new_callable = AsyncMock) as mock_web_execute:
mock_web_execute.return_value = {"universalAnalyticsOpts": {"dimensions": {"ad_attributes": ""}}}
@@ -1105,7 +1229,7 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_special_attributes_not_empty(self, extractor:AdExtractor) -> None:
async def test_extract_special_attributes_not_empty(self, extractor:extract_module.AdExtractor) -> None:
"""Test extraction of special attributes when not empty."""
special_atts = {
@@ -1129,7 +1253,7 @@ class TestAdExtractorCategory:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_special_attributes_missing_ad_attributes(self, extractor:AdExtractor) -> None:
async def test_extract_special_attributes_missing_ad_attributes(self, extractor:extract_module.AdExtractor) -> None:
"""Test extraction of special attributes when ad_attributes key is missing."""
belen_conf:dict[str, Any] = {
"universalAnalyticsOpts": {
@@ -1146,14 +1270,14 @@ class TestAdExtractorContact:
"""Tests for contact information extraction."""
@pytest.fixture
def extractor(self, test_bot_config:Config) -> AdExtractor:
def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor:
browser_mock = MagicMock(spec = Browser)
config = test_bot_config.with_values({"ad_defaults": {"description": {"prefix": "Test Prefix", "suffix": "Test Suffix"}}})
return AdExtractor(browser_mock, config)
return extract_module.AdExtractor(browser_mock, config)
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_contact_info(self, extractor:AdExtractor) -> None:
async def test_extract_contact_info(self, extractor:extract_module.AdExtractor) -> None:
"""Test extraction of contact information."""
with (
patch.object(extractor, "page", MagicMock()),
@@ -1181,7 +1305,7 @@ class TestAdExtractorContact:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_contact_info_timeout(self, extractor:AdExtractor) -> None:
async def test_extract_contact_info_timeout(self, extractor:extract_module.AdExtractor) -> None:
"""Test contact info extraction when elements are not found."""
with (
patch.object(extractor, "page", MagicMock()),
@@ -1193,7 +1317,7 @@ class TestAdExtractorContact:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_contact_info_with_phone(self, extractor:AdExtractor) -> None:
async def test_extract_contact_info_with_phone(self, extractor:extract_module.AdExtractor) -> None:
"""Test extraction of contact information including phone number."""
with (
patch.object(extractor, "page", MagicMock()),
@@ -1217,13 +1341,13 @@ class TestAdExtractorDownload:
"""Tests for download functionality."""
@pytest.fixture
def extractor(self, test_bot_config:Config) -> AdExtractor:
def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor:
browser_mock = MagicMock(spec = Browser)
config = test_bot_config.with_values({"ad_defaults": {"description": {"prefix": "Test Prefix", "suffix": "Test Suffix"}}})
return AdExtractor(browser_mock, config)
return extract_module.AdExtractor(browser_mock, config)
@pytest.mark.asyncio
async def test_download_ad(self, extractor:AdExtractor, tmp_path:Path) -> None:
async def test_download_ad(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
"""Test downloading an ad - directory creation and saving ad data."""
# Use tmp_path for OS-agnostic path handling
download_base = tmp_path / "downloaded-ads"
@@ -1263,7 +1387,7 @@ class TestAdExtractorDownload:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_images_no_images(self, extractor:AdExtractor) -> None:
async def test_download_images_no_images(self, extractor:extract_module.AdExtractor) -> None:
"""Test image download when no images are found."""
with patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError):
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
@@ -1271,7 +1395,7 @@ class TestAdExtractorDownload:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_images_with_none_url(self, extractor:AdExtractor) -> None:
async def test_download_images_with_none_url(self, extractor:extract_module.AdExtractor) -> None:
"""Test image download when some images have None as src attribute."""
image_box_mock = MagicMock()
@@ -1285,7 +1409,7 @@ class TestAdExtractorDownload:
with (
patch.object(extractor, "web_find", new_callable = AsyncMock, return_value = image_box_mock),
patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = [img_with_url, img_without_url]),
patch.object(AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"),
patch.object(extract_module.AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"),
):
image_paths = await extractor._download_images_from_ad_page("/some/dir", 12345)
@@ -1295,7 +1419,7 @@ class TestAdExtractorDownload:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_final_dir_exists(self, extractor:AdExtractor, tmp_path:Path) -> None:
async def test_extract_ad_page_info_with_directory_handling_final_dir_exists(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
"""Test directory handling when final_dir already exists - it should be deleted."""
base_dir = tmp_path / "downloaded-ads"
base_dir.mkdir()
@@ -1356,7 +1480,7 @@ class TestAdExtractorDownload:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_rename_enabled(self, extractor:AdExtractor, tmp_path:Path) -> None:
async def test_extract_ad_page_info_with_directory_handling_rename_enabled(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
"""Test directory handling when temp_dir exists and rename_existing_folders is True."""
base_dir = tmp_path / "downloaded-ads"
base_dir.mkdir()
@@ -1422,7 +1546,7 @@ class TestAdExtractorDownload:
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_use_existing(self, extractor:AdExtractor, tmp_path:Path) -> None:
async def test_extract_ad_page_info_with_directory_handling_use_existing(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
"""Test directory handling when temp_dir exists and rename_existing_folders is False (default)."""
base_dir = tmp_path / "downloaded-ads"
base_dir.mkdir()
@@ -1485,7 +1609,7 @@ class TestAdExtractorDownload:
assert ad_cfg.title == "Test Title"
@pytest.mark.asyncio
async def test_download_ad_with_umlauts_in_title(self, extractor:AdExtractor, tmp_path:Path) -> None:
async def test_download_ad_with_umlauts_in_title(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
"""Test cross-platform Unicode handling for ad titles with umlauts (issue #728).
Verifies that:

View File

@@ -676,7 +676,8 @@ class TestKleinanzeigenBotBasics:
):
await test_bot.publish_ads(ad_cfgs)
web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")
# With pagination, the URL now includes pageNum parameter
web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE)
web_await_mock.assert_awaited_once()
delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False)

View File

@@ -0,0 +1,231 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""Tests for JSON API pagination helper methods."""
import json
from unittest.mock import AsyncMock, patch
import pytest
from kleinanzeigen_bot import KleinanzeigenBot
from kleinanzeigen_bot.utils import misc
@pytest.mark.unit
class TestJSONPagination:
"""Tests for misc.coerce_page_number and KleinanzeigenBot._fetch_published_ads."""
@pytest.fixture
def bot(self) -> KleinanzeigenBot:
    """Return a default-constructed KleinanzeigenBot for the pagination tests."""
    instance = KleinanzeigenBot()
    return instance
def test_coerce_page_number_with_valid_int(self) -> None:
    """Test that valid integers are returned as-is."""
    for value in (1, 0, 42):
        result = misc.coerce_page_number(value)
        if result != value:
            # Name the helper as it is actually called and show the repr of the result,
            # so the failure text cannot be mistaken for a different function or type.
            pytest.fail(f"coerce_page_number({value!r}) expected {value!r}, got {result!r}")
def test_coerce_page_number_with_string_int(self) -> None:
    """Test that string integers are converted to int."""
    for raw, expected in (("1", 1), ("0", 0), ("42", 42)):
        result = misc.coerce_page_number(raw)
        if result != expected:
            # repr() keeps an unconverted "1" distinguishable from the int 1 in the message.
            pytest.fail(f"coerce_page_number({raw!r}) expected {expected!r}, got {result!r}")
def test_coerce_page_number_with_none(self) -> None:
    """Test that None returns None."""
    result = misc.coerce_page_number(None)
    if result is not None:
        pytest.fail(f"coerce_page_number(None) expected None, got {result!r}")
def test_coerce_page_number_with_invalid_types(self) -> None:
    """Test that invalid types return None."""
    # Non-numeric text, empty text, containers and a fractional float must all be rejected.
    for value in ("invalid", "", [], {}, 3.14):
        result = misc.coerce_page_number(value)
        if result is not None:
            pytest.fail(f"coerce_page_number({value!r}) expected None, got {result!r}")
def test_coerce_page_number_with_whole_number_float(self) -> None:
    """Test that whole-number floats are accepted and converted to int."""
    for value, expected in ((2.0, 2), (0.0, 0), (42.0, 42)):
        result = misc.coerce_page_number(value)
        # `2.0 == 2` is True in Python, so an equality check alone would let a float that was
        # passed through unconverted slip by; the type check pins the int conversion.
        if not isinstance(result, int) or result != expected:
            pytest.fail(f"coerce_page_number({value!r}) expected int {expected!r}, got {result!r}")
@pytest.mark.asyncio
async def test_fetch_published_ads_single_page_no_paging(self, bot:KleinanzeigenBot) -> None:
    """Test fetching ads from single page with no paging info."""
    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": '{"ads": [{"id": 1, "title": "Ad 1"}, {"id": 2, "title": "Ad 2"}]}'}
        result = await bot._fetch_published_ads()

    ad_count = len(result)
    if ad_count != 2:
        pytest.fail(f"Expected 2 results, got {ad_count}")

    # Ads must come back in response order.
    for index, expected_id in enumerate((1, 2)):
        actual_id = result[index]["id"]
        if actual_id != expected_id:
            pytest.fail(f"Expected result[{index}]['id'] == {expected_id}, got {actual_id}")

    request_mock.assert_awaited_once_with(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
@pytest.mark.asyncio
async def test_fetch_published_ads_single_page_with_paging(self, bot:KleinanzeigenBot) -> None:
    """Test fetching ads from single page with paging info showing 1/1."""
    payload = {"ads": [{"id": 1, "title": "Ad 1"}], "paging": {"pageNum": 1, "last": 1}}

    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        result = await bot._fetch_published_ads()

    ad_count = len(result)
    if ad_count != 1:
        pytest.fail(f"Expected 1 ad, got {ad_count}")

    first_id = result[0].get("id")
    if first_id != 1:
        pytest.fail(f"Expected ad id 1, got {first_id}")

    request_mock.assert_awaited_once_with(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
@pytest.mark.asyncio
async def test_fetch_published_ads_multi_page(self, bot:KleinanzeigenBot) -> None:
    """Test fetching ads from multiple pages (3 pages, 2 ads each)."""
    pages = (
        {"ads": [{"id": 1}, {"id": 2}], "paging": {"pageNum": 1, "last": 3, "next": 2}},
        {"ads": [{"id": 3}, {"id": 4}], "paging": {"pageNum": 2, "last": 3, "next": 3}},
        {"ads": [{"id": 5}, {"id": 6}], "paging": {"pageNum": 3, "last": 3}},
    )

    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        # One canned response per successive request.
        request_mock.side_effect = [{"content": json.dumps(page)} for page in pages]
        result = await bot._fetch_published_ads()

    ad_count = len(result)
    if ad_count != 6:
        pytest.fail(f"Expected 6 ads but got {ad_count}")

    collected_ids = [ad["id"] for ad in result]
    if collected_ids != [1, 2, 3, 4, 5, 6]:
        pytest.fail(f"Expected ids [1, 2, 3, 4, 5, 6] but got {collected_ids}")

    if request_mock.call_count != 3:
        pytest.fail(f"Expected 3 web_request calls but got {request_mock.call_count}")

    request_mock.assert_any_await(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
    request_mock.assert_any_await(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2")
    request_mock.assert_any_await(f"{bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=3")
@pytest.mark.asyncio
async def test_fetch_published_ads_empty_list(self, bot:KleinanzeigenBot) -> None:
    """Test handling of empty ads list."""
    payload = {"ads": [], "paging": {"pageNum": 1, "last": 1}}

    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        result = await bot._fetch_published_ads()

    # Check the type first so the length check cannot raise on a non-sized result.
    if not isinstance(result, list):
        pytest.fail(f"expected result to be list, got {type(result).__name__}")

    item_count = len(result)
    if item_count != 0:
        pytest.fail(f"expected empty list from _fetch_published_ads, got {item_count} items")
@pytest.mark.asyncio
async def test_fetch_published_ads_invalid_json(self, bot:KleinanzeigenBot) -> None:
    """Test handling of invalid JSON response."""
    unparsable_response = {"content": "invalid json"}

    with patch.object(bot, "web_request", new_callable = AsyncMock, return_value = unparsable_response):
        result = await bot._fetch_published_ads()

    if result != []:
        pytest.fail(f"Expected empty list on invalid JSON, got {result}")
@pytest.mark.asyncio
async def test_fetch_published_ads_missing_paging_dict(self, bot:KleinanzeigenBot) -> None:
    """Test handling of missing paging dict."""
    payload = {"ads": [{"id": 1}, {"id": 2}]}

    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        result = await bot._fetch_published_ads()

    ad_count = len(result)
    if ad_count != 2:
        pytest.fail(f"expected 2 ads, got {ad_count}")

    # Without paging info only a single request is expected.
    request_mock.assert_awaited_once()
@pytest.mark.asyncio
async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None:
    """Test handling of non-integer paging values."""
    payload = {"ads": [{"id": 1}], "paging": {"pageNum": "invalid", "last": "also-invalid"}}

    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        result = await bot._fetch_published_ads()

    # Only the first page's single ad is expected in the result.
    ad_count = len(result)
    if ad_count != 1:
        pytest.fail(f"Expected 1 ad, got {ad_count}")

    first_id = result[0].get("id")
    if first_id != 1:
        pytest.fail(f"Expected ad id 1, got {first_id}")
@pytest.mark.asyncio
async def test_fetch_published_ads_non_list_ads(self, bot:KleinanzeigenBot) -> None:
    """Test handling of non-list ads field."""
    payload = {"ads": "not a list", "paging": {"pageNum": 1, "last": 1}}

    with patch.object(bot, "web_request", new_callable = AsyncMock) as request_mock:
        request_mock.return_value = {"content": json.dumps(payload)}
        result = await bot._fetch_published_ads()

    # The result must be a list and it must be empty; `or` short-circuits,
    # so len() is never called on a non-list.
    if not isinstance(result, list) or len(result) != 0:
        pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}")
@pytest.mark.asyncio
async def test_fetch_published_ads_timeout(self, bot:KleinanzeigenBot) -> None:
    """Test handling of timeout during pagination."""
    # Every awaited web_request raises, simulating a timeout on the very first page.
    with patch.object(bot, "web_request", new_callable = AsyncMock, side_effect = TimeoutError("timeout")):
        result = await bot._fetch_published_ads()

    if result != []:
        pytest.fail(f"Expected empty list on timeout, got {result}")