mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
fix: JSON API Pagination for >25 Ads (#797)
## ℹ️ Description

*Provide a concise summary of the changes introduced in this pull request.*

- Link to the related issue(s): Closes #789 (completes the fix started in #793)
- **Motivation**: Fix JSON API pagination for accounts with >25 ads. Aligns pagination logic with weidi’s approach (starts at page 1), while hardening error handling and tests. Based on https://github.com/weidi/kleinanzeigen-bot/pull/1.

## 📋 Changes Summary

- Added pagination helper to fetch all published ads and use it in delete/extend/publish/update flows
- Added robust handling for malformed JSON payloads and unexpected ads types (with translated warnings)
- Improved sell_directly extraction with pagination, bounds checks, and shared coercion helper
- Added/updated tests for pagination and edge cases; updated assertions to pytest.fail style

### ⚙️ Type of Change

Select the type(s) of change(s) included in this pull request:

- [x] 🐞 Bug fix (non-breaking change which fixes an issue)
- [ ] ✨ New feature (adds new functionality without breaking existing usage)
- [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations)

## ✅ Checklist

Before requesting a review, confirm the following:

- [x] I have reviewed my changes to ensure they meet the project's standards.
- [x] I have tested my changes and ensured that all tests pass (`pdm run test:cov:unified`).
- [x] I have formatted the code (`pdm run format`).
- [x] I have verified that linting passes (`pdm run lint`).
- [x] I have updated documentation where necessary.

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Reliable multi-page fetching for published ads and buy-now eligibility checks.
* **Bug Fixes**
  * Safer pagination with per-page JSON handling, limits and improved termination diagnostics; ensures pageNum is used when needed.
* **Tests**
  * New comprehensive pagination tests and updates to existing tests to reflect multi-page behavior.
* **Chores**
  * Added a utility to safely coerce page numbers; minor utility signature cleanup.

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -25,6 +25,7 @@ __all__ = [
|
||||
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||
|
||||
_BREADCRUMB_MIN_DEPTH:Final[int] = 2
|
||||
_SELL_DIRECTLY_MAX_PAGE_LIMIT:Final[int] = 100
|
||||
BREADCRUMB_RE = re.compile(r"/c(\d+)")
|
||||
|
||||
|
||||
@@ -525,19 +526,56 @@ class AdExtractor(WebScrapingMixin):
|
||||
LOG.warning("Could not extract ad ID from URL: %s", self.page.url)
|
||||
return None
|
||||
|
||||
# Fetch the management JSON data using web_request
|
||||
response = await self.web_request("https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json")
|
||||
json_data = json.loads(response["content"])
|
||||
# Fetch the management JSON data using web_request with pagination support
|
||||
page = 1
|
||||
|
||||
# Find the current ad in the ads list
|
||||
if isinstance(json_data, dict) and "ads" in json_data:
|
||||
ads_list = json_data["ads"]
|
||||
if isinstance(ads_list, list):
|
||||
# Filter ads to find the current ad by ID
|
||||
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
|
||||
if current_ad and "buyNowEligible" in current_ad:
|
||||
buy_now_eligible = current_ad["buyNowEligible"]
|
||||
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
|
||||
while True:
|
||||
# Safety check: don't paginate beyond a reasonable limit
|
||||
if page > _SELL_DIRECTLY_MAX_PAGE_LIMIT:
|
||||
LOG.warning("Stopping pagination after %s pages to avoid infinite loop", _SELL_DIRECTLY_MAX_PAGE_LIMIT)
|
||||
break
|
||||
|
||||
response = await self.web_request(f"https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
|
||||
|
||||
try:
|
||||
json_data = json.loads(response["content"])
|
||||
except json.JSONDecodeError as ex:
|
||||
LOG.debug("Failed to parse JSON response on page %s: %s", page, ex)
|
||||
break
|
||||
|
||||
# Find the current ad in the ads list
|
||||
if isinstance(json_data, dict) and "ads" in json_data:
|
||||
ads_list = json_data["ads"]
|
||||
if isinstance(ads_list, list):
|
||||
# Filter ads to find the current ad by ID
|
||||
current_ad = next((ad for ad in ads_list if ad.get("id") == current_ad_id), None)
|
||||
if current_ad and "buyNowEligible" in current_ad:
|
||||
buy_now_eligible = current_ad["buyNowEligible"]
|
||||
return buy_now_eligible if isinstance(buy_now_eligible, bool) else None
|
||||
|
||||
# Check if we need to fetch more pages
|
||||
paging = json_data.get("paging") if isinstance(json_data, dict) else None
|
||||
if not isinstance(paging, dict):
|
||||
break
|
||||
|
||||
# Parse pagination info using real API fields
|
||||
current_page_num = misc.coerce_page_number(paging.get("pageNum"))
|
||||
total_pages = misc.coerce_page_number(paging.get("last"))
|
||||
|
||||
if current_page_num is None:
|
||||
LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
|
||||
break
|
||||
|
||||
# Stop if we've reached the last page
|
||||
if total_pages is None or current_page_num >= total_pages:
|
||||
break
|
||||
|
||||
# Use API's next field for navigation (more robust than our counter)
|
||||
next_page = misc.coerce_page_number(paging.get("next"))
|
||||
if next_page is None:
|
||||
LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
|
||||
break
|
||||
page = next_page
|
||||
|
||||
# If the key doesn't exist or the ad is not found, return None (unknown)
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user