fix: prevent duplicate listings during publish retry loop (#875)

This commit is contained in:
Torsten Liermann
2026-03-14 08:37:30 +01:00
committed by GitHub
parent 67a4db0db6
commit 868f81239a
3 changed files with 69 additions and 2 deletions

View File

@@ -1572,6 +1572,14 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
success = False
# Retry loop only for publish_ad (before submission completes)
# Fetch a fresh baseline right before the retry loop to avoid stale state
# from earlier successful publishes in multi-ad runs (see #874)
try:
pre_publish_ads = await self._fetch_published_ads()
ads_before_publish:set[str] = {str(x["id"]) for x in pre_publish_ads if x.get("id")}
except Exception as ex: # noqa: BLE001
LOG.warning("Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.", ad_cfg.title, ex)
ads_before_publish = {str(x["id"]) for x in published_ads if x.get("id")}
for attempt in range(1, max_retries + 1):
try:
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE)
@@ -1582,6 +1590,28 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
except (TimeoutError, ProtocolException) as ex:
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
if attempt < max_retries:
# Before retrying, check if the ad was already created despite the error.
# A partially successful submission followed by a retry would create a duplicate listing,
# which violates kleinanzeigen.de terms of service and can lead to account suspension.
try:
current_ads = await self._fetch_published_ads()
current_ad_ids = {str(x["id"]) for x in current_ads if x.get("id")}
new_ad_ids = current_ad_ids - ads_before_publish
if new_ad_ids:
LOG.warning(
"Attempt %s/%s failed for '%s': %s. "
"However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.",
attempt, max_retries, ad_cfg.title, ex, ", ".join(new_ad_ids)
)
failed_count += 1
break
except Exception as verify_ex: # noqa: BLE001
LOG.warning(
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.",
ad_cfg.title, verify_ex,
)
failed_count += 1
break
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
await self.web_sleep(2)  # Wait before retry
else:

View File

@@ -154,6 +154,9 @@ kleinanzeigen_bot/__init__.py:
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
" -> Could not confirm publishing for '%s', but ad may be online": " -> Veröffentlichung für '%s' konnte nicht bestätigt werden, aber Anzeige ist möglicherweise online"
"Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..."
"Attempt %s/%s failed for '%s': %s. However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.": "Versuch %s/%s fehlgeschlagen für '%s': %s. Jedoch wurde eine neue Anzeige erkannt (ID: %s) -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
"Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.": "Konnte keine aktuelle Anzeigen-Baseline für '%s' abrufen: %s. Verwende initialen Snapshot."
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.": "Veröffentlichte Anzeigen konnten nach fehlgeschlagenem Versuch für '%s' nicht geprüft werden: %s -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
"All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige."
"DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)"
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"

View File

@@ -1007,12 +1007,46 @@ class TestKleinanzeigenBotBasics:
):
await test_bot.publish_ads(ad_cfgs)
# With pagination, the URL now includes pageNum parameter # web_request is called twice: once for initial fetch, once for pre-retry-loop baseline
web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1") expected_url = f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1"
assert web_request_mock.await_count == 2
web_request_mock.assert_any_await(expected_url)
publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE)
web_await_mock.assert_awaited_once()
delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False)
@pytest.mark.asyncio
async def test_publish_ads_aborts_retry_on_duplicate_detection(
    self,
    test_bot:KleinanzeigenBot,
    base_ad_config:dict[str, Any],
    mock_page:MagicMock,
) -> None:
    """Verify the retry loop stops as soon as a freshly created ad is detected after a failed attempt."""
    test_bot.page = mock_page
    ad_file = "ad.yaml"
    ad_cfg_orig = copy.deepcopy(base_ad_config)
    ad_cfg = Ad.model_validate(base_ad_config)

    # web_request responses in call order:
    #   1. initial fetch before the publish loop      -> account has no ads
    #   2. fresh baseline taken before the retry loop -> still no ads
    #   3. verification after the failed attempt      -> ad "99999" appeared,
    #      i.e. the submission partially succeeded and retrying would duplicate it
    web_responses = [
        {"content": json.dumps({"ads": []})},
        {"content": json.dumps({"ads": []})},
        {"content": json.dumps({"ads": [{"id": "99999", "state": "active"}]})},
    ]

    request_patch = patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = web_responses)
    publish_patch = patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout"))
    with request_patch, publish_patch as publish_mock:
        await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])

    # Exactly one publish attempt proves the duplicate check aborted all retries.
    assert publish_mock.await_count == 1
def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None:
"""Test root URL retrieval."""
assert test_bot.root_url == "https://www.kleinanzeigen.de"