mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-16 12:21:50 +01:00
fix: prevent duplicate listings during publish retry loop (#875)
This commit is contained in:
@@ -1572,6 +1572,14 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
|||||||
success = False
|
success = False
|
||||||
|
|
||||||
# Retry loop only for publish_ad (before submission completes)
|
# Retry loop only for publish_ad (before submission completes)
|
||||||
|
# Fetch a fresh baseline right before the retry loop to avoid stale state
|
||||||
|
# from earlier successful publishes in multi-ad runs (see #874)
|
||||||
|
try:
|
||||||
|
pre_publish_ads = await self._fetch_published_ads()
|
||||||
|
ads_before_publish:set[str] = {str(x["id"]) for x in pre_publish_ads if x.get("id")}
|
||||||
|
except Exception as ex: # noqa: BLE001
|
||||||
|
LOG.warning("Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.", ad_cfg.title, ex)
|
||||||
|
ads_before_publish = {str(x["id"]) for x in published_ads if x.get("id")}
|
||||||
for attempt in range(1, max_retries + 1):
|
for attempt in range(1, max_retries + 1):
|
||||||
try:
|
try:
|
||||||
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE)
|
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE)
|
||||||
@@ -1582,6 +1590,28 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
|||||||
except (TimeoutError, ProtocolException) as ex:
|
except (TimeoutError, ProtocolException) as ex:
|
||||||
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
|
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
|
||||||
if attempt < max_retries:
|
if attempt < max_retries:
|
||||||
|
# Before retrying, check if the ad was already created despite the error.
|
||||||
|
# A partially successful submission followed by a retry would create a duplicate listing,
|
||||||
|
# which violates kleinanzeigen.de terms of service and can lead to account suspension.
|
||||||
|
try:
|
||||||
|
current_ads = await self._fetch_published_ads()
|
||||||
|
current_ad_ids = {str(x["id"]) for x in current_ads if x.get("id")}
|
||||||
|
new_ad_ids = current_ad_ids - ads_before_publish
|
||||||
|
if new_ad_ids:
|
||||||
|
LOG.warning(
|
||||||
|
"Attempt %s/%s failed for '%s': %s. "
|
||||||
|
"However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.",
|
||||||
|
attempt, max_retries, ad_cfg.title, ex, ", ".join(new_ad_ids)
|
||||||
|
)
|
||||||
|
failed_count += 1
|
||||||
|
break
|
||||||
|
except Exception as verify_ex: # noqa: BLE001
|
||||||
|
LOG.warning(
|
||||||
|
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.",
|
||||||
|
ad_cfg.title, verify_ex,
|
||||||
|
)
|
||||||
|
failed_count += 1
|
||||||
|
break
|
||||||
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
|
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
|
||||||
await self.web_sleep(2) # Wait before retry
|
await self.web_sleep(2) # Wait before retry
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -154,6 +154,9 @@ kleinanzeigen_bot/__init__.py:
|
|||||||
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
|
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
|
||||||
" -> Could not confirm publishing for '%s', but ad may be online": " -> Veröffentlichung für '%s' konnte nicht bestätigt werden, aber Anzeige ist möglicherweise online"
|
" -> Could not confirm publishing for '%s', but ad may be online": " -> Veröffentlichung für '%s' konnte nicht bestätigt werden, aber Anzeige ist möglicherweise online"
|
||||||
"Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..."
|
"Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..."
|
||||||
|
"Attempt %s/%s failed for '%s': %s. However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.": "Versuch %s/%s fehlgeschlagen für '%s': %s. Jedoch wurde eine neue Anzeige erkannt (ID: %s) -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
|
||||||
|
"Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.": "Konnte keine aktuelle Anzeigen-Baseline für '%s' abrufen: %s. Verwende initialen Snapshot."
|
||||||
|
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.": "Veröffentlichte Anzeigen konnten nach fehlgeschlagenem Versuch für '%s' nicht geprüft werden: %s -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
|
||||||
"All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige."
|
"All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige."
|
||||||
"DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)"
|
"DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)"
|
||||||
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"
|
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"
|
||||||
|
|||||||
@@ -1007,12 +1007,46 @@ class TestKleinanzeigenBotBasics:
|
|||||||
):
|
):
|
||||||
await test_bot.publish_ads(ad_cfgs)
|
await test_bot.publish_ads(ad_cfgs)
|
||||||
|
|
||||||
# With pagination, the URL now includes pageNum parameter
|
# web_request is called twice: once for initial fetch, once for pre-retry-loop baseline
|
||||||
web_request_mock.assert_awaited_once_with(f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1")
|
expected_url = f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1"
|
||||||
|
assert web_request_mock.await_count == 2
|
||||||
|
web_request_mock.assert_any_await(expected_url)
|
||||||
publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE)
|
publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE)
|
||||||
web_await_mock.assert_awaited_once()
|
web_await_mock.assert_awaited_once()
|
||||||
delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False)
|
delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_publish_ads_aborts_retry_on_duplicate_detection(
    self,
    test_bot:KleinanzeigenBot,
    base_ad_config:dict[str, Any],
    mock_page:MagicMock,
) -> None:
    """A failed publish attempt must not be retried once a previously unseen ad id shows up.

    ``publish_ad`` is patched to raise ``TimeoutError`` on every call and ``web_request`` is
    patched to serve three canned listings in order; only the third one contains an ad.
    The test passes when ``publish_ad`` was awaited exactly once.
    """
    def listing(ads:list[dict[str, Any]]) -> dict[str, str]:
        # Wrap the ads in the response envelope used by this test: JSON text under "content".
        return {"content": json.dumps({"ads": ads})}

    test_bot.page = mock_page
    ads_to_publish = [("ad.yaml", Ad.model_validate(base_ad_config), copy.deepcopy(base_ad_config))]

    # Handed out one per awaited web_request call, in this order.
    canned_listings = [
        listing([]),  # initial fetch, before the per-ad loop: no ads
        listing([]),  # fresh baseline, right before the retry loop: no ads
        listing([{"id": "99999", "state": "active"}]),  # after the failed attempt: a new ad appeared
    ]

    web_request_patch = patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = canned_listings)
    publish_ad_patch = patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout"))

    with web_request_patch, publish_ad_patch as publish_mock:
        await test_bot.publish_ads(ads_to_publish)

    # A second await would mean the bot retried despite the newly detected ad.
    assert publish_mock.await_count == 1
|
||||||
|
|
||||||
def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None:
    """Check that the bot fixture reports the kleinanzeigen.de root URL."""
    expected_root_url = "https://www.kleinanzeigen.de"
    assert test_bot.root_url == expected_root_url
|
||||||
|
|||||||
Reference in New Issue
Block a user