fix: fail closed on uncertain post-submit retries

This commit is contained in:
Jens
2026-03-15 19:47:45 +01:00
parent 6e562164b8
commit 1abe233de5
5 changed files with 157 additions and 200 deletions

View File

@@ -20,7 +20,7 @@ from .model.ad_model import MAX_DESCRIPTION_LENGTH, Ad, AdPartial, Contact, calc
from .model.config_model import Config from .model.config_model import Config
from .update_checker import UpdateChecker from .update_checker import UpdateChecker
from .utils import diagnostics, dicts, error_handlers, loggers, misc, xdg_paths from .utils import diagnostics, dicts, error_handlers, loggers, misc, xdg_paths
from .utils.exceptions import CaptchaEncountered from .utils.exceptions import CaptchaEncountered, PublishSubmissionUncertainError
from .utils.files import abspath from .utils.files import abspath
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
from .utils.misc import ainput, ensure, is_frozen from .utils.misc import ainput, ensure, is_frozen
@@ -1438,7 +1438,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
return False return False
async def _fetch_published_ads(self, *, strict:bool = False) -> list[dict[str, Any]]: async def _fetch_published_ads(self) -> list[dict[str, Any]]:
"""Fetch all published ads, handling API pagination. """Fetch all published ads, handling API pagination.
Returns: Returns:
@@ -1458,14 +1458,10 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
try: try:
response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}") response = await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum={page}")
except TimeoutError as ex: except TimeoutError as ex:
if strict:
raise
LOG.warning("Pagination request failed on page %s: %s", page, ex) LOG.warning("Pagination request failed on page %s: %s", page, ex)
break break
if not isinstance(response, dict): if not isinstance(response, dict):
if strict:
raise TypeError(f"Unexpected pagination response type on page {page}: {type(response).__name__}")
LOG.warning("Unexpected pagination response type on page %s: %s", page, type(response).__name__) LOG.warning("Unexpected pagination response type on page %s: %s", page, type(response).__name__)
break break
@@ -1475,8 +1471,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
if isinstance(content, bytes): if isinstance(content, bytes):
content = content.decode("utf-8", errors = "replace") content = content.decode("utf-8", errors = "replace")
if not isinstance(content, str): if not isinstance(content, str):
if strict:
raise TypeError(f"Unexpected response content type on page {page}: {type(content).__name__}")
LOG.warning("Unexpected response content type on page %s: %s", page, type(content).__name__) LOG.warning("Unexpected response content type on page %s: %s", page, type(content).__name__)
break break
@@ -1484,27 +1478,19 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
json_data = json.loads(content) json_data = json.loads(content)
except (json.JSONDecodeError, TypeError) as ex: except (json.JSONDecodeError, TypeError) as ex:
if not content: if not content:
if strict:
raise ValueError(f"Empty JSON response content on page {page}") from ex
LOG.warning("Empty JSON response content on page %s", page) LOG.warning("Empty JSON response content on page %s", page)
break break
if strict:
raise ValueError(f"Failed to parse JSON response on page {page}: {ex}") from ex
snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "") snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet) LOG.warning("Failed to parse JSON response on page %s: %s (content: %s)", page, ex, snippet)
break break
if not isinstance(json_data, dict): if not isinstance(json_data, dict):
if strict:
raise TypeError(f"Unexpected JSON payload type on page {page}: {type(json_data).__name__}")
snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "") snippet = content[:SNIPPET_LIMIT] + ("..." if len(content) > SNIPPET_LIMIT else "")
LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet) LOG.warning("Unexpected JSON payload on page %s (content: %s)", page, snippet)
break break
page_ads = json_data.get("ads", []) page_ads = json_data.get("ads", [])
if not isinstance(page_ads, list): if not isinstance(page_ads, list):
if strict:
raise TypeError(f"Unexpected 'ads' type on page {page}: {type(page_ads).__name__}")
preview = str(page_ads) preview = str(page_ads)
if len(preview) > SNIPPET_LIMIT: if len(preview) > SNIPPET_LIMIT:
preview = preview[:SNIPPET_LIMIT] + "..." preview = preview[:SNIPPET_LIMIT] + "..."
@@ -1519,8 +1505,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
filtered_page_ads.append(entry) filtered_page_ads.append(entry)
continue continue
rejected_count += 1 rejected_count += 1
if strict:
raise TypeError(f"Unexpected ad entry type on page {page}: {type(entry).__name__}")
if rejected_preview is None: if rejected_preview is None:
rejected_preview = repr(entry) rejected_preview = repr(entry)
@@ -1534,8 +1518,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
paging = json_data.get("paging") paging = json_data.get("paging")
if not isinstance(paging, dict): if not isinstance(paging, dict):
if strict:
raise ValueError(f"Missing or invalid paging info on page {page}: {type(paging).__name__}")
LOG.debug("No paging dict found on page %s, assuming single page", page) LOG.debug("No paging dict found on page %s, assuming single page", page)
break break
@@ -1544,14 +1526,10 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
total_pages = misc.coerce_page_number(paging.get("last")) total_pages = misc.coerce_page_number(paging.get("last"))
if current_page_num is None: if current_page_num is None:
if strict:
raise ValueError(f"Invalid 'pageNum' in paging info: {paging.get('pageNum')}")
LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum")) LOG.warning("Invalid 'pageNum' in paging info: %s, stopping pagination", paging.get("pageNum"))
break break
if total_pages is None: if total_pages is None:
if strict:
raise ValueError("No pagination info found")
LOG.debug("No pagination info found, assuming single page") LOG.debug("No pagination info found, assuming single page")
break break
@@ -1570,8 +1548,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
# Use API's next field for navigation (more robust than our counter) # Use API's next field for navigation (more robust than our counter)
next_page = misc.coerce_page_number(paging.get("next")) next_page = misc.coerce_page_number(paging.get("next"))
if next_page is None: if next_page is None:
if strict:
raise ValueError(f"Invalid 'next' page value in paging info: {paging.get('next')}")
LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next")) LOG.warning("Invalid 'next' page value in paging info: %s, stopping pagination", paging.get("next"))
break break
page = next_page page = next_page
@@ -1739,28 +1715,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
# Check for success messages # Check for success messages
return await self.web_check(By.ID, "checking-done", Is.DISPLAYED) or await self.web_check(By.ID, "not-completed", Is.DISPLAYED) return await self.web_check(By.ID, "checking-done", Is.DISPLAYED) or await self.web_check(By.ID, "not-completed", Is.DISPLAYED)
async def _detect_new_published_ad_ids(self, ads_before_publish:set[str], ad_title:str) -> set[str] | None:
    """Report which published ad ids are not part of the given baseline.

    The published ads are re-fetched in strict mode and their ids (as strings)
    are compared against ``ads_before_publish``.

    Args:
        ads_before_publish: Ad ids (as strings) known before the publish attempt.
        ad_title: Title of the ad being published; only used in the log message.

    Returns:
        The set of ids present now but absent from the baseline (possibly empty),
        or ``None`` when the verification itself failed for any reason.
    """
    try:
        fetched_ads = await self._fetch_published_ads(strict = True)

        # Keep duplicate-prevention verification fail-closed: a single malformed
        # entry aborts the check rather than risking a duplicate listing.
        for entry in fetched_ads:
            if isinstance(entry, dict):
                continue
            if hasattr(entry, "__len__"):
                entry_length = len(entry)
            else:
                entry_length = None
            LOG.debug("Malformed ad entry in strict duplicate verification: type=%s length=%s", type(entry).__name__, entry_length)
            raise TypeError(f"Unexpected ad entry type: {type(entry).__name__}")

        # Entries without a truthy "id" are ignored; ids are normalized to strings.
        seen_ids:set[str] = {str(entry["id"]) for entry in fetched_ads if entry.get("id")}
    except Exception as ex:  # noqa: BLE001
        LOG.warning(
            "Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.",
            ad_title,
            ex,
        )
        return None
    else:
        return seen_ids - ads_before_publish
async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None: async def publish_ads(self, ad_cfgs:list[tuple[str, Ad, dict[str, Any]]]) -> None:
count = 0 count = 0
failed_count = 0 failed_count = 0
@@ -1778,15 +1732,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
count += 1 count += 1
success = False success = False
# Retry loop only for publish_ad (before submission completes)
# Fetch a fresh baseline right before the retry loop to avoid stale state
# from earlier successful publishes in multi-ad runs (see #874)
try:
pre_publish_ads = await self._fetch_published_ads()
ads_before_publish:set[str] = {str(x["id"]) for x in pre_publish_ads if x.get("id")}
except Exception as ex: # noqa: BLE001
LOG.warning("Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.", ad_cfg.title, ex)
ads_before_publish = {str(x["id"]) for x in published_ads if x.get("id")}
for attempt in range(1, max_retries + 1): for attempt in range(1, max_retries + 1):
try: try:
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE) await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig, published_ads, AdUpdateStrategy.REPLACE)
@@ -1794,6 +1739,22 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
break # Publish succeeded, exit retry loop break # Publish succeeded, exit retry loop
except asyncio.CancelledError: except asyncio.CancelledError:
raise # Respect task cancellation raise # Respect task cancellation
except PublishSubmissionUncertainError as ex:
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
LOG.warning(
"Attempt %s/%s for '%s' reached submit boundary but failed: %s. Not retrying to prevent duplicate listings.",
attempt,
max_retries,
ad_cfg.title,
ex,
)
LOG.warning("Manual recovery required for '%s'. Check 'Meine Anzeigen' to confirm whether the ad was posted.", ad_cfg.title)
LOG.warning(
"If posted, sync local state with 'kleinanzeigen-bot download --ads=new' or 'kleinanzeigen-bot download --ads=<id>'; "
"otherwise rerun publish for this ad."
)
failed_count += 1
break
except (TimeoutError, ProtocolException) as ex: except (TimeoutError, ProtocolException) as ex:
await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex) await self._capture_publish_error_diagnostics_if_enabled(ad_cfg, ad_cfg_orig, ad_file, attempt, ex)
if attempt >= max_retries: if attempt >= max_retries:
@@ -1801,26 +1762,6 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
failed_count += 1 failed_count += 1
continue continue
# Before retrying, check if the ad was already created despite the error.
# A partially successful submission followed by a retry would create a duplicate listing,
# which violates kleinanzeigen.de terms of service and can lead to account suspension.
new_ad_ids = await self._detect_new_published_ad_ids(ads_before_publish, ad_cfg.title)
if new_ad_ids is None:
failed_count += 1
break
if new_ad_ids:
LOG.warning(
"Attempt %s/%s failed for '%s': %s. "
"However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.",
attempt,
max_retries,
ad_cfg.title,
ex,
", ".join(new_ad_ids),
)
failed_count += 1
break
LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex) LOG.warning("Attempt %s/%s failed for '%s': %s. Retrying...", attempt, max_retries, ad_cfg.title, ex)
await self.web_sleep(2_000) # Wait before retry await self.web_sleep(2_000) # Wait before retry
@@ -1972,6 +1913,9 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
############################# #############################
# submit # submit
############################# #############################
submission_attempted = False
try:
submission_attempted = True
try: try:
await self.web_click(By.ID, "pstad-submit") await self.web_click(By.ID, "pstad-submit")
except TimeoutError: except TimeoutError:
@@ -2006,6 +1950,10 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
confirmation_timeout = self._timeout("publishing_confirmation") confirmation_timeout = self._timeout("publishing_confirmation")
await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = confirmation_timeout) await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = confirmation_timeout)
except (TimeoutError, ProtocolException) as ex:
if submission_attempted:
raise PublishSubmissionUncertainError("submission may have succeeded before failure") from ex
raise
# extract the ad id from the URL's query parameter # extract the ad id from the URL's query parameter
current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query) current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query)

View File

@@ -177,16 +177,15 @@ kleinanzeigen_bot/__init__.py:
"Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist" "Skipping because ad is reserved": "Überspringen, da Anzeige reserviert ist"
" -> Could not confirm publishing for '%s', but ad may be online": " -> Veröffentlichung für '%s' konnte nicht bestätigt werden, aber Anzeige ist möglicherweise online" " -> Could not confirm publishing for '%s', but ad may be online": " -> Veröffentlichung für '%s' konnte nicht bestätigt werden, aber Anzeige ist möglicherweise online"
"Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..." "Attempt %s/%s failed for '%s': %s. Retrying...": "Versuch %s/%s fehlgeschlagen für '%s': %s. Erneuter Versuch..."
"Attempt %s/%s failed for '%s': %s. However, a new ad was detected (id: %s) -- aborting retries to prevent duplicates.": "Versuch %s/%s fehlgeschlagen für '%s': %s. Jedoch wurde eine neue Anzeige erkannt (ID: %s) -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden." "Attempt %s/%s for '%s' reached submit boundary but failed: %s. Not retrying to prevent duplicate listings.": "Versuch %s/%s für '%s' hat die Submit-Grenze erreicht, ist aber fehlgeschlagen: %s. Kein erneuter Versuch, um doppelte Anzeigen zu vermeiden."
"Could not fetch fresh published-ads baseline for '%s': %s. Falling back to initial snapshot.": "Konnte keine aktuelle Anzeigen-Baseline für '%s' abrufen: %s. Verwende initialen Snapshot." "Manual recovery required for '%s'. Check 'Meine Anzeigen' to confirm whether the ad was posted.": "Manuelle Wiederherstellung für '%s' erforderlich. Prüfen Sie in 'Meine Anzeigen', ob die Anzeige veröffentlicht wurde."
? "If posted, sync local state with 'kleinanzeigen-bot download --ads=new' or 'kleinanzeigen-bot download --ads=<id>'; otherwise rerun publish for this ad."
: "Falls veröffentlicht, lokalen Stand mit 'kleinanzeigen-bot download --ads=new' oder 'kleinanzeigen-bot download --ads=<id>' synchronisieren; andernfalls Veröffentlichung für diese Anzeige erneut starten."
"All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige." "All %s attempts failed for '%s': %s. Skipping ad.": "Alle %s Versuche fehlgeschlagen für '%s': %s. Überspringe Anzeige."
"DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)" "DONE: (Re-)published %s (%s failed after retries)": "FERTIG: %s (erneut) veröffentlicht (%s fehlgeschlagen nach Wiederholungen)"
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht" "DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"
"ad": "Anzeige" "ad": "Anzeige"
_detect_new_published_ad_ids:
"Could not verify published ads after failed attempt for '%s': %s -- aborting retries to prevent duplicates.": "Veröffentlichte Anzeigen konnten nach fehlgeschlagenem Versuch für '%s' nicht geprüft werden: %s -- Wiederholungen werden abgebrochen, um Duplikate zu vermeiden."
apply_auto_price_reduction: apply_auto_price_reduction:
"Auto price reduction is enabled for [%s] but no price is configured.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber es wurde kein Preis konfiguriert." "Auto price reduction is enabled for [%s] but no price is configured.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber es wurde kein Preis konfiguriert."
"Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber min_price entspricht dem Preis (%s) - es werden keine Reduktionen auftreten." "Auto price reduction is enabled for [%s] but min_price equals price (%s) - no reductions will occur.": "Automatische Preisreduzierung ist für [%s] aktiviert, aber min_price entspricht dem Preis (%s) - es werden keine Reduktionen auftreten."

View File

@@ -14,3 +14,10 @@ class CaptchaEncountered(KleinanzeigenBotError):
def __init__(self, restart_delay:timedelta) -> None: def __init__(self, restart_delay:timedelta) -> None:
super().__init__() super().__init__()
self.restart_delay = restart_delay self.restart_delay = restart_delay
class PublishSubmissionUncertainError(KleinanzeigenBotError):
    """Signal that a publish attempt failed once submission had been attempted.

    When this is raised the submission may already have reached the server,
    so whether the ad was published is unknown.
    """

    def __init__(self, reason:str) -> None:
        """Create the error.

        Args:
            reason: Human-readable description of why the outcome is uncertain;
                forwarded unchanged to the base error.
        """
        super().__init__(reason)

View File

@@ -10,6 +10,7 @@ from typing import Any, cast
from unittest.mock import AsyncMock, MagicMock, patch from unittest.mock import AsyncMock, MagicMock, patch
import pytest import pytest
from nodriver.core.connection import ProtocolException
from pydantic import ValidationError from pydantic import ValidationError
from kleinanzeigen_bot import LOG, PUBLISH_MAX_RETRIES, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc from kleinanzeigen_bot import LOG, PUBLISH_MAX_RETRIES, AdUpdateStrategy, KleinanzeigenBot, LoginState, misc
@@ -17,6 +18,7 @@ from kleinanzeigen_bot._version import __version__
from kleinanzeigen_bot.model.ad_model import Ad from kleinanzeigen_bot.model.ad_model import Ad
from kleinanzeigen_bot.model.config_model import AdDefaults, Config, DiagnosticsConfig, PublishingConfig from kleinanzeigen_bot.model.config_model import AdDefaults, Config, DiagnosticsConfig, PublishingConfig
from kleinanzeigen_bot.utils import dicts, loggers, xdg_paths from kleinanzeigen_bot.utils import dicts, loggers, xdg_paths
from kleinanzeigen_bot.utils.exceptions import PublishSubmissionUncertainError
from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element from kleinanzeigen_bot.utils.web_scraping_mixin import By, Element
@@ -1171,10 +1173,9 @@ class TestKleinanzeigenBotBasics:
): ):
await test_bot.publish_ads(ad_cfgs) await test_bot.publish_ads(ad_cfgs)
# web_request is called twice: once for initial fetch, once for pre-retry-loop baseline # web_request is called once for initial published-ads snapshot
expected_url = f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1" expected_url = f"{test_bot.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1"
assert web_request_mock.await_count == 2 web_request_mock.assert_awaited_once_with(expected_url)
web_request_mock.assert_any_await(expected_url)
publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE) publish_ad_mock.assert_awaited_once_with("ad.yaml", ad_cfgs[0][1], {}, [], AdUpdateStrategy.REPLACE)
web_await_mock.assert_awaited_once() web_await_mock.assert_awaited_once()
delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False) delete_ad_mock.assert_awaited_once_with(ad_cfgs[0][1], [], delete_old_ads_by_title = False)
@@ -1198,103 +1199,136 @@ class TestKleinanzeigenBotBasics:
with ( with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = ads_response), patch.object(test_bot, "web_request", new_callable = AsyncMock, return_value = ads_response),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = [TimeoutError("transient"), None]) as publish_mock, patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = [TimeoutError("transient"), None]) as publish_mock,
patch.object(test_bot, "_detect_new_published_ad_ids", new_callable = AsyncMock, return_value = set()) as detect_mock,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock, patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
patch.object(test_bot, "web_await", new_callable = AsyncMock, return_value = True), patch.object(test_bot, "web_await", new_callable = AsyncMock, return_value = True),
): ):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
assert publish_mock.await_count == 2 assert publish_mock.await_count == 2
detect_mock.assert_awaited_once()
sleep_mock.assert_awaited_once_with(2_000) sleep_mock.assert_awaited_once_with(2_000)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_publish_ads_aborts_retry_on_duplicate_detection( async def test_publish_ads_does_not_retry_when_submission_state_is_uncertain(
self, self,
test_bot:KleinanzeigenBot, test_bot:KleinanzeigenBot,
base_ad_config:dict[str, Any], base_ad_config:dict[str, Any],
mock_page:MagicMock, mock_page:MagicMock,
) -> None: ) -> None:
"""Ensure retries are aborted when a new ad is detected after a failed attempt to prevent duplicates.""" """Post-submit uncertainty must fail closed and skip retries."""
test_bot.page = mock_page test_bot.page = mock_page
test_bot.keep_old_ads = True
ad_cfg = Ad.model_validate(base_ad_config) ad_cfg = Ad.model_validate(base_ad_config)
ad_cfg_orig = copy.deepcopy(base_ad_config) ad_cfg_orig = copy.deepcopy(base_ad_config)
ad_file = "ad.yaml" ad_file = "ad.yaml"
# 1st _fetch_published_ads call (initial, before loop): no ads
# 2nd call (fresh baseline, before retry loop): no ads
# 3rd call (after first failed attempt): a new ad appeared — duplicate detected
fetch_responses = [
{"content": json.dumps({"ads": []})}, # initial fetch
{"content": json.dumps({"ads": []})}, # fresh baseline
{"content": json.dumps({"ads": [{"id": "99999", "state": "active"}]})}, # duplicate detected
]
with ( with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = fetch_responses), patch.object(
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")) as publish_mock, test_bot,
"web_request",
new_callable = AsyncMock,
return_value = {"content": json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})},
),
patch.object(
test_bot,
"publish_ad",
new_callable = AsyncMock,
side_effect = PublishSubmissionUncertainError("submission may have succeeded before failure"),
) as publish_mock,
patch.object(test_bot, "web_sleep", new_callable = AsyncMock) as sleep_mock,
): ):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)])
# publish_ad should have been called only once — retry was aborted due to duplicate detection
assert publish_mock.await_count == 1 assert publish_mock.await_count == 1
sleep_mock.assert_not_awaited()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_publish_ads_aborts_retry_when_duplicate_verification_fetch_is_malformed( async def test_publish_ad_keeps_pre_submit_timeouts_retryable(
self, self,
test_bot:KleinanzeigenBot, test_bot:KleinanzeigenBot,
base_ad_config:dict[str, Any], base_ad_config:dict[str, Any],
mock_page:MagicMock,
) -> None: ) -> None:
"""Retry verification must fail closed on malformed published-ads responses.""" """Timeouts before submit boundary should remain plain retryable failures."""
test_bot.page = mock_page ad_cfg = Ad.model_validate(base_ad_config | {"id": 12345, "shipping_type": "NOT_APPLICABLE", "price_type": "NOT_APPLICABLE"})
ad_cfg = Ad.model_validate(base_ad_config)
ad_cfg_orig = copy.deepcopy(base_ad_config) ad_cfg_orig = copy.deepcopy(base_ad_config)
ad_file = "ad.yaml"
fetch_responses = [
{"content": json.dumps({"ads": []})},
{"content": json.dumps({"ads": []})},
[],
]
with ( with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = fetch_responses), patch.object(test_bot, "web_open", new_callable = AsyncMock),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")) as publish_mock, patch.object(test_bot, "_dismiss_consent_banner", new_callable = AsyncMock),
patch.object(test_bot, "_KleinanzeigenBot__set_category", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")),
pytest.raises(TimeoutError, match = "image upload timeout"),
): ):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) await test_bot.publish_ad("ad.yaml", ad_cfg, ad_cfg_orig, [], AdUpdateStrategy.MODIFY)
assert publish_mock.await_count == 1
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_publish_ads_aborts_retry_when_duplicate_verification_ads_entries_are_malformed( async def test_publish_ad_marks_post_submit_timeout_as_uncertain(
self, self,
test_bot:KleinanzeigenBot, test_bot:KleinanzeigenBot,
base_ad_config:dict[str, Any], base_ad_config:dict[str, Any],
mock_page:MagicMock, mock_page:MagicMock,
) -> None: ) -> None:
"""Retry verification must fail closed when strict fetch returns non-dict ad entries.""" """Timeouts after submit click should be converted to non-retryable uncertainty."""
test_bot.page = mock_page test_bot.page = mock_page
ad_cfg = Ad.model_validate(base_ad_config | {"id": 12345, "shipping_type": "NOT_APPLICABLE", "price_type": "NOT_APPLICABLE"})
ad_cfg = Ad.model_validate(base_ad_config)
ad_cfg_orig = copy.deepcopy(base_ad_config) ad_cfg_orig = copy.deepcopy(base_ad_config)
ad_file = "ad.yaml"
fetch_responses = [ async def find_side_effect(selector_type:By, selector_value:str, **_:Any) -> MagicMock:
{"content": json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})}, if selector_type == By.ID and selector_value == "myftr-shppngcrt-frm":
{"content": json.dumps({"ads": [], "paging": {"pageNum": 1, "last": 1}})}, raise TimeoutError("no payment form")
{"content": json.dumps({"ads": [42], "paging": {"pageNum": 1, "last": 1}})}, return MagicMock()
]
with ( with (
patch.object(test_bot, "web_request", new_callable = AsyncMock, side_effect = fetch_responses), patch.object(test_bot, "web_open", new_callable = AsyncMock),
patch.object(test_bot, "publish_ad", new_callable = AsyncMock, side_effect = TimeoutError("image upload timeout")) as publish_mock, patch.object(test_bot, "_dismiss_consent_banner", new_callable = AsyncMock),
patch.object(test_bot, "_KleinanzeigenBot__set_category", new_callable = AsyncMock),
patch.object(test_bot, "_KleinanzeigenBot__set_special_attributes", new_callable = AsyncMock),
patch.object(test_bot, "_KleinanzeigenBot__set_contact_fields", new_callable = AsyncMock),
patch.object(test_bot, "check_and_wait_for_captcha", new_callable = AsyncMock),
patch.object(test_bot, "web_input", new_callable = AsyncMock),
patch.object(test_bot, "web_click", new_callable = AsyncMock),
patch.object(test_bot, "web_check", new_callable = AsyncMock, return_value = False),
patch.object(test_bot, "web_execute", new_callable = AsyncMock),
patch.object(test_bot, "web_find", new_callable = AsyncMock, side_effect = find_side_effect),
patch.object(test_bot, "web_find_all", new_callable = AsyncMock, return_value = []),
patch.object(test_bot, "web_await", new_callable = AsyncMock, side_effect = TimeoutError("confirmation timeout")),
pytest.raises(PublishSubmissionUncertainError, match = "submission may have succeeded before failure"),
): ):
await test_bot.publish_ads([(ad_file, ad_cfg, ad_cfg_orig)]) await test_bot.publish_ad("ad.yaml", ad_cfg, ad_cfg_orig, [], AdUpdateStrategy.MODIFY)
assert publish_mock.await_count == 1 @pytest.mark.asyncio
async def test_publish_ad_marks_post_submit_protocol_exception_as_uncertain(
    self,
    test_bot:KleinanzeigenBot,
    base_ad_config:dict[str, Any],
    mock_page:MagicMock,
) -> None:
    """A ProtocolException after the submit click must be reported as PublishSubmissionUncertainError."""
    test_bot.page = mock_page

    overrides = {"id": 12345, "shipping_type": "NOT_APPLICABLE", "price_type": "NOT_APPLICABLE"}
    ad = Ad.model_validate(base_ad_config | overrides)
    original_cfg = copy.deepcopy(base_ad_config)

    async def fake_web_find(selector_type:By, selector_value:str, **_:Any) -> MagicMock:
        # Only the lookup of the "myftr-shppngcrt-frm" element times out;
        # every other lookup resolves to a fresh mock element.
        if (selector_type, selector_value) == (By.ID, "myftr-shppngcrt-frm"):
            raise TimeoutError("no payment form")
        return MagicMock()

    # The submit click itself is mocked to succeed; web_await is the call that fails.
    connection_lost = ProtocolException(MagicMock(), "connection lost", 0)

    with (
        patch.object(test_bot, "web_open", new_callable = AsyncMock),
        patch.object(test_bot, "_dismiss_consent_banner", new_callable = AsyncMock),
        patch.object(test_bot, "_KleinanzeigenBot__set_category", new_callable = AsyncMock),
        patch.object(test_bot, "_KleinanzeigenBot__set_special_attributes", new_callable = AsyncMock),
        patch.object(test_bot, "_KleinanzeigenBot__set_contact_fields", new_callable = AsyncMock),
        patch.object(test_bot, "check_and_wait_for_captcha", new_callable = AsyncMock),
        patch.object(test_bot, "web_input", new_callable = AsyncMock),
        patch.object(test_bot, "web_click", new_callable = AsyncMock),
        patch.object(test_bot, "web_check", new_callable = AsyncMock, return_value = False),
        patch.object(test_bot, "web_execute", new_callable = AsyncMock),
        patch.object(test_bot, "web_find", new_callable = AsyncMock, side_effect = fake_web_find),
        patch.object(test_bot, "web_find_all", new_callable = AsyncMock, return_value = []),
        patch.object(test_bot, "web_await", new_callable = AsyncMock, side_effect = connection_lost),
        pytest.raises(PublishSubmissionUncertainError, match = "submission may have succeeded before failure"),
    ):
        await test_bot.publish_ad("ad.yaml", ad, original_cfg, [], AdUpdateStrategy.MODIFY)
def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None: def test_get_root_url(self, test_bot:KleinanzeigenBot) -> None:
"""Test root URL retrieval.""" """Test root URL retrieval."""

View File

@@ -187,17 +187,6 @@ class TestJSONPagination:
pytest.fail(f"expected 2 ads, got {len(result)}") pytest.fail(f"expected 2 ads, got {len(result)}")
mock_request.assert_awaited_once() mock_request.assert_awaited_once()
@pytest.mark.asyncio
async def test_fetch_published_ads_strict_raises_on_missing_paging_dict(self, bot:KleinanzeigenBot) -> None:
    """With ``strict = True`` a response lacking the ``paging`` dict must raise ValueError."""
    # The payload carries ads but no "paging" key at all.
    payload = json.dumps({"ads": [{"id": 1}, {"id": 2}]})

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = {"content": payload}),
        pytest.raises(ValueError, match = "Missing or invalid paging info on page 1: NoneType"),
    ):
        await bot._fetch_published_ads(strict = True)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None: async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None:
"""Test handling of non-integer paging values.""" """Test handling of non-integer paging values."""
@@ -231,26 +220,15 @@ class TestJSONPagination:
pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}") pytest.fail(f"expected empty list when 'ads' is not a list, got: {result}")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_fetch_published_ads_strict_rejects_non_dict_entries(self, bot:KleinanzeigenBot) -> None: async def test_fetch_published_ads_filters_non_dict_entries(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
"""Strict mode should reject malformed entries inside ads list.""" """Malformed entries should be filtered and logged."""
response_data = {"ads": [42, {"id": 1}], "paging": {"pageNum": 1, "last": 1}}
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
mock_request.return_value = {"content": json.dumps(response_data)}
with pytest.raises(TypeError, match = "Unexpected ad entry type on page 1: int"):
await bot._fetch_published_ads(strict = True)
@pytest.mark.asyncio
async def test_fetch_published_ads_non_strict_filters_non_dict_entries(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
"""Non-strict mode should filter malformed entries and continue."""
response_data = {"ads": [42, {"id": 1}, "broken"], "paging": {"pageNum": 1, "last": 1}} response_data = {"ads": [42, {"id": 1}, "broken"], "paging": {"pageNum": 1, "last": 1}}
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
mock_request.return_value = {"content": json.dumps(response_data)} mock_request.return_value = {"content": json.dumps(response_data)}
with caplog.at_level("WARNING"): with caplog.at_level("WARNING"):
result = await bot._fetch_published_ads(strict = False) result = await bot._fetch_published_ads()
if result != [{"id": 1}]: if result != [{"id": 1}]:
pytest.fail(f"expected malformed entries to be filtered out, got: {result}") pytest.fail(f"expected malformed entries to be filtered out, got: {result}")
@@ -269,24 +247,15 @@ class TestJSONPagination:
pytest.fail(f"Expected empty list on timeout, got {result}") pytest.fail(f"Expected empty list on timeout, got {result}")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_fetch_published_ads_non_strict_handles_non_string_content_type(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None: async def test_fetch_published_ads_handles_non_string_content_type(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
"""Non-strict mode should gracefully stop on unexpected non-string content types.""" """Unexpected non-string content types should stop pagination with warning."""
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
mock_request.return_value = {"content": None} mock_request.return_value = {"content": None}
with caplog.at_level("WARNING"): with caplog.at_level("WARNING"):
result = await bot._fetch_published_ads(strict = False) result = await bot._fetch_published_ads()
if result != []: if result != []:
pytest.fail(f"expected empty result on non-string content in non-strict mode, got: {result}") pytest.fail(f"expected empty result on non-string content, got: {result}")
if "Unexpected response content type on page 1: NoneType" not in caplog.text: if "Unexpected response content type on page 1: NoneType" not in caplog.text:
pytest.fail(f"expected non-string content warning in logs, got: {caplog.text}") pytest.fail(f"expected non-string content warning in logs, got: {caplog.text}")
@pytest.mark.asyncio
async def test_fetch_published_ads_strict_raises_on_non_string_content_type(self, bot:KleinanzeigenBot) -> None:
    """With ``strict = True`` a response whose ``content`` is not a string must raise TypeError."""
    # "content" is None, i.e. neither str nor bytes.
    bad_response = {"content": None}

    with (
        patch.object(bot, "web_request", new_callable = AsyncMock, return_value = bad_response),
        pytest.raises(TypeError, match = "Unexpected response content type on page 1: NoneType"),
    ):
        await bot._fetch_published_ads(strict = True)