From 9ed87ff17f4ce74251baae8016707644849b51a0 Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Wed, 10 Dec 2025 14:04:42 +0100 Subject: [PATCH] fix: wait for image upload completion before submitting ad (#716) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## ℹ️ Description Fixes a race condition where ads were submitted before all images finished uploading to the server, causing some images to be missing from published ads. - Link to the related issue(s): Issue #715 - The bot was submitting ads immediately after the last image `send_file()` call completed, only waiting 1-2.5 seconds via `web_sleep()`. This wasn't enough time for server-side image processing, thumbnail generation, and DOM updates to complete, resulting in missing images in published ads. ## 📋 Changes Summary ### Image Upload Verification (Initial Fix) - Added thumbnail verification in `__upload_images()` method to wait for all image thumbnails to appear in the DOM after upload - Added configurable timeout `image_upload` to `TimeoutConfig` (default: 30s, minimum: 5s) - Improved error messages to show expected vs actual image count when upload times out - Added German translations for new log messages and error messages - Regenerated JSON schemas to include new timeout configuration ### Polling Performance & Crash Fix (Follow-up Fix) - Fixed critical bug where `web_find_all()` would raise `TimeoutError` when no thumbnails exist yet, causing immediate crash - Wrapped DOM queries in `try/except TimeoutError` blocks to handle empty results gracefully - Changed polling to use `self._timeout("quick_dom")` (~1s with PR #718) instead of default timeout - Improved polling performance: reduced cycle time from ~2s to ~1.5s - DOM queries are client-side only (no server load from frequent polling) **New configuration option:** ```yaml timeouts: image_upload: 30.0 # Total timeout for image upload and server-side processing quick_dom: 1.0 # Per-poll timeout for thumbnail checks (adjustable via multiplier) ``` The bot now polls the DOM for `ul#j-pictureupload-thumbnails > li.ui-sortable-handle` elements after uploading images, ensuring server-side processing is complete before submitting the ad form. ### ⚙️ Type of Change - [x] 🐞 Bug fix (non-breaking change which fixes an issue) - [ ] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (`pdm run test`). - [x] I have formatted the code (`pdm run format`). - [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. ## Summary by CodeRabbit * **New Features** * Image uploads now verify completion by waiting for all uploaded thumbnails to appear before proceeding. * **Improvements** * Added a configurable image upload timeout (default 30s, minimum 5s). * Improved timeout reporting: when thumbnails don’t appear in time, the app returns clearer feedback showing expected vs. observed counts. ✏️ Tip: You can customize this high-level summary in your review settings. --- README.md | 1 + schemas/config.schema.json | 7 +++ src/kleinanzeigen_bot/__init__.py | 48 ++++++++++++++++++- src/kleinanzeigen_bot/model/config_model.py | 1 + .../resources/translations.de.yaml | 7 +++ 5 files changed, 63 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d607f8f..3fced45 100644 --- a/README.md +++ b/README.md @@ -294,6 +294,7 @@ timeouts: gdpr_prompt: 10.0 # Timeout when handling GDPR dialogs publishing_result: 300.0 # Timeout for publishing status checks publishing_confirmation: 20.0 # Timeout for publish confirmation redirect + image_upload: 30.0 # Timeout for image upload and server-side processing pagination_initial: 10.0 # Timeout for first pagination lookup pagination_follow_up: 5.0 # Timeout for subsequent pagination clicks quick_dom: 2.0 # Generic short DOM timeout (shipping dialogs, etc.) diff --git a/schemas/config.schema.json b/schemas/config.schema.json index 0c639fe..b0621bc 100644 --- a/schemas/config.schema.json +++ b/schemas/config.schema.json @@ -417,6 +417,13 @@ "title": "Publishing Confirmation", "type": "number" }, + "image_upload": { + "default": 30.0, + "description": "Timeout for image upload and server-side processing.", + "minimum": 5.0, + "title": "Image Upload", + "type": "number" + }, "pagination_initial": { "default": 10.0, "description": "Timeout for initial pagination lookup.", diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index e912f6e..92f175f 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -893,7 +893,7 @@ class KleinanzeigenBot(WebScrapingMixin): # delete previous images because we don't know which have changed ############################# img_items = await self.web_find_all(By.CSS_SELECTOR, - "ul#j-pictureupload-thumbnails > li.ui-sortable-handle") + "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)") for element in img_items: btn = await self.web_find(By.CSS_SELECTOR, "button.pictureupload-thumbnails-remove", parent = element) await btn.click() @@ -1260,6 +1260,52 @@ class KleinanzeigenBot(WebScrapingMixin): await image_upload.send_file(image) await self.web_sleep() + # Wait for all images to be processed and thumbnails to appear + expected_count = len(ad_cfg.images) + LOG.info(_(" -> waiting for %s to be processed..."), pluralize("image", ad_cfg.images)) + + async def check_thumbnails_uploaded() -> bool: + try: + thumbnails = await self.web_find_all( + By.CSS_SELECTOR, + "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)", + timeout = self._timeout("quick_dom") # Fast timeout for polling + ) + current_count = len(thumbnails) + if current_count < expected_count: + LOG.debug(_(" -> %d of %d images processed"), current_count, expected_count) + return current_count == expected_count + except TimeoutError: + # No thumbnails found yet, continue polling + return False + + try: + await self.web_await( + check_thumbnails_uploaded, + timeout = self._timeout("image_upload"), + timeout_error_message = _("Image upload timeout exceeded") + ) + except TimeoutError as ex: + # Get current count for better error message + try: + thumbnails = await self.web_find_all( + By.CSS_SELECTOR, + "ul#j-pictureupload-thumbnails > li:not(.is-placeholder)", + timeout = self._timeout("quick_dom") + ) + current_count = len(thumbnails) + except TimeoutError: + # Still no thumbnails after full timeout + current_count = 0 + raise TimeoutError( + _("Not all images were uploaded within timeout. Expected %(expected)d, found %(found)d thumbnails.") % { + "expected": expected_count, + "found": current_count + } + ) from ex + + LOG.info(_(" -> all images uploaded successfully")) + async def download_ads(self) -> None: """ Determines which download mode was chosen with the arguments, and calls the specified download routine. diff --git a/src/kleinanzeigen_bot/model/config_model.py b/src/kleinanzeigen_bot/model/config_model.py index 552a49a..1e224ab 100644 --- a/src/kleinanzeigen_bot/model/config_model.py +++ b/src/kleinanzeigen_bot/model/config_model.py @@ -127,6 +127,7 @@ class TimeoutConfig(ContextualModel): gdpr_prompt:float = Field(default = 10.0, ge = 1.0, description = "Timeout for GDPR/consent dialogs.") publishing_result:float = Field(default = 300.0, ge = 10.0, description = "Timeout for publishing result checks.") publishing_confirmation:float = Field(default = 20.0, ge = 1.0, description = "Timeout for publish confirmation redirect.") + image_upload:float = Field(default = 30.0, ge = 5.0, description = "Timeout for image upload and server-side processing.") pagination_initial:float = Field(default = 10.0, ge = 1.0, description = "Timeout for initial pagination lookup.") pagination_follow_up:float = Field(default = 5.0, ge = 1.0, description = "Timeout for subsequent pagination navigation.") quick_dom:float = Field(default = 2.0, ge = 0.1, description = "Generic short timeout for transient UI.") diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml index ece3787..55827e4 100644 --- a/src/kleinanzeigen_bot/resources/translations.de.yaml +++ b/src/kleinanzeigen_bot/resources/translations.de.yaml @@ -112,6 +112,13 @@ kleinanzeigen_bot/__init__.py: " -> found %s": "-> %s gefunden" "image": "Bild" " -> uploading image [%s]": " -> Lade Bild [%s] hoch" + " -> waiting for %s to be processed...": " -> Warte auf Verarbeitung von %s..." + " -> all images uploaded successfully": " -> Alle Bilder erfolgreich hochgeladen" + "Image upload timeout exceeded": "Zeitüberschreitung beim Hochladen der Bilder" + "Not all images were uploaded within timeout. Expected %(expected)d, found %(found)d thumbnails.": "Nicht alle Bilder wurden innerhalb der Zeitüberschreitung hochgeladen. Erwartet: %(expected)d, gefunden: %(found)d Miniaturansichten." + + check_thumbnails_uploaded: + " -> %d of %d images processed": " -> %d von %d Bildern verarbeitet" __check_ad_republication: " -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days": " -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Erneute Veröffentlichung ist erst nach %s Tagen erforderlich"