From b47c6311ebd9234829ab5ebd90ecd6a488fab0aa Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Sun, 15 Mar 2026 20:14:31 +0100 Subject: [PATCH] fix: harden published-ads filtering for required keys --- src/kleinanzeigen_bot/__init__.py | 8 ++---- tests/unit/test_json_pagination.py | 46 ++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py index 53222cf..428144c 100644 --- a/src/kleinanzeigen_bot/__init__.py +++ b/src/kleinanzeigen_bot/__init__.py @@ -1501,7 +1501,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 rejected_count = 0 rejected_preview:str | None = None for entry in page_ads: - if isinstance(entry, dict): + if isinstance(entry, dict) and "id" in entry and "state" in entry: filtered_page_ads.append(entry) continue rejected_count += 1 @@ -1913,9 +1913,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 ############################# # submit ############################# - submission_attempted = False try: - submission_attempted = True try: await self.web_click(By.ID, "pstad-submit") except TimeoutError: @@ -1951,9 +1949,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904 confirmation_timeout = self._timeout("publishing_confirmation") await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = confirmation_timeout) except (TimeoutError, ProtocolException) as ex: - if submission_attempted: - raise PublishSubmissionUncertainError("submission may have succeeded before failure") from ex - raise + raise PublishSubmissionUncertainError("submission may have succeeded before failure") from ex # extract the ad id from the URL's query parameter current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query) diff --git a/tests/unit/test_json_pagination.py b/tests/unit/test_json_pagination.py index fbf0d9c..7cc824a 100644 --- a/tests/unit/test_json_pagination.py +++ b/tests/unit/test_json_pagination.py @@ -94,7 +94,7 @@ class TestJSONPagination: async def test_fetch_published_ads_single_page_no_paging(self, bot:KleinanzeigenBot) -> None: """Test fetching ads from single page with no paging info.""" with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: - mock_request.return_value = {"content": '{"ads": [{"id": 1, "title": "Ad 1"}, {"id": 2, "title": "Ad 2"}]}'} + mock_request.return_value = {"content": '{"ads": [{"id": 1, "state": "active", "title": "Ad 1"}, {"id": 2, "state": "active", "title": "Ad 2"}]}'} result = await bot._fetch_published_ads() @@ -109,7 +109,7 @@ class TestJSONPagination: @pytest.mark.asyncio async def test_fetch_published_ads_single_page_with_paging(self, bot:KleinanzeigenBot) -> None: """Test fetching ads from single page with paging info showing 1/1.""" - response_data = {"ads": [{"id": 1, "title": "Ad 1"}], "paging": {"pageNum": 1, "last": 1}} + response_data = {"ads": [{"id": 1, "state": "active", "title": "Ad 1"}], "paging": {"pageNum": 1, "last": 1}} with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: mock_request.return_value = {"content": json.dumps(response_data)} @@ -125,9 +125,9 @@ class TestJSONPagination: @pytest.mark.asyncio async def test_fetch_published_ads_multi_page(self, bot:KleinanzeigenBot) -> None: """Test fetching ads from multiple pages (3 pages, 2 ads each).""" - page1_data = {"ads": [{"id": 1}, {"id": 2}], "paging": {"pageNum": 1, "last": 3, "next": 2}} - page2_data = {"ads": [{"id": 3}, {"id": 4}], "paging": {"pageNum": 2, "last": 3, "next": 3}} - page3_data = {"ads": [{"id": 5}, {"id": 6}], "paging": {"pageNum": 3, "last": 3}} + page1_data = {"ads": [{"id": 1, "state": "active"}, {"id": 2, "state": "active"}], "paging": {"pageNum": 1, "last": 3, "next": 2}} + page2_data = {"ads": [{"id": 3, "state": "active"}, {"id": 4, "state": "active"}], "paging": {"pageNum": 2, "last": 3, "next": 3}} + page3_data = {"ads": [{"id": 5, "state": "active"}, {"id": 6, "state": "active"}], "paging": {"pageNum": 3, "last": 3}} with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: mock_request.side_effect = [ @@ -176,7 +176,7 @@ class TestJSONPagination: @pytest.mark.asyncio async def test_fetch_published_ads_missing_paging_dict(self, bot:KleinanzeigenBot) -> None: """Test handling of missing paging dict.""" - response_data = {"ads": [{"id": 1}, {"id": 2}]} + response_data = {"ads": [{"id": 1, "state": "active"}, {"id": 2, "state": "active"}]} with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: mock_request.return_value = {"content": json.dumps(response_data)} @@ -190,7 +190,7 @@ class TestJSONPagination: @pytest.mark.asyncio async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None: """Test handling of non-integer paging values.""" - response_data = {"ads": [{"id": 1}], "paging": {"pageNum": "invalid", "last": "also-invalid"}} + response_data = {"ads": [{"id": 1, "state": "active"}], "paging": {"pageNum": "invalid", "last": "also-invalid"}} with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: mock_request.return_value = {"content": json.dumps(response_data)} @@ -222,7 +222,7 @@ class TestJSONPagination: @pytest.mark.asyncio async def test_fetch_published_ads_filters_non_dict_entries(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None: """Malformed entries should be filtered and logged.""" - response_data = {"ads": [42, {"id": 1}, "broken"], "paging": {"pageNum": 1, "last": 1}} + response_data = {"ads": [42, {"id": 1, "state": "active"}, "broken"], "paging": {"pageNum": 1, "last": 1}} with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: mock_request.return_value = {"content": json.dumps(response_data)} @@ -230,11 +230,39 @@ class TestJSONPagination: with caplog.at_level("WARNING"): result = await bot._fetch_published_ads() - if result != [{"id": 1}]: + if result != [{"id": 1, "state": "active"}]: pytest.fail(f"expected malformed entries to be filtered out, got: {result}") if "Filtered 2 malformed ad entries on page 1" not in caplog.text: pytest.fail(f"expected malformed-entry warning in logs, got: {caplog.text}") + @pytest.mark.asyncio + async def test_fetch_published_ads_filters_dict_entries_missing_required_keys( + self, + bot:KleinanzeigenBot, + caplog:pytest.LogCaptureFixture, + ) -> None: + """Dict entries without required id/state keys should be rejected.""" + response_data = { + "ads": [ + {"id": 1}, + {"state": "active"}, + {"title": "missing both"}, + {"id": 2, "state": "paused"}, + ], + "paging": {"pageNum": 1, "last": 1}, + } + + with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request: + mock_request.return_value = {"content": json.dumps(response_data)} + + with caplog.at_level("WARNING"): + result = await bot._fetch_published_ads() + + if result != [{"id": 2, "state": "paused"}]: + pytest.fail(f"expected only entries with id and state to remain, got: {result}") + if "Filtered 3 malformed ad entries on page 1" not in caplog.text: + pytest.fail(f"expected malformed-entry warning in logs, got: {caplog.text}") + @pytest.mark.asyncio async def test_fetch_published_ads_timeout(self, bot:KleinanzeigenBot) -> None: """Test handling of timeout during pagination."""