Mirror of https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
(synced 2026-03-16 12:21:50 +01:00)
fix: harden published-ads filtering for required keys
This commit is contained in:
@@ -1501,7 +1501,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
rejected_count = 0
|
||||
rejected_preview:str | None = None
|
||||
for entry in page_ads:
|
||||
if isinstance(entry, dict):
|
||||
if isinstance(entry, dict) and "id" in entry and "state" in entry:
|
||||
filtered_page_ads.append(entry)
|
||||
continue
|
||||
rejected_count += 1
|
||||
@@ -1913,9 +1913,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
#############################
|
||||
# submit
|
||||
#############################
|
||||
submission_attempted = False
|
||||
try:
|
||||
submission_attempted = True
|
||||
try:
|
||||
await self.web_click(By.ID, "pstad-submit")
|
||||
except TimeoutError:
|
||||
@@ -1951,9 +1949,7 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
confirmation_timeout = self._timeout("publishing_confirmation")
|
||||
await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = confirmation_timeout)
|
||||
except (TimeoutError, ProtocolException) as ex:
|
||||
if submission_attempted:
|
||||
raise PublishSubmissionUncertainError("submission may have succeeded before failure") from ex
|
||||
raise
|
||||
raise PublishSubmissionUncertainError("submission may have succeeded before failure") from ex
|
||||
|
||||
# extract the ad id from the URL's query parameter
|
||||
current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query)
|
||||
|
||||
@@ -94,7 +94,7 @@ class TestJSONPagination:
|
||||
async def test_fetch_published_ads_single_page_no_paging(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test fetching ads from single page with no paging info."""
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.return_value = {"content": '{"ads": [{"id": 1, "title": "Ad 1"}, {"id": 2, "title": "Ad 2"}]}'}
|
||||
mock_request.return_value = {"content": '{"ads": [{"id": 1, "state": "active", "title": "Ad 1"}, {"id": 2, "state": "active", "title": "Ad 2"}]}'}
|
||||
|
||||
result = await bot._fetch_published_ads()
|
||||
|
||||
@@ -109,7 +109,7 @@ class TestJSONPagination:
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_single_page_with_paging(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test fetching ads from single page with paging info showing 1/1."""
|
||||
response_data = {"ads": [{"id": 1, "title": "Ad 1"}], "paging": {"pageNum": 1, "last": 1}}
|
||||
response_data = {"ads": [{"id": 1, "state": "active", "title": "Ad 1"}], "paging": {"pageNum": 1, "last": 1}}
|
||||
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.return_value = {"content": json.dumps(response_data)}
|
||||
@@ -125,9 +125,9 @@ class TestJSONPagination:
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_multi_page(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test fetching ads from multiple pages (3 pages, 2 ads each)."""
|
||||
page1_data = {"ads": [{"id": 1}, {"id": 2}], "paging": {"pageNum": 1, "last": 3, "next": 2}}
|
||||
page2_data = {"ads": [{"id": 3}, {"id": 4}], "paging": {"pageNum": 2, "last": 3, "next": 3}}
|
||||
page3_data = {"ads": [{"id": 5}, {"id": 6}], "paging": {"pageNum": 3, "last": 3}}
|
||||
page1_data = {"ads": [{"id": 1, "state": "active"}, {"id": 2, "state": "active"}], "paging": {"pageNum": 1, "last": 3, "next": 2}}
|
||||
page2_data = {"ads": [{"id": 3, "state": "active"}, {"id": 4, "state": "active"}], "paging": {"pageNum": 2, "last": 3, "next": 3}}
|
||||
page3_data = {"ads": [{"id": 5, "state": "active"}, {"id": 6, "state": "active"}], "paging": {"pageNum": 3, "last": 3}}
|
||||
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.side_effect = [
|
||||
@@ -176,7 +176,7 @@ class TestJSONPagination:
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_missing_paging_dict(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test handling of missing paging dict."""
|
||||
response_data = {"ads": [{"id": 1}, {"id": 2}]}
|
||||
response_data = {"ads": [{"id": 1, "state": "active"}, {"id": 2, "state": "active"}]}
|
||||
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.return_value = {"content": json.dumps(response_data)}
|
||||
@@ -190,7 +190,7 @@ class TestJSONPagination:
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_non_integer_paging_values(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test handling of non-integer paging values."""
|
||||
response_data = {"ads": [{"id": 1}], "paging": {"pageNum": "invalid", "last": "also-invalid"}}
|
||||
response_data = {"ads": [{"id": 1, "state": "active"}], "paging": {"pageNum": "invalid", "last": "also-invalid"}}
|
||||
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.return_value = {"content": json.dumps(response_data)}
|
||||
@@ -222,7 +222,7 @@ class TestJSONPagination:
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_filters_non_dict_entries(self, bot:KleinanzeigenBot, caplog:pytest.LogCaptureFixture) -> None:
|
||||
"""Malformed entries should be filtered and logged."""
|
||||
response_data = {"ads": [42, {"id": 1}, "broken"], "paging": {"pageNum": 1, "last": 1}}
|
||||
response_data = {"ads": [42, {"id": 1, "state": "active"}, "broken"], "paging": {"pageNum": 1, "last": 1}}
|
||||
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.return_value = {"content": json.dumps(response_data)}
|
||||
@@ -230,11 +230,39 @@ class TestJSONPagination:
|
||||
with caplog.at_level("WARNING"):
|
||||
result = await bot._fetch_published_ads()
|
||||
|
||||
if result != [{"id": 1}]:
|
||||
if result != [{"id": 1, "state": "active"}]:
|
||||
pytest.fail(f"expected malformed entries to be filtered out, got: {result}")
|
||||
if "Filtered 2 malformed ad entries on page 1" not in caplog.text:
|
||||
pytest.fail(f"expected malformed-entry warning in logs, got: {caplog.text}")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_filters_dict_entries_missing_required_keys(
|
||||
self,
|
||||
bot:KleinanzeigenBot,
|
||||
caplog:pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Dict entries without required id/state keys should be rejected."""
|
||||
response_data = {
|
||||
"ads": [
|
||||
{"id": 1},
|
||||
{"state": "active"},
|
||||
{"title": "missing both"},
|
||||
{"id": 2, "state": "paused"},
|
||||
],
|
||||
"paging": {"pageNum": 1, "last": 1},
|
||||
}
|
||||
|
||||
with patch.object(bot, "web_request", new_callable = AsyncMock) as mock_request:
|
||||
mock_request.return_value = {"content": json.dumps(response_data)}
|
||||
|
||||
with caplog.at_level("WARNING"):
|
||||
result = await bot._fetch_published_ads()
|
||||
|
||||
if result != [{"id": 2, "state": "paused"}]:
|
||||
pytest.fail(f"expected only entries with id and state to remain, got: {result}")
|
||||
if "Filtered 3 malformed ad entries on page 1" not in caplog.text:
|
||||
pytest.fail(f"expected malformed-entry warning in logs, got: {caplog.text}")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_published_ads_timeout(self, bot:KleinanzeigenBot) -> None:
|
||||
"""Test handling of timeout during pagination."""
|
||||
|
||||
Reference in New Issue
Block a user