mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
## ℹ️ Description *Provide a concise summary of the changes introduced in this pull request.* - Link to the related issue(s): Closes #789 (completes the fix started in #793) - **Motivation**: Fix JSON API pagination for accounts with >25 ads. Aligns pagination logic with weidi’s approach (starts at page 1), while hardening error handling and tests. Based on https://github.com/weidi/kleinanzeigen-bot/pull/1. ## 📋 Changes Summary - Added pagination helper to fetch all published ads and use it in delete/extend/publish/update flows - Added robust handling for malformed JSON payloads and unexpected ads types (with translated warnings) - Improved sell_directly extraction with pagination, bounds checks, and shared coercion helper - Added/updated tests for pagination and edge cases; updated assertions to pytest.fail style ### ⚙️ Type of Change Select the type(s) of change(s) included in this pull request: - [x] 🐞 Bug fix (non-breaking change which fixes an issue) - [ ] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (`pdm run test:cov:unified`). - [x] I have formatted the code (`pdm run format`). - [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary. By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Reliable multi-page fetching for published ads and buy-now eligibility checks. 
* **Bug Fixes** * Safer pagination with per-page JSON handling, limits and improved termination diagnostics; ensures pageNum is used when needed. * **Tests** * New comprehensive pagination tests and updates to existing tests to reflect multi-page behavior. * **Chores** * Added a utility to safely coerce page numbers; minor utility signature cleanup. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub> <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1689 lines
83 KiB
Python
1689 lines
83 KiB
Python
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
import json # isort: skip
|
|
from gettext import gettext as _
|
|
from pathlib import Path
|
|
from typing import Any, TypedDict
|
|
from unittest.mock import AsyncMock, MagicMock, call, patch
|
|
from urllib.error import URLError
|
|
|
|
import pytest
|
|
|
|
import kleinanzeigen_bot.extract as extract_module
|
|
from kleinanzeigen_bot.model.ad_model import AdPartial, ContactPartial
|
|
from kleinanzeigen_bot.model.config_model import Config, DownloadConfig
|
|
from kleinanzeigen_bot.utils.web_scraping_mixin import Browser, By, Element
|
|
|
|
|
|
class _DimensionsDict(TypedDict):
    """Typed mapping with a single required string entry, ``ad_attributes``."""

    ad_attributes:str
|
|
|
|
|
|
class _UniversalAnalyticsOptsDict(TypedDict):
    """Typed mapping whose ``dimensions`` entry is a ``_DimensionsDict``."""

    dimensions:_DimensionsDict
|
|
|
|
|
|
class _BelenConfDict(TypedDict):
    """Typed mapping whose ``universalAnalyticsOpts`` entry is a ``_UniversalAnalyticsOptsDict``."""

    universalAnalyticsOpts:_UniversalAnalyticsOptsDict
|
|
|
|
|
|
class _SpecialAttributesDict(TypedDict, total=False):
    """Typed mapping of optional string entries (``total=False`` makes every key optional)."""

    art_s:str
    condition_s:str
|
|
|
|
|
|
class _TestCaseDict(TypedDict):  # noqa: PYI049 Private TypedDict `...` is never used
    """Typed mapping pairing a ``belen_conf`` input with its ``expected`` special attributes."""

    belen_conf:_BelenConfDict
    expected:_SpecialAttributesDict
|
|
|
|
|
|
@pytest.fixture
def test_extractor(browser_mock:MagicMock, test_bot_config:Config) -> extract_module.AdExtractor:
    """Build a new extract_module.AdExtractor for every test that requests this fixture.

    Dependencies:
        - browser_mock: passed to the extractor as its browser
        - test_bot_config: passed to the extractor as its configuration
    """
    extractor = extract_module.AdExtractor(browser_mock, test_bot_config)
    return extractor
|
|
|
|
|
|
class TestAdExtractorBasics:
    """Basic tests for extract_module.AdExtractor and the async ``files`` helpers."""

    @staticmethod
    async def _check_files_exists(tmp_path:Path) -> None:
        """Assert files.exists for an existing and a missing path, each given as Path and as str.

        Shared by the two ``exists`` tests below, which previously carried identical bodies.
        The leading underscore keeps pytest from collecting this helper as a test.
        """
        from kleinanzeigen_bot.utils import files  # noqa: PLC0415

        # Existing path
        existing_file = tmp_path / "test.txt"
        existing_file.write_text("test")
        assert await files.exists(existing_file) is True
        assert await files.exists(str(existing_file)) is True

        # Non-existing path
        non_existing = tmp_path / "nonexistent.txt"
        assert await files.exists(non_existing) is False
        assert await files.exists(str(non_existing)) is False

    @staticmethod
    async def _check_files_is_dir(tmp_path:Path) -> None:
        """Assert files.is_dir for a directory, a regular file and a missing path (Path and str).

        Shared by the two ``is_dir`` tests below, which previously carried identical bodies.
        """
        from kleinanzeigen_bot.utils import files  # noqa: PLC0415

        # Directory
        test_dir = tmp_path / "testdir"
        test_dir.mkdir()
        assert await files.is_dir(test_dir) is True
        assert await files.is_dir(str(test_dir)) is True

        # Regular file
        test_file = tmp_path / "test.txt"
        test_file.write_text("test")
        assert await files.is_dir(test_file) is False
        assert await files.is_dir(str(test_file)) is False

        # Non-existing path
        non_existing = tmp_path / "nonexistent"
        assert await files.is_dir(non_existing) is False
        assert await files.is_dir(str(non_existing)) is False

    def test_constructor(self, browser_mock:MagicMock, test_bot_config:Config) -> None:
        """Test the constructor of extract_module.AdExtractor"""
        extractor = extract_module.AdExtractor(browser_mock, test_bot_config)
        assert extractor.browser == browser_mock
        assert extractor.config == test_bot_config

    @pytest.mark.parametrize(
        ("url", "expected_id"),
        [
            ("https://www.kleinanzeigen.de/s-anzeige/test-title/12345678", 12345678),
            ("https://www.kleinanzeigen.de/s-anzeige/another-test/98765432", 98765432),
            ("https://www.kleinanzeigen.de/s-anzeige/invalid-id/abc", -1),
            ("https://www.kleinanzeigen.de/invalid-url", -1),
        ],
    )
    def test_extract_ad_id_from_ad_url(self, test_extractor:extract_module.AdExtractor, url:str, expected_id:int) -> None:
        """Test extraction of ad ID from different URL formats (-1 is expected for unparsable URLs)."""
        assert test_extractor.extract_ad_id_from_ad_url(url) == expected_id

    @pytest.mark.asyncio
    async def test_path_exists_helper(self, tmp_path:Path) -> None:
        """Test files.exists helper function."""
        await self._check_files_exists(tmp_path)

    @pytest.mark.asyncio
    async def test_path_is_dir_helper(self, tmp_path:Path) -> None:
        """Test files.is_dir helper function."""
        await self._check_files_is_dir(tmp_path)

    @pytest.mark.asyncio
    async def test_exists_async_helper(self, tmp_path:Path) -> None:
        """Test files.exists async helper function."""
        await self._check_files_exists(tmp_path)

    @pytest.mark.asyncio
    async def test_isdir_async_helper(self, tmp_path:Path) -> None:
        """Test files.is_dir async helper function."""
        await self._check_files_is_dir(tmp_path)

    def test_download_and_save_image_sync_success(self, tmp_path:Path) -> None:
        """Test _download_and_save_image_sync with successful download."""
        # MagicMock is already imported at module level; only mock_open is needed locally.
        from unittest.mock import mock_open  # noqa: PLC0415

        test_dir = tmp_path / "images"
        test_dir.mkdir()

        # Mock urllib response; it is also used as a context manager, hence __enter__/__exit__.
        mock_response = MagicMock()
        mock_response.info().get_content_type.return_value = "image/jpeg"
        mock_response.__enter__ = MagicMock(return_value = mock_response)
        mock_response.__exit__ = MagicMock(return_value = False)

        with (
            patch("kleinanzeigen_bot.extract.urllib_request.urlopen", return_value = mock_response),
            patch("kleinanzeigen_bot.extract.open", mock_open()),
            patch("kleinanzeigen_bot.extract.shutil.copyfileobj"),
        ):
            result = extract_module.AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(test_dir), "test_", 1)

        assert result is not None
        # Any of these suffixes is accepted for the "image/jpeg" content type.
        assert result.endswith((".jpe", ".jpeg", ".jpg"))
        assert "test_1" in result

    def test_download_and_save_image_sync_failure(self, tmp_path:Path) -> None:
        """Test _download_and_save_image_sync with download failure."""
        with patch("kleinanzeigen_bot.extract.urllib_request.urlopen", side_effect = URLError("Network error")):
            result = extract_module.AdExtractor._download_and_save_image_sync("http://example.com/image.jpg", str(tmp_path), "test_", 1)

        assert result is None
|
|
|
|
|
|
class TestAdExtractorPricing:
    """Price parsing tests for AdExtractor._extract_pricing_info_from_ad_page."""

    @pytest.mark.parametrize(
        ("price_text", "expected_price", "expected_type"),
        [
            ("50 €", 50, "FIXED"),
            ("1.234 €", 1234, "FIXED"),
            ("50 € VB", 50, "NEGOTIABLE"),
            ("VB", None, "NEGOTIABLE"),
            ("Zu verschenken", None, "GIVE_AWAY"),
        ],
    )
    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_pricing_info(
        self, test_extractor:extract_module.AdExtractor, price_text:str, expected_price:int | None, expected_type:str
    ) -> None:
        """Each price text found on the page maps to the expected (price, price type) pair."""
        web_text_patch = patch.object(test_extractor, "web_text", new_callable = AsyncMock, return_value = price_text)
        with web_text_patch:
            price, price_type = await test_extractor._extract_pricing_info_from_ad_page()

        assert (price, price_type) == (expected_price, expected_type)

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_pricing_info_timeout(self, test_extractor:extract_module.AdExtractor) -> None:
        """A TimeoutError from web_text yields no price and the NOT_APPLICABLE type."""
        web_text_patch = patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError)
        with web_text_patch:
            price, price_type = await test_extractor._extract_pricing_info_from_ad_page()

        assert price is None
        assert price_type == "NOT_APPLICABLE"
|
|
|
|
|
|
class TestAdExtractorShipping:
    """Tests for shipping related functionality."""

    @staticmethod
    def _shipping_api_response(*options:tuple[str, int]) -> dict[str, str]:
        """Build a mocked ``web_request`` result for the shipping options lookup.

        Args:
            options: ``(option id, price in euro cent)`` pairs; every option gets packageSize "SMALL".

        Returns:
            A dict whose ``content`` entry holds the JSON-encoded shipping options payload.
        """
        payload:dict[str, Any] = {
            "data": {
                "shippingOptionsResponse": {
                    "options": [{"id": option_id, "priceInEuroCent": price_in_cent, "packageSize": "SMALL"} for option_id, price_in_cent in options]
                }
            }
        }
        return {"content": json.dumps(payload)}

    @staticmethod
    async def _extract_shipping(
        extractor:extract_module.AdExtractor, shipping_text:str, response:dict[str, str] | None = None
    ) -> tuple[Any, Any, Any]:
        """Run ``_extract_shipping_info_from_ad_page`` with page, web_text and web_request patched.

        Args:
            extractor: The extractor under test.
            shipping_text: Text returned by the patched ``web_text``.
            response: Return value for the patched ``web_request``. When ``None`` the AsyncMock
                keeps its default return value.

        Returns:
            The ``(shipping_type, costs, options)`` triple produced by the extractor.
        """
        web_request_kwargs:dict[str, Any] = {} if response is None else {"return_value": response}
        with (
            patch.object(extractor, "page", MagicMock()),
            patch.object(extractor, "web_text", new_callable = AsyncMock, return_value = shipping_text),
            patch.object(extractor, "web_request", new_callable = AsyncMock, **web_request_kwargs),
        ):
            # pylint: disable=protected-access
            shipping_type, costs, options = await extractor._extract_shipping_info_from_ad_page()
        return shipping_type, costs, options

    @pytest.mark.parametrize(
        ("shipping_text", "expected_type", "expected_cost"),
        [
            ("+ Versand ab 2,99 €", "SHIPPING", 2.99),
            ("Nur Abholung", "PICKUP", None),
            ("Versand möglich", "SHIPPING", None),
        ],
    )
    @pytest.mark.asyncio
    async def test_extract_shipping_info(
        self, test_extractor:extract_module.AdExtractor, shipping_text:str, expected_type:str, expected_cost:float | None
    ) -> None:
        """Test shipping info extraction with different text formats."""
        response = None
        if expected_cost is not None:
            # round() instead of int(): int() truncates, so a product such as 4.89 * 100
            # (488.99999999999994) would end up one cent short.
            response = self._shipping_api_response(("DHL_001", round(expected_cost * 100)))

        shipping_type, costs, options = await self._extract_shipping(test_extractor, shipping_text, response)

        assert shipping_type == expected_type
        assert costs == expected_cost
        if expected_cost is not None:
            assert options == ["DHL_2"]
        else:
            assert options is None

    @pytest.mark.asyncio
    async def test_extract_shipping_info_with_options(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping info extraction with shipping options."""
        response = self._shipping_api_response(("DHL_001", 549))

        shipping_type, costs, options = await self._extract_shipping(test_extractor, "+ Versand ab 5,49 €", response)

        assert shipping_type == "SHIPPING"
        assert costs == 5.49
        assert options == ["DHL_2"]

    @pytest.mark.asyncio
    async def test_extract_shipping_info_with_all_matching_options(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping info extraction with all matching options enabled."""
        response = self._shipping_api_response(("HERMES_001", 489), ("HERMES_002", 549), ("DHL_001", 619))

        # Enable all matching options in config
        test_extractor.config.download = DownloadConfig.model_validate({"include_all_matching_shipping_options": True})

        shipping_type, costs, options = await self._extract_shipping(test_extractor, "+ Versand ab 4,89 €", response)

        assert shipping_type == "SHIPPING"
        assert costs == 4.89
        # Fail loudly when nothing was returned; the former if/else passed for None as well.
        assert options is not None
        assert sorted(options) == ["DHL_2", "Hermes_Päckchen", "Hermes_S"]

    @pytest.mark.asyncio
    async def test_extract_shipping_info_with_all_matching_options_no_match(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping extraction when include-all is enabled but no option matches the price."""
        response = self._shipping_api_response(("DHL_001", 500), ("HERMES_001", 600))

        test_extractor.config.download = DownloadConfig.model_validate({"include_all_matching_shipping_options": True})

        shipping_type, costs, options = await self._extract_shipping(test_extractor, "+ Versand ab 4,89 €", response)

        assert shipping_type == "SHIPPING"
        assert costs == 4.89
        assert options is None

    @pytest.mark.asyncio
    async def test_extract_shipping_info_with_excluded_options(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping info extraction with excluded options."""
        response = self._shipping_api_response(("HERMES_001", 489), ("HERMES_002", 549), ("DHL_001", 619))

        # Enable all matching options and exclude DHL in config
        test_extractor.config.download = DownloadConfig.model_validate({"include_all_matching_shipping_options": True, "excluded_shipping_options": ["DHL_2"]})

        shipping_type, costs, options = await self._extract_shipping(test_extractor, "+ Versand ab 4,89 €", response)

        assert shipping_type == "SHIPPING"
        assert costs == 4.89
        # Fail loudly when nothing was returned; the former if/else passed for None as well.
        assert options is not None
        assert sorted(options) == ["Hermes_Päckchen", "Hermes_S"]

    @pytest.mark.asyncio
    async def test_extract_shipping_info_with_excluded_matching_option(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping info extraction when the matching option is excluded."""
        response = self._shipping_api_response(("HERMES_001", 489), ("HERMES_002", 549))

        # Exclude the matching option
        test_extractor.config.download = DownloadConfig.model_validate({"excluded_shipping_options": ["Hermes_Päckchen"]})

        shipping_type, costs, options = await self._extract_shipping(test_extractor, "+ Versand ab 4,89 €", response)

        assert shipping_type == "SHIPPING"
        assert costs == 4.89
        assert options is None

    @pytest.mark.asyncio
    async def test_extract_shipping_info_with_no_matching_option(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping info extraction when price exists but NO matching option in API response."""
        response = self._shipping_api_response(("DHL_001", 500), ("HERMES_001", 600))

        shipping_type, costs, options = await self._extract_shipping(test_extractor, "+ Versand ab 7,00 €", response)

        assert shipping_type == "SHIPPING"
        assert costs == 7.0
        assert options is None

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_shipping_info_timeout(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test shipping info extraction when shipping element is missing (TimeoutError)."""
        # web_request is deliberately left unpatched here, exactly as before.
        with (
            patch.object(test_extractor, "page", MagicMock()),
            patch.object(test_extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError),
        ):
            shipping_type, costs, options = await test_extractor._extract_shipping_info_from_ad_page()

        assert shipping_type == "NOT_APPLICABLE"
        assert costs is None
        assert options is None
|
|
|
|
|
|
class TestAdExtractorNavigation:
|
|
"""Tests for navigation related functionality."""
|
|
|
|
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_url(self, test_extractor:extract_module.AdExtractor) -> None:
    """Navigating with a full ad URL succeeds and the last web_open call targets that URL."""
    ad_url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    fake_page = AsyncMock()
    fake_page.url = ad_url

    with (
        patch.object(test_extractor, "page", fake_page),
        patch.object(test_extractor, "web_open", new_callable = AsyncMock) as open_mock,
        # Every element lookup times out in this scenario.
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError),
    ):
        navigated = await test_extractor.navigate_to_ad_page(ad_url)

    assert navigated is True
    open_mock.assert_called_with(ad_url)
|
|
|
|
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_id(self, test_extractor:extract_module.AdExtractor) -> None:
    """Navigating by numeric ID opens the keyword search URL and clicks the popup close button once."""
    ad_id = 12345
    fake_page = AsyncMock()
    fake_page.url = f"https://www.kleinanzeigen.de/s-anzeige/test/{ad_id}"

    close_button = AsyncMock()
    close_button.click = AsyncMock()
    close_button.apply = AsyncMock(return_value = True)

    def find_mock(selector_type:By, selector_value:str, **_:Any) -> Element | None:
        # Only the popup close button is "found"; every other lookup yields None.
        wants_close_button = selector_type == By.CLASS_NAME and selector_value == "mfp-close"
        return close_button if wants_close_button else None

    with (
        patch.object(test_extractor, "page", fake_page),
        patch.object(test_extractor, "web_open", new_callable = AsyncMock) as open_mock,
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_mock),
    ):
        navigated = await test_extractor.navigate_to_ad_page(ad_id)

    assert navigated is True
    open_mock.assert_called_with(f"https://www.kleinanzeigen.de/s-suchanfrage.html?keywords={ad_id}")
    close_button.click.assert_awaited_once()
|
|
|
|
@pytest.mark.asyncio
async def test_navigate_to_ad_page_with_popup(self, test_extractor:extract_module.AdExtractor) -> None:
    """Navigating by ID succeeds and the last web_click call targets the "mfp-close" class."""
    fake_page = AsyncMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"

    # Generic element returned for every web_find lookup.
    found_element = AsyncMock()
    found_element.clear_input = AsyncMock()
    found_element.send_keys = AsyncMock()
    found_element.apply = AsyncMock(return_value = True)

    with (
        patch.object(test_extractor, "page", fake_page),
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = found_element),
        patch.object(test_extractor, "web_click", new_callable = AsyncMock) as click_mock,
        patch.object(test_extractor, "web_check", new_callable = AsyncMock, return_value = True),
    ):
        navigated = await test_extractor.navigate_to_ad_page(12345)

    assert navigated is True
    click_mock.assert_called_with(By.CLASS_NAME, "mfp-close")
|
|
|
|
@pytest.mark.asyncio
async def test_navigate_to_ad_page_invalid_id(self, test_extractor:extract_module.AdExtractor) -> None:
    """Navigation reports failure when the page URL afterwards is the search page ("s-suchen.html?k0")."""
    fake_page = AsyncMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-suchen.html?k0"

    # Generic element returned for every web_find lookup; it carries no attributes.
    found_element = AsyncMock()
    found_element.clear_input = AsyncMock()
    found_element.send_keys = AsyncMock()
    found_element.apply = AsyncMock(return_value = True)
    found_element.attrs = {}

    with (
        patch.object(test_extractor, "page", fake_page),
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = found_element),
    ):
        navigated = await test_extractor.navigate_to_ad_page(99999)

    assert navigated is False
|
|
|
|
@pytest.mark.asyncio
async def test_extract_own_ads_urls(self, test_extractor:extract_module.AdExtractor) -> None:
    """Single-page scenario: one ad card on the overview yields exactly one ad reference."""
    # Stand-ins for the DOM elements handed out by the patched lookups.
    container = MagicMock()  # element with id "my-manageitems-adlist"
    pagination = MagicMock()  # ".Pagination" section
    card = MagicMock()  # one "cardbox" entry
    anchor = MagicMock()  # link found inside the card
    anchor.attrs = {"href": "/s-anzeige/test/12345"}

    # web_find results, consumed strictly in call order:
    #   1. ad list container, 2. pagination section, 3. ad list container again, 4. the ad link
    find_results = [container, pagination, container, anchor]

    # web_find_all results, consumed strictly in call order:
    #   1. "Nächste" buttons -> none, i.e. a single page, 2. the cardbox elements -> one ad
    find_all_results = [[], [card]]

    with (
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_sleep", new_callable = AsyncMock),
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = find_results) as find_mock,
        patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = find_all_results) as find_all_mock,
        patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(test_extractor, "web_execute", new_callable = AsyncMock),
    ):
        refs = await test_extractor.extract_own_ads_urls()

    assert refs == ["/s-anzeige/test/12345"]

    # The lookups must have happened with these arguments, in this order.
    expected_find_calls = [
        call(By.ID, "my-manageitems-adlist"),
        call(By.CSS_SELECTOR, ".Pagination", timeout = 10),
        call(By.ID, "my-manageitems-adlist"),
        call(By.CSS_SELECTOR, "div h3 a.text-onSurface", parent = card),
    ]
    find_mock.assert_has_calls(expected_find_calls, any_order = False)

    expected_find_all_calls = [
        call(By.CSS_SELECTOR, 'button[aria-label="Nächste"]', parent = pagination),
        call(By.CLASS_NAME, "cardbox", parent = container),
    ]
    find_all_mock.assert_has_calls(expected_find_all_calls, any_order = False)
|
|
|
|
@pytest.mark.asyncio
async def test_extract_own_ads_urls_paginates_with_enabled_next_button(self, test_extractor:extract_module.AdExtractor) -> None:
    """Two-page scenario: the enabled "Nächste" button is clicked and both pages contribute a reference."""
    container = MagicMock()
    pagination = MagicMock()
    first_page_card = MagicMock()
    second_page_card = MagicMock()

    # Links are handed out one per lookup, page one first.
    pending_links = [
        MagicMock(attrs = {"href": "/s-anzeige/page-one/111"}),
        MagicMock(attrs = {"href": "/s-anzeige/page-two/222"}),
    ]

    next_button_enabled = AsyncMock()
    next_button_enabled.attrs = {}
    disabled_button = MagicMock()
    disabled_button.attrs = {"disabled": True}

    next_button_lookups = 0
    cardbox_lookups = 0

    async def fake_web_find(selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> Element:
        if selector_type == By.ID and selector_value == "my-manageitems-adlist":
            return container
        if selector_type == By.CSS_SELECTOR:
            if selector_value == ".Pagination":
                return pagination
            if selector_value == "div h3 a.text-onSurface":
                return pending_links.pop(0)
        raise AssertionError(f"Unexpected selector {selector_type} {selector_value}")

    async def fake_web_find_all(
        selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None
    ) -> list[Element]:
        nonlocal next_button_lookups, cardbox_lookups
        if selector_type == By.CSS_SELECTOR and selector_value == 'button[aria-label="Nächste"]':
            next_button_lookups += 1
            if next_button_lookups == 1:
                # Initial detection: a next button exists, so there are several pages.
                return [next_button_enabled]
            if next_button_lookups == 2:
                # Navigation on page one: a disabled button precedes the enabled one.
                return [disabled_button, next_button_enabled]
            # Any later lookup finds no button, which ends the pagination.
            return []
        if selector_type == By.CLASS_NAME and selector_value == "cardbox":
            cardbox_lookups += 1
            if cardbox_lookups == 1:
                return [first_page_card]
            return [second_page_card]
        raise AssertionError(f"Unexpected find_all selector {selector_type} {selector_value}")

    with (
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(test_extractor, "web_sleep", new_callable = AsyncMock),
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = fake_web_find),
        patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = fake_web_find_all),
    ):
        refs = await test_extractor.extract_own_ads_urls()

    assert refs == ["/s-anzeige/page-one/111", "/s-anzeige/page-two/222"]
    # The enabled button must have been clicked (awaited at least once) to reach page two.
    next_button_enabled.click.assert_awaited()
|
|
|
|
@pytest.mark.asyncio
async def test_extract_own_ads_urls_timeout_in_callback(self, test_extractor:extract_module.AdExtractor) -> None:
    """Test that a TimeoutError while collecting ad items results in an empty reference list.

    Every ``web_find`` lookup returns the same container mock and every ``web_find_all``
    lookup raises ``TimeoutError``. The extractor is expected to stop and return ``[]``
    instead of propagating the error.
    NOTE(review): the original docstring attributes this to the ``extract_page_refs``
    callback returning True on timeout; that callback is not visible from this test, confirm
    against the extractor.
    """
    ad_list_container_mock = MagicMock()

    with (
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_sleep", new_callable = AsyncMock),
        # All element lookups resolve to the ad list container.
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, return_value = ad_list_container_mock),
        # Patched once only: the former outer patch (return_value = []) was shadowed right away
        # by an inner patch and never took effect.
        patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = TimeoutError("Ad items not found")),
        patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(test_extractor, "web_execute", new_callable = AsyncMock),
    ):
        refs = await test_extractor.extract_own_ads_urls()

    assert refs == []
|
|
|
|
@pytest.mark.asyncio
async def test_extract_own_ads_urls_generic_exception_in_callback(self, test_extractor:extract_module.AdExtractor) -> None:
    """Test that generic Exception in extract_page_refs callback continues pagination.

    ``web_find`` succeeds on the first call, raises ``TimeoutError`` on the
    second call (the test treats this as "no pagination controls") and succeeds
    afterwards. ``web_find_all`` always raises ``AttributeError``. With a single
    page and no extractable items the result must be an empty list.
    """
    container = MagicMock()
    find_calls = 0

    def fake_web_find(*args:Any, **kwargs:Any) -> Element:
        nonlocal find_calls
        find_calls += 1
        if find_calls == 2:
            # Second lookup is the pagination check: report that there is none.
            raise TimeoutError("No pagination")
        # First lookup (before the loop) and any later lookup (inside the callback).
        return container

    async def failing_web_find_all(*args:Any, **kwargs:Any) -> list[Element]:
        raise AttributeError("Unexpected error")

    with (
        patch.object(test_extractor, "web_open", new_callable = AsyncMock),
        patch.object(test_extractor, "web_sleep", new_callable = AsyncMock),
        patch.object(test_extractor, "web_scroll_page_down", new_callable = AsyncMock),
        patch.object(test_extractor, "web_find", new_callable = AsyncMock, side_effect = fake_web_find),
        patch.object(test_extractor, "web_find_all", new_callable = AsyncMock, side_effect = failing_web_find_all),
    ):
        refs = await test_extractor.extract_own_ads_urls()

    # The exception must not escape; a single page without items yields no refs.
    assert refs == []
|
|
|
|
|
|
class TestAdExtractorContent:
    """Tests for content extraction functionality."""

    # pylint: disable=protected-access

    # Ad detail page opened by the description tests.
    _DESCRIPTION_AD_URL = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    # Ad detail page opened by the sell-directly tests; its trailing ID matches the ad looked up in the payloads.
    _SELL_DIRECTLY_AD_URL = "https://www.kleinanzeigen.de/s-anzeige/test-ad/123456789"
    # JSON endpoints the sell-directly tests expect to be requested, one per result page.
    _ADS_JSON_PAGE_1 = "https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=1"
    _ADS_JSON_PAGE_2 = "https://www.kleinanzeigen.de/m-meine-anzeigen-verwalten.json?sort=DEFAULT&pageNum=2"

    @staticmethod
    def _use_page(test_extractor:extract_module.AdExtractor, url:str) -> None:
        """Attach a mocked browser page reporting ``url`` to the extractor."""
        page_mock = MagicMock()
        page_mock.url = url
        test_extractor.page = page_mock

    @staticmethod
    def _json_response(payload:Any) -> dict[str, str]:
        """Wrap ``payload`` in the ``{"content": <json text>}`` shape fed to the mocked ``web_request``."""
        return {"content": json.dumps(payload)}

    @staticmethod
    def _ad_page_info_patches(web_text_results:list[Any], contact:Any) -> dict[str, AsyncMock]:
        """Build fresh ``patch.multiple`` keyword arguments for the ``_extract_ad_page_info`` tests.

        Args:
            web_text_results: Values returned (or exceptions raised) by successive ``web_text`` calls.
            contact: Value returned by the stubbed ``_extract_contact_from_ad_page``.

        Returns:
            Mapping of attribute name to a new ``AsyncMock`` stub.
        """
        return {
            "web_text": AsyncMock(side_effect = web_text_results),
            "web_execute": AsyncMock(return_value = {"universalAnalyticsOpts": {"dimensions": {"l3_category_id": "", "ad_attributes": ""}}}),
            "_extract_category_from_ad_page": AsyncMock(return_value = "160"),
            "_extract_special_attributes_from_ad_page": AsyncMock(return_value = {}),
            "_extract_pricing_info_from_ad_page": AsyncMock(return_value = (None, "NOT_APPLICABLE")),
            "_extract_shipping_info_from_ad_page": AsyncMock(return_value = ("NOT_APPLICABLE", None, None)),
            "_extract_sell_directly_from_ad_page": AsyncMock(return_value = False),
            "_download_images_from_ad_page": AsyncMock(return_value = []),
            "_extract_contact_from_ad_page": AsyncMock(return_value = contact),
        }

    @pytest.mark.asyncio
    async def test_extract_description_with_affixes(
        self, test_extractor:extract_module.AdExtractor, description_test_cases:list[tuple[dict[str, Any], str, str]], test_bot_config:Config
    ) -> None:
        """Test extraction of description with various prefix/suffix configurations.

        For every configuration the extracted description is compared against
        the raw page text; the third element of each test case is not used.
        """
        self._use_page(test_extractor, self._DESCRIPTION_AD_URL)

        for config, raw_description, _expected_description in description_test_cases:
            test_extractor.config = test_bot_config.with_values(config)

            # web_text is consumed in order: title, raw description (without affixes), creation date.
            stubs = self._ad_page_info_patches(["Test Title", raw_description, "03.02.2025"], {})
            with patch.multiple(test_extractor, **stubs):
                info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
                assert info.description == raw_description

    @pytest.mark.asyncio
    async def test_extract_description_with_affixes_timeout(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test handling of timeout when extracting description.

        Both outcomes are accepted: the timeout propagates, or extraction
        completes with an empty description.
        """
        self._use_page(test_extractor, self._DESCRIPTION_AD_URL)

        # web_text is consumed in order: title succeeds, description times out, date succeeds.
        stubs = self._ad_page_info_patches(["Test Title", TimeoutError("Timeout"), "03.02.2025"], ContactPartial())
        with patch.multiple(test_extractor, **stubs):
            try:
                info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
            except TimeoutError:
                # This is also acceptable - depends on how we want to handle timeouts
                return
            assert not info.description

    @pytest.mark.asyncio
    async def test_extract_description_with_affixes_no_affixes(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test extraction of description without any affixes in config."""
        self._use_page(test_extractor, self._DESCRIPTION_AD_URL)
        raw_description = "Original Description"

        # web_text is consumed in order: title, description without affixes, creation date.
        stubs = self._ad_page_info_patches(["Test Title", raw_description, "03.02.2025"], ContactPartial())
        with patch.multiple(test_extractor, **stubs):
            info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
            assert info.description == raw_description

    @pytest.mark.asyncio
    async def test_extract_sell_directly(self, test_extractor:extract_module.AdExtractor) -> None:
        """Test extraction of sell directly option.

        Scenarios, in order: invalid ad URL (no request at all), eligible and
        ineligible flags, ad found on the second page, missing flag, ad not in
        the list, request timeout, malformed JSON, empty list, non-boolean
        flags and structurally unexpected payloads.
        """

        async def expect_single_page_result(content:str, expected:bool | None) -> None:
            """Serve ``content`` as the response body and check the result and the single page-1 request."""
            with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
                mock_web_request.return_value = {"content": content}
                result = await test_extractor._extract_sell_directly_from_ad_page()
            assert result is expected
            mock_web_request.assert_awaited_once_with(self._ADS_JSON_PAGE_1)

        # Invalid URL: no ad ID can be derived from it (the original test notes that
        # extract_ad_id_from_ad_url returns -1 here), so None is expected without any request.
        self._use_page(test_extractor, "https://www.kleinanzeigen.de/invalid-url")
        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            result = await test_extractor._extract_sell_directly_from_ad_page()
        assert result is None
        mock_web_request.assert_not_awaited()

        # Valid ad URL for all remaining scenarios
        test_extractor.page.url = self._SELL_DIRECTLY_AD_URL

        # Successful extraction with buyNowEligible = true
        await expect_single_page_result(
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": True}, {"id": 987654321, "buyNowEligible": False}]}),
            True,
        )

        # Successful extraction with buyNowEligible = false
        await expect_single_page_result(
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": False}, {"id": 987654321, "buyNowEligible": True}]}),
            False,
        )

        # Pagination: ad found on second page
        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.side_effect = [
                self._json_response(
                    {
                        "ads": [{"id": 987654321, "buyNowEligible": False}],
                        "paging": {"pageNum": 1, "last": 2, "next": 2},
                    }
                ),
                self._json_response(
                    {
                        "ads": [{"id": 123456789, "buyNowEligible": True}],
                        "paging": {"pageNum": 2, "last": 2},
                    }
                ),
            ]
            result = await test_extractor._extract_sell_directly_from_ad_page()
        assert result is True
        mock_web_request.assert_any_await(self._ADS_JSON_PAGE_1)
        mock_web_request.assert_any_await(self._ADS_JSON_PAGE_2)

        # buyNowEligible is missing from the current ad
        await expect_single_page_result(
            json.dumps(
                {
                    "ads": [
                        {"id": 123456789},  # No buyNowEligible field
                        {"id": 987654321, "buyNowEligible": True},
                    ]
                }
            ),
            None,
        )

        # Current ad is not found in the ads list
        await expect_single_page_result(json.dumps({"ads": [{"id": 987654321, "buyNowEligible": True}]}), None)

        # Timeout error while requesting the first page
        with patch.object(test_extractor, "web_request", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_request:
            result = await test_extractor._extract_sell_directly_from_ad_page()
        assert result is None
        mock_web_request.assert_awaited_once_with(self._ADS_JSON_PAGE_1)

        # JSON decode error
        await expect_single_page_result("invalid json", None)

        # Ads list is empty
        await expect_single_page_result(json.dumps({"ads": []}), None)

        # buyNowEligible is a non-boolean value (string "true")
        await expect_single_page_result(
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": "true"}, {"id": 987654321, "buyNowEligible": False}]}),
            None,
        )

        # buyNowEligible is a non-boolean value (integer 1)
        await expect_single_page_result(
            json.dumps({"ads": [{"id": 123456789, "buyNowEligible": 1}, {"id": 987654321, "buyNowEligible": False}]}),
            None,
        )

        # Decoded JSON is not a dict
        await expect_single_page_result(json.dumps(["not", "a", "dict"]), None)

        # Decoded JSON is a dict but doesn't have an "ads" key
        await expect_single_page_result(json.dumps({"other_key": "value"}), None)

        # "ads" is present but is not a list
        await expect_single_page_result(json.dumps({"ads": "not a list"}), None)

    @pytest.mark.asyncio
    async def test_extract_sell_directly_page_limit_zero(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
        """With the page limit patched to 0, None is returned and no request is issued."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)
        monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 0)

        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            result = await test_extractor._extract_sell_directly_from_ad_page()

        assert result is None
        mock_web_request.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_extract_sell_directly_paging_key_resolution(self, test_extractor:extract_module.AdExtractor) -> None:
        """Return None when the ad is absent and the paging block mixes null and string page keys."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)

        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.return_value = self._json_response(
                {
                    "ads": [{"id": 987654321, "buyNowEligible": True}],
                    "paging": {"pageNum": None, "page": "1", "currentPage": None, "last": 0},
                }
            )
            result = await test_extractor._extract_sell_directly_from_ad_page()

        assert result is None

    @pytest.mark.asyncio
    async def test_extract_sell_directly_current_page_minus_one(self, test_extractor:extract_module.AdExtractor) -> None:
        """Request page 2 after page 1 announces it, and return None when page 2 has no ads and no paging block."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)

        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.side_effect = [
                self._json_response({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2, "next": 2}}),
                self._json_response({"ads": []}),
            ]
            result = await test_extractor._extract_sell_directly_from_ad_page()

        assert result is None
        mock_web_request.assert_any_await(self._ADS_JSON_PAGE_1)
        mock_web_request.assert_any_await(self._ADS_JSON_PAGE_2)

    @pytest.mark.asyncio
    async def test_extract_sell_directly_invalid_page_number_type(self, test_extractor:extract_module.AdExtractor) -> None:
        """Return None when paging values have unusable types (list page number, non-numeric last page)."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)

        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.return_value = self._json_response({"ads": [{"id": 987654321}], "paging": {"pageNum": [1], "last": "invalid"}})
            result = await test_extractor._extract_sell_directly_from_ad_page()

        assert result is None

    @pytest.mark.asyncio
    async def test_extract_sell_directly_float_page_numbers(self, test_extractor:extract_module.AdExtractor) -> None:
        """Return None for float page numbers, both fractional (1.5) and integral (2.0)."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)

        for paging in ({"pageNum": 1.5, "last": 0}, {"pageNum": 2.0, "last": 1}):
            with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
                mock_web_request.return_value = self._json_response({"ads": [{"id": 987654321}], "paging": paging})
                result = await test_extractor._extract_sell_directly_from_ad_page()

            assert result is None

    @pytest.mark.asyncio
    async def test_extract_sell_directly_page_limit(self, test_extractor:extract_module.AdExtractor, monkeypatch:pytest.MonkeyPatch) -> None:
        """With the page limit patched to 1, only page 1 is requested even though a second page is announced."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)
        monkeypatch.setattr(extract_module, "_SELL_DIRECTLY_MAX_PAGE_LIMIT", 1)

        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.return_value = self._json_response({"ads": [{"id": 987654321}], "paging": {"pageNum": 1, "last": 2}})
            result = await test_extractor._extract_sell_directly_from_ad_page()

        assert result is None
        mock_web_request.assert_awaited_once_with(self._ADS_JSON_PAGE_1)

    @pytest.mark.asyncio
    async def test_extract_sell_directly_paging_helper_edge_cases(self, test_extractor:extract_module.AdExtractor) -> None:
        """Return None when the ad is absent and the paging block is an empty dict."""
        self._use_page(test_extractor, self._SELL_DIRECTLY_AD_URL)

        with patch.object(test_extractor, "web_request", new_callable = AsyncMock) as mock_web_request:
            mock_web_request.return_value = self._json_response({"ads": [{"id": 987654321}], "paging": {}})
            result = await test_extractor._extract_sell_directly_from_ad_page()

        assert result is None
|
|
|
|
|
|
class TestAdExtractorCategory:
    """Tests for category extraction functionality."""

    @pytest.fixture
    def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor:
        """Provide an ``AdExtractor`` built from a mocked browser and a config with test description affixes."""
        affixes = {"prefix": "Test Prefix", "suffix": "Test Suffix"}
        config = test_bot_config.with_values({"ad_defaults": {"description": affixes}})
        return extract_module.AdExtractor(MagicMock(spec = Browser), config)

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_category(self, extractor:extract_module.AdExtractor) -> None:
        """Test category extraction from breadcrumb."""
        breadcrumb = MagicMock()
        links = [
            MagicMock(attrs = {"href": "/s-familie-kind-baby/c17"}),
            MagicMock(attrs = {"href": "/s-spielzeug/c23"}),
        ]

        with (
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = [breadcrumb]) as mock_web_find,
            patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = links) as mock_web_find_all,
        ):
            category = await extractor._extract_category_from_ad_page()

            # Both breadcrumb codes are joined with a slash.
            assert category == "17/23"
            mock_web_find.assert_awaited_once_with(By.ID, "vap-brdcrmb")
            mock_web_find_all.assert_awaited_once_with(By.CSS_SELECTOR, "a", parent = breadcrumb)

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_category_single_identifier(self, extractor:extract_module.AdExtractor) -> None:
        """Test category extraction when only a single breadcrumb code exists."""
        breadcrumb = MagicMock()
        only_link = MagicMock(attrs = {"href": "/s-kleidung/c42"})

        with (
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = [breadcrumb]) as mock_web_find,
            patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = [only_link]) as mock_web_find_all,
        ):
            category = await extractor._extract_category_from_ad_page()

            # The single code is expected on both sides of the slash.
            assert category == "42/42"
            mock_web_find.assert_awaited_once_with(By.ID, "vap-brdcrmb")
            mock_web_find_all.assert_awaited_once_with(By.CSS_SELECTOR, "a", parent = breadcrumb)

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_category_fallback_to_legacy_selectors(self, extractor:extract_module.AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
        """Test category extraction when breadcrumb links are not available and legacy selectors are used."""
        breadcrumb = MagicMock()
        # Integer hrefs: per the original test's notes these exercise the str() conversion of the href value.
        legacy_first = MagicMock(attrs = {"href": 12345})
        legacy_second = MagicMock(attrs = {"href": 67890})

        expected_message = _("Falling back to legacy breadcrumb selectors; collected ids: %s") % []
        caplog.set_level("DEBUG")

        with (
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = [breadcrumb, legacy_first, legacy_second]) as mock_web_find,
            patch.object(extractor, "web_find_all", new_callable = AsyncMock, side_effect = TimeoutError) as mock_web_find_all,
        ):
            category = await extractor._extract_category_from_ad_page()

            assert category == "12345/67890"
            # The fallback notice must be logged exactly once.
            logged_messages = [record.message for record in caplog.records]
            assert logged_messages.count(expected_message) == 1

            mock_web_find.assert_any_call(By.ID, "vap-brdcrmb")
            mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = breadcrumb)
            mock_web_find.assert_any_call(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = breadcrumb)
            mock_web_find_all.assert_awaited_once_with(By.CSS_SELECTOR, "a", parent = breadcrumb)

    @pytest.mark.asyncio
    async def test_extract_category_legacy_selectors_timeout(self, extractor:extract_module.AdExtractor, caplog:pytest.LogCaptureFixture) -> None:
        """Ensure fallback timeout logs the error and re-raises with translated message."""
        breadcrumb = MagicMock()

        async def fake_web_find(selector_type:By, selector_value:str, *, parent:Element | None = None, timeout:int | float | None = None) -> Element:
            # Only the breadcrumb container itself can be found; every other lookup times out.
            if selector_type != By.ID or selector_value != "vap-brdcrmb":
                raise TimeoutError("legacy selectors missing")
            return breadcrumb

        with (
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = fake_web_find),
            patch.object(extractor, "web_find_all", new_callable = AsyncMock, side_effect = TimeoutError),
            caplog.at_level("ERROR"),
            pytest.raises(TimeoutError, match = "Unable to locate breadcrumb fallback selectors"),
        ):
            await extractor._extract_category_from_ad_page()

        logged_messages = [record.message for record in caplog.records]
        assert any("Legacy breadcrumb selectors not found" in message for message in logged_messages)

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_special_attributes_empty(self, extractor:extract_module.AdExtractor) -> None:
        """Test extraction of special attributes when empty."""
        belen_conf = {"universalAnalyticsOpts": {"dimensions": {"ad_attributes": ""}}}

        with patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = belen_conf):
            attributes = await extractor._extract_special_attributes_from_ad_page(belen_conf)
            assert attributes == {}

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_special_attributes_not_empty(self, extractor:extract_module.AdExtractor) -> None:
        """Test extraction of special attributes when not empty."""
        belen_conf = {
            "universalAnalyticsOpts": {
                "dimensions": {"ad_attributes": "versand_s:t|color_s:creme|groesse_s:68|condition_s:alright|type_s:accessoires|art_s:maedchen"}
            }
        }

        attributes = await extractor._extract_special_attributes_from_ad_page(belen_conf)

        # Exactly five attributes are expected; "versand_s" must not be among them.
        assert attributes == {
            "color_s": "creme",
            "groesse_s": "68",
            "condition_s": "alright",
            "type_s": "accessoires",
            "art_s": "maedchen",
        }

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_special_attributes_missing_ad_attributes(self, extractor:extract_module.AdExtractor) -> None:
        """Test extraction of special attributes when ad_attributes key is missing."""
        # The "dimensions" mapping deliberately carries no ad_attributes key at all.
        belen_conf:dict[str, Any] = {"universalAnalyticsOpts": {"dimensions": {}}}

        attributes = await extractor._extract_special_attributes_from_ad_page(belen_conf)

        assert attributes == {}
|
|
|
|
|
|
class TestAdExtractorContact:
    """Tests for contact information extraction."""

    @pytest.fixture
    def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor:
        """Provide an ``AdExtractor`` built from a mocked browser and a config with test description affixes."""
        affixes = {"prefix": "Test Prefix", "suffix": "Test Suffix"}
        config = test_bot_config.with_values({"ad_defaults": {"description": affixes}})
        return extract_module.AdExtractor(MagicMock(spec = Browser), config)

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_contact_info(self, extractor:extract_module.AdExtractor) -> None:
        """Test extraction of contact information."""
        page_texts = [
            "12345 Berlin - Mitte",
            "Example Street 123,",
            "Test User",
        ]
        found_elements = [
            MagicMock(),  # contact person element
            MagicMock(),  # name element
            TimeoutError(),  # phone element (simulating no phone)
        ]

        with (
            patch.object(extractor, "page", MagicMock()),
            patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = page_texts),
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = found_elements),
        ):
            contact = await extractor._extract_contact_from_ad_page()

        # The trailing comma of the street is stripped; zip code and location are split apart.
        assert contact.street == "Example Street 123"
        assert contact.zipcode == "12345"
        assert contact.location == "Berlin - Mitte"
        assert contact.name == "Test User"
        assert contact.phone is None

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_contact_info_timeout(self, extractor:extract_module.AdExtractor) -> None:
        """Test contact info extraction when elements are not found."""
        with (
            patch.object(extractor, "page", MagicMock()),
            patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = TimeoutError()),
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = TimeoutError()),
        ):
            # With every lookup timing out, the timeout must reach the caller.
            with pytest.raises(TimeoutError):
                await extractor._extract_contact_from_ad_page()

    @pytest.mark.asyncio
    # pylint: disable=protected-access
    async def test_extract_contact_info_with_phone(self, extractor:extract_module.AdExtractor) -> None:
        """Test extraction of contact information including phone number."""
        page_texts = ["12345 Berlin - Mitte", "Example Street 123,", "Test User", "+49(0)1234 567890"]
        phone_element = MagicMock()
        found_elements = [
            MagicMock(),  # contact person element
            MagicMock(),  # name element
            phone_element,  # phone element
        ]

        with (
            patch.object(extractor, "page", MagicMock()),
            patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = page_texts),
            patch.object(extractor, "web_find", new_callable = AsyncMock, side_effect = found_elements),
        ):
            contact = await extractor._extract_contact_from_ad_page()

        assert contact.phone == "01234567890"  # Normalized phone number
|
|
|
|
|
|
class TestAdExtractorDownload:
|
|
"""Tests for download functionality."""
|
|
|
|
@pytest.fixture
def extractor(self, test_bot_config:Config) -> extract_module.AdExtractor:
    """Provide an ``AdExtractor`` built from a mocked browser and a config with test description affixes."""
    affixes = {"prefix": "Test Prefix", "suffix": "Test Suffix"}
    config = test_bot_config.with_values({"ad_defaults": {"description": affixes}})
    return extract_module.AdExtractor(MagicMock(spec = Browser), config)
|
|
|
|
@pytest.mark.asyncio
async def test_download_ad(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
    """Test downloading an ad - directory creation and saving ad data."""
    # Use tmp_path for OS-agnostic path handling
    download_base = tmp_path / "downloaded-ads"
    final_dir = download_base / "ad_12345_Test Advertisement Title"
    yaml_path = final_dir / "ad_12345.yaml"

    extracted_ad = AdPartial.model_validate(
        {
            "title": "Test Advertisement Title",
            "description": "Test Description",
            "category": "Dienstleistungen",
            "price": 100,
            "images": [],
            "contact": {"name": "Test User", "street": "Test Street 123", "zipcode": "12345", "location": "Test City"},
        }
    )

    with (
        patch("kleinanzeigen_bot.extract.xdg_paths.get_downloaded_ads_path", return_value = download_base),
        patch("kleinanzeigen_bot.extract.dicts.save_dict", autospec = True) as mock_save_dict,
        patch.object(
            extractor,
            "_extract_ad_page_info_with_directory_handling",
            new_callable = AsyncMock,
            return_value = (extracted_ad, str(final_dir)),
        ) as mock_extract_with_dir,
    ):
        await extractor.download_ad(12345)

        # Verify observable behavior: extraction and save were called
        mock_extract_with_dir.assert_called_once()
        mock_save_dict.assert_called_once()

        # Verify saved to correct location with correct data
        saved_path, saved_data = mock_save_dict.call_args.args[:2]
        assert Path(saved_path) == yaml_path
        assert saved_data == extracted_ad.model_dump()
|
|
|
|
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_images_no_images(self, extractor:extract_module.AdExtractor) -> None:
    """Image download yields no paths when the element lookup times out."""
    timing_out_lookup = AsyncMock(side_effect = TimeoutError)

    with patch.object(extractor, "web_find", timing_out_lookup):
        downloaded = await extractor._download_images_from_ad_page("/some/dir", 12345)

    assert len(downloaded) == 0
|
|
|
|
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_images_with_none_url(self, extractor:extract_module.AdExtractor) -> None:
    """Image elements whose src attribute is None are skipped by the image download."""
    # Two image elements: one with a usable src, one whose src is None
    img_with_src = MagicMock(attrs = {"src": "http://example.com/valid_image.jpg"})
    img_without_src = MagicMock(attrs = {"src": None})
    found_images = [img_with_src, img_without_src]

    with (
        patch.object(extractor, "web_find", new_callable = AsyncMock, return_value = MagicMock()),
        patch.object(extractor, "web_find_all", new_callable = AsyncMock, return_value = found_images),
        patch.object(extract_module.AdExtractor, "_download_and_save_image_sync", return_value = "/some/dir/ad_12345__img1.jpg"),
    ):
        downloaded = await extractor._download_images_from_ad_page("/some/dir", 12345)

    # Only the image with a real src is downloaded; the None entry is skipped
    assert len(downloaded) == 1
    assert downloaded[0] == "ad_12345__img1.jpg"
|
|
|
|
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_final_dir_exists(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
    """A final directory that already exists is removed and recreated, dropping its old contents."""
    base_dir = tmp_path / "downloaded-ads"
    base_dir.mkdir()

    # Pre-existing final directory holding a stale file that must not survive
    final_dir = base_dir / "ad_12345_Test Title"
    final_dir.mkdir()
    stale_file = final_dir / "old_file.txt"
    stale_file.write_text("old content")

    # Stand-in for the browser page
    extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test/12345")

    # Canned answers for the patched extractor helpers
    web_text_answers = [
        "Test Title",  # Title extraction
        "Test Title",  # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025",  # Creation date
    ]
    web_execute_answer = {"universalAnalyticsOpts": {"dimensions": {"l3_category_id": "", "ad_attributes": ""}}}
    contact_answer = ContactPartial(
        name = "Test",
        zipcode = "12345",
        location = "Berlin",
    )

    with (
        patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = web_text_answers),
        patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = web_execute_answer),
        patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"),
        patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}),
        patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")),
        patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)),
        patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
        patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
        patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = contact_answer),
    ):
        ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)

    # The old directory was deleted and recreated
    assert result_dir == final_dir
    assert result_dir.exists()
    assert not stale_file.exists()  # Old file should be gone
    assert ad_cfg.title == "Test Title"
|
|
|
|
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_rename_enabled(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
    """With rename_existing_folders enabled, an existing title-less directory is renamed to the titled one and keeps its files."""
    base_dir = tmp_path / "downloaded-ads"
    base_dir.mkdir()

    # Existing directory named without the title, already containing a file
    temp_dir = base_dir / "ad_12345"
    temp_dir.mkdir()
    (temp_dir / "existing_image.jpg").write_text("existing image data")

    # Switch on renaming of existing folders
    extractor.config.download.rename_existing_folders = True

    # Stand-in for the browser page
    extractor.page = MagicMock(url = "https://www.kleinanzeigen.de/s-anzeige/test/12345")

    # Canned answers for the patched extractor helpers
    title_and_texts = [
        "Test Title",  # Title extraction
        "Test Title",  # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025",  # Creation date
    ]
    analytics_opts = {"universalAnalyticsOpts": {"dimensions": {"l3_category_id": "", "ad_attributes": ""}}}
    contact = ContactPartial(
        name = "Test",
        zipcode = "12345",
        location = "Berlin",
    )

    with (
        patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = title_and_texts),
        patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = analytics_opts),
        patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"),
        patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}),
        patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")),
        patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)),
        patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
        patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
        patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = contact),
    ):
        ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)

    # The directory was renamed from temp_dir to the titled final directory
    expected_dir = base_dir / "ad_12345_Test Title"
    assert result_dir == expected_dir
    assert result_dir.exists()
    assert not temp_dir.exists()  # Old temp dir should be gone
    assert (result_dir / "existing_image.jpg").exists()  # File should be preserved
    assert ad_cfg.title == "Test Title"
|
|
|
|
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_extract_ad_page_info_with_directory_handling_use_existing(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
    """With rename_existing_folders set to False (default), an existing title-less directory is reused as-is."""
    base_dir = tmp_path / "downloaded-ads"
    base_dir.mkdir()

    # Existing directory named without the title, already containing a file
    temp_dir = base_dir / "ad_12345"
    temp_dir.mkdir()
    preexisting_image = temp_dir / "existing_image.jpg"
    preexisting_image.write_text("existing image data")

    # Make the setting under test explicit (False is the default)
    extractor.config.download.rename_existing_folders = False

    # Stand-in for the browser page
    browser_page = MagicMock()
    browser_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    extractor.page = browser_page

    # Canned answers for the patched extractor helpers
    text_sequence = [
        "Test Title",  # Title extraction
        "Test Title",  # Second title call for full extraction
        "Description text",  # Description
        "03.02.2025",  # Creation date
    ]
    execute_result = {"universalAnalyticsOpts": {"dimensions": {"l3_category_id": "", "ad_attributes": ""}}}
    contact_result = ContactPartial(
        name = "Test",
        zipcode = "12345",
        location = "Berlin",
    )

    with (
        patch.object(extractor, "web_text", new_callable = AsyncMock, side_effect = text_sequence),
        patch.object(extractor, "web_execute", new_callable = AsyncMock, return_value = execute_result),
        patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"),
        patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}),
        patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")),
        patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)),
        patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
        patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
        patch.object(extractor, "_extract_contact_from_ad_page", new_callable = AsyncMock, return_value = contact_result),
    ):
        ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)

    # The existing temp_dir was used (not renamed)
    assert result_dir == temp_dir
    assert result_dir.exists()
    assert (result_dir / "existing_image.jpg").exists()  # File should be preserved
    assert ad_cfg.title == "Test Title"
|
|
|
|
@pytest.mark.asyncio
# pylint: disable=protected-access
async def test_download_ad_with_umlauts_in_title(self, extractor:extract_module.AdExtractor, tmp_path:Path) -> None:
    """Test cross-platform Unicode handling for ad titles with umlauts (issue #728).

    Verifies that:
    1. Directories are created with NFC-normalized names (via sanitize_folder_name)
    2. Files can be saved to those directories (via save_dict's NFC normalization)
    3. No FileNotFoundError occurs due to NFC/NFD mismatch on Linux/Windows
    """
    import unicodedata  # noqa: PLC0415

    # Title with German umlauts (ä) - common in real ads
    title_with_umlauts = "KitchenAid Zuhälter - nie benutzt"

    # Mock the page
    page_mock = MagicMock()
    page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    extractor.page = page_mock

    base_dir = tmp_path / "downloaded-ads"
    base_dir.mkdir()

    with (
        patch.object(
            extractor,
            "web_text",
            new_callable = AsyncMock,
            side_effect = [
                title_with_umlauts,  # Title extraction
                title_with_umlauts,  # Second title call for full extraction
                "Description text",  # Description
                "03.02.2025",  # Creation date
            ],
        ),
        patch.object(
            extractor,
            "web_execute",
            new_callable = AsyncMock,
            return_value = {"universalAnalyticsOpts": {"dimensions": {"l3_category_id": "", "ad_attributes": ""}}},
        ),
        patch.object(extractor, "_extract_category_from_ad_page", new_callable = AsyncMock, return_value = "160"),
        patch.object(extractor, "_extract_special_attributes_from_ad_page", new_callable = AsyncMock, return_value = {}),
        patch.object(extractor, "_extract_pricing_info_from_ad_page", new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")),
        patch.object(extractor, "_extract_shipping_info_from_ad_page", new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)),
        patch.object(extractor, "_extract_sell_directly_from_ad_page", new_callable = AsyncMock, return_value = False),
        patch.object(extractor, "_download_images_from_ad_page", new_callable = AsyncMock, return_value = []),
        patch.object(
            extractor,
            "_extract_contact_from_ad_page",
            new_callable = AsyncMock,
            return_value = ContactPartial(
                name = "Test",
                zipcode = "12345",
                location = "Berlin",
            ),
        ),
    ):
        ad_cfg, result_dir = await extractor._extract_ad_page_info_with_directory_handling(base_dir, 12345)

    # Verify directory was created with NFC-normalized name.
    # Existence alone would also hold for a decomposed (NFD) name, so the
    # normalization form of the returned directory name is checked explicitly.
    assert result_dir.exists()
    assert unicodedata.is_normalized("NFC", result_dir.name), f"directory name is not NFC-normalized: {result_dir.name!r}"
    assert ad_cfg.title == title_with_umlauts

    # Test saving YAML file to the Unicode directory path
    # Before fix: Failed on Linux/Windows due to NFC/NFD mismatch
    # After fix: Both directory and file use NFC normalization
    ad_file_path = Path(result_dir) / "ad_12345.yaml"

    from kleinanzeigen_bot.utils import dicts  # noqa: PLC0415

    header_string = (
        "# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/refs/heads/main/schemas/ad.schema.json"
    )

    # save_dict normalizes path to NFC, matching the NFC directory name
    dicts.save_dict(str(ad_file_path), ad_cfg.model_dump(), header = header_string)

    # Verify file was created successfully (no FileNotFoundError)
    assert ad_file_path.exists()
    assert ad_file_path.is_file()
|