mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
enh: allow per-ad overriding of global description affixes (#416)
This commit is contained in:
@@ -107,3 +107,98 @@ def test_extractor(browser_mock: MagicMock, sample_config: dict[str, Any]) -> Ad
|
||||
- sample_config: Used to initialize the extractor with a valid configuration
|
||||
"""
|
||||
return AdExtractor(browser_mock, sample_config)
|
||||
|
||||
|
||||
@pytest.fixture
def description_test_cases() -> list[tuple[dict[str, Any], str, str]]:
    """Supply test cases for description prefix/suffix handling.

    Each entry is a ``(config, raw_description, expected_description)`` tuple:
    the raw description carries no affixes, and the expected description is
    the raw text with the affixes selected from ``config`` applied.
    """
    raw = "Original Description"

    # Case 1: new flattened format
    flattened_cfg: dict[str, Any] = {
        "ad_defaults": {
            "description_prefix": "Global Prefix\n",
            "description_suffix": "\nGlobal Suffix",
        }
    }

    # Case 2: legacy nested format
    legacy_cfg: dict[str, Any] = {
        "ad_defaults": {
            "description": {
                "prefix": "Legacy Prefix\n",
                "suffix": "\nLegacy Suffix",
            }
        }
    }

    # Case 3: both formats present - the new format takes precedence
    both_cfg: dict[str, Any] = {
        "ad_defaults": {
            "description_prefix": "New Prefix\n",
            "description_suffix": "\nNew Suffix",
            "description": {
                "prefix": "Legacy Prefix\n",
                "suffix": "\nLegacy Suffix",
            },
        }
    }

    # Case 4: empty config
    empty_cfg: dict[str, Any] = {"ad_defaults": {}}

    # Case 5: None values in config
    none_cfg: dict[str, Any] = {
        "ad_defaults": {
            "description_prefix": None,
            "description_suffix": None,
            "description": {
                "prefix": None,
                "suffix": None,
            },
        }
    }

    # Case 6: non-string values in config
    non_string_cfg: dict[str, Any] = {
        "ad_defaults": {
            "description_prefix": 123,
            "description_suffix": True,
            "description": {
                "prefix": [],
                "suffix": {},
            },
        }
    }

    return [
        (flattened_cfg, raw, "Global Prefix\nOriginal Description\nGlobal Suffix"),
        (legacy_cfg, raw, "Legacy Prefix\nOriginal Description\nLegacy Suffix"),
        (both_cfg, raw, "New Prefix\nOriginal Description\nNew Suffix"),
        # The remaining configs contribute no usable affix, so the text is unchanged.
        (empty_cfg, raw, raw),
        (none_cfg, raw, raw),
        (non_string_cfg, raw, raw),
    ]
|
||||
|
||||
|
||||
@pytest.fixture
def mock_web_text_responses() -> list[str]:
    """Supply common mock responses for ``web_text`` calls.

    The list is ordered as title, description, creation date.
    """
    title = "Test Title"
    description = "Test Description"
    creation_date = "03.02.2025"
    return [title, description, creation_date]
|
||||
|
||||
@@ -3,6 +3,10 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from kleinanzeigen_bot import ads
|
||||
|
||||
|
||||
@@ -27,3 +31,148 @@ def test_calculate_content_hash_with_none_values() -> None:
|
||||
hash_value = ads.calculate_content_hash(ad_cfg)
|
||||
assert isinstance(hash_value, str)
|
||||
assert len(hash_value) == 64 # SHA-256 hash is 64 characters long
|
||||
|
||||
|
||||
@pytest.mark.parametrize("config,prefix,expected", [
    # New flattened format
    ({"ad_defaults": {"description_prefix": "Hello"}}, True, "Hello"),
    ({"ad_defaults": {"description_suffix": "Bye"}}, False, "Bye"),
    # Legacy nested format
    ({"ad_defaults": {"description": {"prefix": "Hi"}}}, True, "Hi"),
    ({"ad_defaults": {"description": {"suffix": "Ciao"}}}, False, "Ciao"),
    # Precedence: the new flattened format wins over the legacy nested one
    (
        {
            "ad_defaults": {
                "description_prefix": "Hello",
                "description": {"prefix": "Hi"},
            }
        },
        True,
        "Hello",
    ),
    (
        {
            "ad_defaults": {
                "description_suffix": "Bye",
                "description": {"suffix": "Ciao"},
            }
        },
        False,
        "Bye",
    ),
    # Empty ad_defaults - checked for both the prefix and the suffix side
    ({"ad_defaults": {}}, True, ""),
    ({"ad_defaults": {}}, False, ""),
    # None values - checked for both the prefix and the suffix side
    ({"ad_defaults": {"description_prefix": None, "description_suffix": None}}, True, ""),
    ({"ad_defaults": {"description_prefix": None, "description_suffix": None}}, False, ""),
    # Non-string values - checked for both the prefix and the suffix side
    ({"ad_defaults": {"description_prefix": 123, "description_suffix": True}}, True, ""),
    ({"ad_defaults": {"description_prefix": 123, "description_suffix": True}}, False, ""),
    # Malformed configs (empty dict, missing or non-dict ad_defaults, non-dict
    # description) are exercised by test_get_description_affixes_edge_cases
    # and are intentionally not repeated here.
])
def test_get_description_affixes(
    config: dict[str, Any],
    prefix: bool,
    expected: str
) -> None:
    """Test get_description_affixes with well-formed ``ad_defaults`` inputs.

    ``prefix`` selects which affix is requested (True for the prefix, False
    for the suffix); ``expected`` is the string the lookup must return.
    """
    result = ads.get_description_affixes(config, prefix)
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("config,prefix,expected", [
    # Completely empty config
    ({}, True, ""),
    # ad_defaults key is missing
    ({"some_other_key": {}}, True, ""),
    # ad_defaults is not a dict
    ({"ad_defaults": "invalid"}, True, ""),
    # description field has an invalid type
    ({"ad_defaults": {"description": 123}}, True, ""),
])
def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool, expected: str) -> None:
    """Check that malformed configs yield the expected (empty) affix."""
    result = ads.get_description_affixes(config, prefix)
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("config,expected", [
    (None, ""),      # None instead of a config
    ([], ""),        # empty list
    ("string", ""),  # plain string
    (123, ""),       # integer
    (3.14, ""),      # float
    (set(), ""),     # empty set
])
def test_get_description_affixes_edge_cases_non_dict(config: Any, expected: str) -> None:
    """Check get_description_affixes when the config is not a dict at all."""
    assert ads.get_description_affixes(config, prefix=True) == expected
|
||||
|
||||
@@ -301,56 +301,110 @@ class TestAdExtractorNavigation:
|
||||
|
||||
class TestAdExtractorContent:
|
||||
"""Tests for content extraction functionality."""
|
||||
# pylint: disable=protected-access
|
||||
|
||||
@pytest.fixture
|
||||
def extractor(self) -> AdExtractor:
|
||||
browser_mock = MagicMock(spec = Browser)
|
||||
config_mock = {
|
||||
"ad_defaults": {
|
||||
"description": {
|
||||
"prefix": "Test Prefix",
|
||||
"suffix": "Test Suffix"
|
||||
}
|
||||
}
|
||||
}
|
||||
return AdExtractor(browser_mock, config_mock)
|
||||
def extractor_with_config(self) -> AdExtractor:
|
||||
"""Create extractor with specific config for testing prefix/suffix handling."""
|
||||
browser_mock = MagicMock(spec=Browser)
|
||||
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_title_and_description(self, extractor: AdExtractor) -> None:
|
||||
"""Test basic extraction of title and description."""
|
||||
page_mock = AsyncMock()
|
||||
async def test_extract_description_with_affixes(
|
||||
self,
|
||||
test_extractor: AdExtractor,
|
||||
description_test_cases: list[tuple[dict[str, Any], str, str]]
|
||||
) -> None:
|
||||
"""Test extraction of description with various prefix/suffix configurations."""
|
||||
# Mock the page
|
||||
page_mock = MagicMock()
|
||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||
test_extractor.page = page_mock
|
||||
|
||||
category_mock = AsyncMock()
|
||||
category_mock.attrs = {'href': '/s-kategorie/c123'}
|
||||
for config, raw_description, _ in description_test_cases: # Changed to _ since we don't use expected_description
|
||||
test_extractor.config = config
|
||||
|
||||
with patch.object(extractor, 'page', page_mock), \
|
||||
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
|
||||
patch.object(extractor, 'web_find', new_callable = AsyncMock, return_value = category_mock), \
|
||||
patch.object(extractor, '_extract_category_from_ad_page', new_callable = AsyncMock, return_value = "17/23"), \
|
||||
patch.object(extractor, '_extract_special_attributes_from_ad_page', new_callable = AsyncMock, return_value = {}), \
|
||||
patch.object(extractor, '_extract_pricing_info_from_ad_page', new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
|
||||
patch.object(extractor, '_extract_shipping_info_from_ad_page', new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
|
||||
patch.object(extractor, '_extract_sell_directly_from_ad_page', new_callable = AsyncMock, return_value = False), \
|
||||
patch.object(extractor, '_download_images_from_ad_page', new_callable = AsyncMock, return_value = []), \
|
||||
patch.object(extractor, '_extract_contact_from_ad_page', new_callable = AsyncMock, return_value = {}):
|
||||
|
||||
mock_web_text.side_effect = [
|
||||
"Test Title",
|
||||
"Test Prefix Original Description Test Suffix",
|
||||
"03.02.2025"
|
||||
]
|
||||
|
||||
info = await extractor._extract_ad_page_info("/some/dir", 12345)
|
||||
assert isinstance(info, dict)
|
||||
assert info["title"] == "Test Title"
|
||||
assert info["description"].strip() == "Original Description"
|
||||
assert info["created_on"] == "2025-02-03T00:00:00"
|
||||
with patch.multiple(test_extractor,
|
||||
web_text=AsyncMock(side_effect=[
|
||||
"Test Title", # Title
|
||||
raw_description, # Raw description (without affixes)
|
||||
"03.02.2025" # Creation date
|
||||
]),
|
||||
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||
):
|
||||
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||
assert info["description"] == raw_description
|
||||
|
||||
@pytest.mark.asyncio
|
||||
# pylint: disable=protected-access
|
||||
async def test_extract_sell_directly(self, extractor: AdExtractor) -> None:
|
||||
async def test_extract_description_with_affixes_timeout(
    self,
    test_extractor: AdExtractor
) -> None:
    """Test handling of a timeout while the description is being extracted."""
    # Stand-in page object exposing only the ad URL
    fake_page = MagicMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    test_extractor.page = fake_page

    # web_text answers in call order: title succeeds, description times out,
    # creation date succeeds.
    replacements = {
        "web_text": AsyncMock(side_effect=[
            "Test Title",
            TimeoutError("Timeout"),
            "03.02.2025"
        ]),
        "_extract_category_from_ad_page": AsyncMock(return_value="160"),
        "_extract_special_attributes_from_ad_page": AsyncMock(return_value={}),
        "_extract_pricing_info_from_ad_page": AsyncMock(return_value=(None, "NOT_APPLICABLE")),
        "_extract_shipping_info_from_ad_page": AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
        "_extract_sell_directly_from_ad_page": AsyncMock(return_value=False),
        "_download_images_from_ad_page": AsyncMock(return_value=[]),
        "_extract_contact_from_ad_page": AsyncMock(return_value={}),
    }

    with patch.multiple(test_extractor, **replacements):
        try:
            info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
            assert info["description"] == ""
        except TimeoutError:
            # A propagated timeout is also acceptable - depends on how we want to handle timeouts.
            # NOTE(review): because both outcomes are accepted, this test only fails when a
            # non-empty description is returned; consider pinning the intended behaviour.
            pass
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_description_with_affixes_no_affixes(
    self,
    test_extractor: AdExtractor
) -> None:
    """Test description extraction when the config defines no affixes."""
    # Stand-in page object exposing only the ad URL
    fake_page = MagicMock()
    fake_page.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
    test_extractor.page = fake_page
    test_extractor.config = {"ad_defaults": {}}  # no prefix/suffix configured
    raw_description = "Original Description"

    # web_text answers in call order: title, description (without affixes), creation date.
    replacements = {
        "web_text": AsyncMock(side_effect=[
            "Test Title",
            raw_description,
            "03.02.2025"
        ]),
        "_extract_category_from_ad_page": AsyncMock(return_value="160"),
        "_extract_special_attributes_from_ad_page": AsyncMock(return_value={}),
        "_extract_pricing_info_from_ad_page": AsyncMock(return_value=(None, "NOT_APPLICABLE")),
        "_extract_shipping_info_from_ad_page": AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
        "_extract_sell_directly_from_ad_page": AsyncMock(return_value=False),
        "_download_images_from_ad_page": AsyncMock(return_value=[]),
        "_extract_contact_from_ad_page": AsyncMock(return_value={}),
    }

    with patch.multiple(test_extractor, **replacements):
        info = await test_extractor._extract_ad_page_info("/some/dir", 12345)

    # With nothing to strip, the extracted description equals the raw page text.
    assert info["description"] == raw_description
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_sell_directly(self, test_extractor: AdExtractor) -> None:
|
||||
"""Test extraction of sell directly option."""
|
||||
test_cases = [
|
||||
("Direkt kaufen", True),
|
||||
@@ -358,12 +412,12 @@ class TestAdExtractorContent:
|
||||
]
|
||||
|
||||
for text, expected in test_cases:
|
||||
with patch.object(extractor, 'web_text', new_callable = AsyncMock, return_value = text):
|
||||
result = await extractor._extract_sell_directly_from_ad_page()
|
||||
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, return_value=text):
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is expected
|
||||
|
||||
with patch.object(extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError):
|
||||
result = await extractor._extract_sell_directly_from_ad_page()
|
||||
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError):
|
||||
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||
assert result is None
|
||||
|
||||
|
||||
|
||||
@@ -85,6 +85,11 @@ def create_ad_config(base_config: dict[str, Any], **overrides: Any) -> dict[str,
|
||||
config[key] = value
|
||||
else:
|
||||
config[key] = value
|
||||
|
||||
# Only check length if description is a string
|
||||
if isinstance(config.get("description"), str):
|
||||
assert len(config["description"]) <= 4000, "Length of ad description including prefix and suffix exceeds 4000 chars"
|
||||
|
||||
return config
|
||||
|
||||
|
||||
@@ -1016,3 +1021,40 @@ class TestKleinanzeigenBotUrlConstruction:
|
||||
# Test ad publishing URL
|
||||
expected_publish_url = "https://www.kleinanzeigen.de/p-anzeige-aufgeben-schritt2.html"
|
||||
assert f"{test_bot.root_url}/p-anzeige-aufgeben-schritt2.html" == expected_publish_url
|
||||
|
||||
|
||||
class TestKleinanzeigenBotPrefixSuffix:
    """Tests for description prefix and suffix functionality."""
    # pylint: disable=protected-access

    def test_description_prefix_suffix_handling(
        self,
        test_bot: KleinanzeigenBot,
        description_test_cases: list[tuple[dict[str, Any], str, str]]
    ) -> None:
        """Check the affixed description for every shared config case."""
        for cfg, raw, wanted in description_test_cases:
            test_bot.config = cfg
            # The method is double-underscore private, so it is reached
            # through its name-mangled attribute name.
            build_description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")
            built = build_description({"description": raw, "active": True})
            assert built == wanted

    def test_description_length_validation(self, test_bot: KleinanzeigenBot) -> None:
        """Check that an over-long description with affixes raises the expected error."""
        test_bot.config = {
            "ad_defaults": {
                "description_prefix": "P" * 1000,
                "description_suffix": "S" * 1000
            }
        }
        # 1000 (prefix) + 2001 (body) + 1000 (suffix) = 4001 characters, one over the limit.
        oversized_ad = {
            "description": "D" * 2001,
            "active": True
        }
        build_description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")

        with pytest.raises(AssertionError) as exc_info:
            build_description(oversized_ad)

        message = str(exc_info.value)
        assert "Length of ad description including prefix and suffix exceeds 4000 chars" in message
        assert "Description length: 4001" in message
|
||||
|
||||
Reference in New Issue
Block a user