mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
enh: allow per-ad overriding of global description affixes (#416)
This commit is contained in:
40
README.md
40
README.md
@@ -236,9 +236,10 @@ ad_files:
|
|||||||
ad_defaults:
|
ad_defaults:
|
||||||
active: true
|
active: true
|
||||||
type: OFFER # one of: OFFER, WANTED
|
type: OFFER # one of: OFFER, WANTED
|
||||||
description:
|
|
||||||
prefix: ""
|
description_prefix: ""
|
||||||
suffix: ""
|
description_suffix: ""
|
||||||
|
|
||||||
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
|
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
|
||||||
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
|
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
|
||||||
shipping_costs: # e.g. 2.95
|
shipping_costs: # e.g. 2.95
|
||||||
@@ -297,6 +298,9 @@ type: # one of: OFFER, WANTED (default: OFFER)
|
|||||||
title:
|
title:
|
||||||
description: # can be multiline, see syntax here https://yaml-multiline.info/
|
description: # can be multiline, see syntax here https://yaml-multiline.info/
|
||||||
|
|
||||||
|
description_prefix: # optional prefix to be added to the description overriding the default prefix
|
||||||
|
description_suffix: # optional suffix to be added to the description overriding the default suffix
|
||||||
|
|
||||||
# built-in category name as specified in https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
|
# built-in category name as specified in https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml
|
||||||
# or custom category name as specified in config.yaml
|
# or custom category name as specified in config.yaml
|
||||||
# or category ID (e.g. 161/278)
|
# or category ID (e.g. 161/278)
|
||||||
@@ -348,7 +352,35 @@ updated_on: # ISO timestamp when the ad was last published
|
|||||||
content_hash: # hash of the ad content, used to detect changes
|
content_hash: # hash of the ad content, used to detect changes
|
||||||
```
|
```
|
||||||
|
|
||||||
### <a name="existing-browser"></a>3) Using an existing browser window
|
### <a name="description-prefix-suffix"></a>3) Description Prefix and Suffix
|
||||||
|
|
||||||
|
You can add prefix and suffix text to your ad descriptions in two ways:
|
||||||
|
|
||||||
|
#### New Format (Recommended)
|
||||||
|
|
||||||
|
In your config.yaml file you can specify a `description_prefix` and `description_suffix` under the `ad_defaults` section.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
ad_defaults:
|
||||||
|
description_prefix: "Prefix text"
|
||||||
|
description_suffix: "Suffix text"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Legacy Format
|
||||||
|
|
||||||
|
In your ad configuration file you can specify a `prefix` and `suffix` under the `description` section.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
description:
|
||||||
|
prefix: "Prefix text"
|
||||||
|
suffix: "Suffix text"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Precedence
|
||||||
|
|
||||||
|
The new format has precedence over the legacy format. If you specify both the new and the legacy format in your config, the new format will be used. We recommend using the new format as it is more flexible and easier to manage.
|
||||||
|
|
||||||
|
### <a name="existing-browser"></a>4) Using an existing browser window
|
||||||
|
|
||||||
By default a new browser process will be launched. To reuse a manually launched browser window/process follow these steps:
|
By default a new browser process will be launched. To reuse a manually launched browser window/process follow these steps:
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from ruamel.yaml import YAML
|
|||||||
from wcmatch import glob
|
from wcmatch import glob
|
||||||
|
|
||||||
from . import extract, resources
|
from . import extract, resources
|
||||||
from .ads import calculate_content_hash
|
from .ads import calculate_content_hash, get_description_affixes
|
||||||
from .utils import dicts, error_handlers, loggers, misc
|
from .utils import dicts, error_handlers, loggers, misc
|
||||||
from .utils.files import abspath
|
from .utils.files import abspath
|
||||||
from .utils.i18n import Locale, get_current_locale, set_current_locale, pluralize
|
from .utils.i18n import Locale, get_current_locale, set_current_locale, pluralize
|
||||||
@@ -318,11 +318,6 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
if not ad_files:
|
if not ad_files:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
description_config = {
|
|
||||||
"prefix": self.config["ad_defaults"]["description"]["prefix"] or "",
|
|
||||||
"suffix": self.config["ad_defaults"]["description"]["suffix"] or ""
|
|
||||||
}
|
|
||||||
|
|
||||||
ids = []
|
ids = []
|
||||||
use_specific_ads = False
|
use_specific_ads = False
|
||||||
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
||||||
@@ -356,10 +351,18 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
if not self.__check_ad_republication(ad_cfg, ad_cfg_orig, ad_file_relative):
|
if not self.__check_ad_republication(ad_cfg, ad_cfg_orig, ad_file_relative):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ad_cfg["description"] = description_config["prefix"] + (ad_cfg["description"] or "") + description_config["suffix"]
|
# Get prefix/suffix from ad config if present, otherwise use defaults
|
||||||
|
prefix = ad_cfg.get("prefix", self.config["ad_defaults"]["description"]["prefix"] or "")
|
||||||
|
suffix = ad_cfg.get("suffix", self.config["ad_defaults"]["description"]["suffix"] or "")
|
||||||
|
|
||||||
|
# Combine description parts
|
||||||
|
ad_cfg["description"] = prefix + (ad_cfg["description"] or "") + suffix
|
||||||
ad_cfg["description"] = ad_cfg["description"].replace("@", "(at)")
|
ad_cfg["description"] = ad_cfg["description"].replace("@", "(at)")
|
||||||
ensure(len(ad_cfg["description"]) <= 4000, f"""Length of ad description including prefix and suffix exceeds 4000 chars. Description length: {
|
|
||||||
len(ad_cfg['description'])} chars. @ {ad_file}""")
|
# Validate total length
|
||||||
|
ensure(len(ad_cfg["description"]) <= 4000,
|
||||||
|
f"""Length of ad description including prefix and suffix exceeds 4000 chars. Description length: {
|
||||||
|
len(ad_cfg["description"])} chars. @ {ad_file}.""")
|
||||||
|
|
||||||
# pylint: disable=cell-var-from-loop
|
# pylint: disable=cell-var-from-loop
|
||||||
def assert_one_of(path:str, allowed:Iterable[str]) -> None:
|
def assert_one_of(path:str, allowed:Iterable[str]) -> None:
|
||||||
@@ -693,7 +696,8 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
#############################
|
#############################
|
||||||
# set description
|
# set description
|
||||||
#############################
|
#############################
|
||||||
await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + ad_cfg["description"].replace("`", "'") + "`")
|
description = self.__get_description_with_affixes(ad_cfg)
|
||||||
|
await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + description.replace("`", "'") + "`")
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set contact zipcode
|
# set contact zipcode
|
||||||
@@ -1040,10 +1044,64 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
else:
|
else:
|
||||||
LOG.error('The page with the id %d does not exist!', ad_id)
|
LOG.error('The page with the id %d does not exist!', ad_id)
|
||||||
|
|
||||||
|
def __get_description_with_affixes(self, ad_cfg: dict[str, Any]) -> str:
    """Get the complete description with prefix and suffix applied.

    Precedence (highest to lowest):
    1. Direct ad-level affixes (description_prefix/suffix)
    2. Legacy nested ad-level affixes (description.prefix/suffix)
    3. Global flattened affixes (ad_defaults.description_prefix/suffix)
    4. Legacy global nested affixes (ad_defaults.description.prefix/suffix)

    Levels 3 and 4 are delegated to ``get_description_affixes``.

    Args:
        ad_cfg: The ad configuration dictionary. ``description`` may be a
            plain string or a mapping holding the body under ``text``.

    Returns:
        The complete description with prefix and suffix applied and every
        ``@`` replaced by ``(at)``.

    Raises:
        Whatever ``ensure`` raises when the final text exceeds 4000 chars.
    """
    # Get the main description text
    raw_description = ad_cfg.get("description")
    if isinstance(raw_description, dict):
        description_text = raw_description.get("text")
    elif isinstance(raw_description, str):
        description_text = raw_description
    else:
        description_text = None
    if description_text is None:
        # An explicit null must not be rendered as the literal string "None".
        description_text = ""

    def resolve_affix(affix_type: str) -> Any:
        """Return the first affix that is set, following the documented precedence."""
        # 1. Direct ad-level affix
        direct_value = ad_cfg.get(f"description_{affix_type}")
        if direct_value is not None:
            return direct_value
        # 2. Legacy nested ad-level affix
        nested_value = dicts.safe_get(ad_cfg, "description", affix_type)
        if nested_value is not None:
            return nested_value
        # 3./4. Global affix from config
        return get_description_affixes(self.config, prefix=affix_type == "prefix")

    prefix = resolve_affix("prefix")
    suffix = resolve_affix("suffix")

    # Combine the parts
    final_description = str(prefix) + str(description_text) + str(suffix)

    # Affixes resolved here have not passed through any earlier "@" replacement,
    # so apply it to the combined text. Done before the length check because it
    # lengthens the string.
    final_description = final_description.replace("@", "(at)")

    # Validate length
    ensure(len(final_description) <= 4000,
           f"Length of ad description including prefix and suffix exceeds 4000 chars. Description length: {len(final_description)} chars.")

    return final_description
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# main entry point
|
# main entry point
|
||||||
#############################
|
#############################
|
||||||
|
|
||||||
|
|
||||||
def main(args:list[str]) -> None:
|
def main(args:list[str]) -> None:
|
||||||
if "version" not in args:
|
if "version" not in args:
|
||||||
print(textwrap.dedent(r"""
|
print(textwrap.dedent(r"""
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanze
|
|||||||
"""
|
"""
|
||||||
import hashlib, json, os
|
import hashlib, json, os
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from .utils import dicts
|
||||||
|
|
||||||
|
|
||||||
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
||||||
@@ -36,3 +37,51 @@ def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
|||||||
# Create sorted JSON string for consistent hashes
|
# Create sorted JSON string for consistent hashes
|
||||||
content_str = json.dumps(content, sort_keys = True)
|
content_str = json.dumps(content, sort_keys = True)
|
||||||
return hashlib.sha256(content_str.encode()).hexdigest()
|
return hashlib.sha256(content_str.encode()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def get_description_affixes(config: dict[str, Any], prefix: bool = True) -> str:
    """Get the global description prefix or suffix with proper precedence.

    This function handles both the new flattened format and legacy nested format:

    New format (flattened):
        ad_defaults:
            description_prefix: "Global Prefix"
            description_suffix: "Global Suffix"

    Legacy format (nested):
        ad_defaults:
            description:
                prefix: "Legacy Prefix"
                suffix: "Legacy Suffix"

    A non-empty flattened value wins. An empty flattened value does not hide
    a legacy value, so a config that only uses the legacy layout keeps
    working even when the flattened keys are present as empty strings.

    Args:
        config: Configuration dictionary containing ad_defaults
        prefix: If True, get prefix, otherwise get suffix

    Returns:
        The appropriate affix string, empty string if none found or if the
        configuration is malformed (non-dict levels, non-string values).

    Example:
        >>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}}
        >>> get_description_affixes(config, prefix=True)
        'Hello'
    """
    # Handle edge cases: every level is checked explicitly so malformed
    # configs degrade to "" instead of raising.
    if not isinstance(config, dict):
        return ""

    ad_defaults = config.get("ad_defaults")
    if not isinstance(ad_defaults, dict):
        return ""

    affix_type = "prefix" if prefix else "suffix"

    # First try new flattened format (description_prefix/description_suffix).
    # Only a non-empty string takes precedence over the legacy format.
    flattened_value = ad_defaults.get(f"description_{affix_type}")
    if isinstance(flattened_value, str) and flattened_value:
        return flattened_value

    # Then try legacy nested format (description.prefix/description.suffix)
    legacy_section = ad_defaults.get("description")
    if isinstance(legacy_section, dict):
        nested_value = legacy_section.get(affix_type)
        if isinstance(nested_value, str):
            return nested_value

    return ""
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import urllib.request as urllib_request
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Final
|
from typing import Any, Final
|
||||||
|
|
||||||
from .ads import calculate_content_hash
|
from .ads import calculate_content_hash, get_description_affixes
|
||||||
from .utils import dicts, i18n, loggers, misc, reflect
|
from .utils import dicts, i18n, loggers, misc, reflect
|
||||||
from .utils.web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
|
from .utils.web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
|
||||||
|
|
||||||
@@ -236,9 +236,23 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
info['category'] = await self._extract_category_from_ad_page()
|
info['category'] = await self._extract_category_from_ad_page()
|
||||||
info['title'] = title
|
info['title'] = title
|
||||||
info['description'] = (await self.web_text(By.ID, 'viewad-description-text')).strip() \
|
|
||||||
.removeprefix((self.config["ad_defaults"]["description"]["prefix"] or "").strip()) \
|
# Get raw description text
|
||||||
.removesuffix((self.config["ad_defaults"]["description"]["suffix"] or "").strip())
|
raw_description = (await self.web_text(By.ID, 'viewad-description-text')).strip()
|
||||||
|
|
||||||
|
# Get prefix and suffix from config
|
||||||
|
prefix = get_description_affixes(self.config, prefix=True)
|
||||||
|
suffix = get_description_affixes(self.config, prefix=False)
|
||||||
|
|
||||||
|
# Remove prefix and suffix if present
|
||||||
|
description_text = raw_description
|
||||||
|
if prefix and description_text.startswith(prefix.strip()):
|
||||||
|
description_text = description_text[len(prefix.strip()):]
|
||||||
|
if suffix and description_text.endswith(suffix.strip()):
|
||||||
|
description_text = description_text[:-len(suffix.strip())]
|
||||||
|
|
||||||
|
info['description'] = description_text.strip()
|
||||||
|
|
||||||
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
|
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
|
||||||
if "art_s" in info['special_attributes']:
|
if "art_s" in info['special_attributes']:
|
||||||
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ ad_files:
|
|||||||
ad_defaults:
|
ad_defaults:
|
||||||
active: true
|
active: true
|
||||||
type: OFFER # one of: OFFER, WANTED
|
type: OFFER # one of: OFFER, WANTED
|
||||||
description:
|
description_prefix: "" # prefix for the ad description
|
||||||
prefix: ""
|
description_suffix: "" # suffix for the ad description
|
||||||
suffix: ""
|
|
||||||
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
|
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
|
||||||
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
|
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
|
||||||
sell_directly: false # requires shipping_options to take effect
|
sell_directly: false # requires shipping_options to take effect
|
||||||
|
|||||||
@@ -107,3 +107,98 @@ def test_extractor(browser_mock: MagicMock, sample_config: dict[str, Any]) -> Ad
|
|||||||
- sample_config: Used to initialize the extractor with a valid configuration
|
- sample_config: Used to initialize the extractor with a valid configuration
|
||||||
"""
|
"""
|
||||||
return AdExtractor(browser_mock, sample_config)
|
return AdExtractor(browser_mock, sample_config)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def description_test_cases() -> list[tuple[dict[str, Any], str, str]]:
|
||||||
|
"""Provides test cases for description prefix/suffix handling.
|
||||||
|
|
||||||
|
Returns tuples of (config, raw_description, expected_description)
|
||||||
|
"""
|
||||||
|
return [
|
||||||
|
# Test case 1: New flattened format
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description_prefix": "Global Prefix\n",
|
||||||
|
"description_suffix": "\nGlobal Suffix"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Original Description", # Raw description without affixes
|
||||||
|
"Global Prefix\nOriginal Description\nGlobal Suffix" # Expected with affixes
|
||||||
|
),
|
||||||
|
# Test case 2: Legacy nested format
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description": {
|
||||||
|
"prefix": "Legacy Prefix\n",
|
||||||
|
"suffix": "\nLegacy Suffix"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Original Description",
|
||||||
|
"Legacy Prefix\nOriginal Description\nLegacy Suffix"
|
||||||
|
),
|
||||||
|
# Test case 3: Both formats - new format takes precedence
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description_prefix": "New Prefix\n",
|
||||||
|
"description_suffix": "\nNew Suffix",
|
||||||
|
"description": {
|
||||||
|
"prefix": "Legacy Prefix\n",
|
||||||
|
"suffix": "\nLegacy Suffix"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Original Description",
|
||||||
|
"New Prefix\nOriginal Description\nNew Suffix"
|
||||||
|
),
|
||||||
|
# Test case 4: Empty config
|
||||||
|
(
|
||||||
|
{"ad_defaults": {}},
|
||||||
|
"Original Description",
|
||||||
|
"Original Description"
|
||||||
|
),
|
||||||
|
# Test case 5: None values in config
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description_prefix": None,
|
||||||
|
"description_suffix": None,
|
||||||
|
"description": {
|
||||||
|
"prefix": None,
|
||||||
|
"suffix": None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Original Description",
|
||||||
|
"Original Description"
|
||||||
|
),
|
||||||
|
# Test case 6: Non-string values in config
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description_prefix": 123,
|
||||||
|
"description_suffix": True,
|
||||||
|
"description": {
|
||||||
|
"prefix": [],
|
||||||
|
"suffix": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"Original Description",
|
||||||
|
"Original Description"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_web_text_responses() -> list[str]:
|
||||||
|
"""Provides common mock responses for web_text calls."""
|
||||||
|
return [
|
||||||
|
"Test Title", # Title
|
||||||
|
"Test Description", # Description
|
||||||
|
"03.02.2025" # Creation date
|
||||||
|
]
|
||||||
|
|||||||
@@ -3,6 +3,10 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from kleinanzeigen_bot import ads
|
from kleinanzeigen_bot import ads
|
||||||
|
|
||||||
|
|
||||||
@@ -27,3 +31,148 @@ def test_calculate_content_hash_with_none_values() -> None:
|
|||||||
hash_value = ads.calculate_content_hash(ad_cfg)
|
hash_value = ads.calculate_content_hash(ad_cfg)
|
||||||
assert isinstance(hash_value, str)
|
assert isinstance(hash_value, str)
|
||||||
assert len(hash_value) == 64 # SHA-256 hash is 64 characters long
|
assert len(hash_value) == 64 # SHA-256 hash is 64 characters long
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("config,prefix,expected", [
|
||||||
|
# Test new flattened format - prefix
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description_prefix": "Hello"}},
|
||||||
|
True,
|
||||||
|
"Hello"
|
||||||
|
),
|
||||||
|
# Test new flattened format - suffix
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description_suffix": "Bye"}},
|
||||||
|
False,
|
||||||
|
"Bye"
|
||||||
|
),
|
||||||
|
# Test legacy nested format - prefix
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description": {"prefix": "Hi"}}},
|
||||||
|
True,
|
||||||
|
"Hi"
|
||||||
|
),
|
||||||
|
# Test legacy nested format - suffix
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description": {"suffix": "Ciao"}}},
|
||||||
|
False,
|
||||||
|
"Ciao"
|
||||||
|
),
|
||||||
|
# Test precedence (new format over legacy) - prefix
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description_prefix": "Hello",
|
||||||
|
"description": {"prefix": "Hi"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
True,
|
||||||
|
"Hello"
|
||||||
|
),
|
||||||
|
# Test precedence (new format over legacy) - suffix
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"ad_defaults": {
|
||||||
|
"description_suffix": "Bye",
|
||||||
|
"description": {"suffix": "Ciao"}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
False,
|
||||||
|
"Bye"
|
||||||
|
),
|
||||||
|
# Test empty config
|
||||||
|
(
|
||||||
|
{"ad_defaults": {}},
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
),
|
||||||
|
# Test None values
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description_prefix": None, "description_suffix": None}},
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
),
|
||||||
|
# Test non-string values
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description_prefix": 123, "description_suffix": True}},
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
),
|
||||||
|
# Add test for malformed config
|
||||||
|
(
|
||||||
|
{}, # Empty config
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
),
|
||||||
|
# Test for missing ad_defaults
|
||||||
|
(
|
||||||
|
{"some_other_key": {}},
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
),
|
||||||
|
# Test for non-dict ad_defaults
|
||||||
|
(
|
||||||
|
{"ad_defaults": "invalid"},
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
),
|
||||||
|
# Test for invalid type in description field
|
||||||
|
(
|
||||||
|
{"ad_defaults": {"description": 123}},
|
||||||
|
True,
|
||||||
|
""
|
||||||
|
)
|
||||||
|
])
|
||||||
|
def test_get_description_affixes(
|
||||||
|
config: dict[str, Any],
|
||||||
|
prefix: bool,
|
||||||
|
expected: str
|
||||||
|
) -> None:
|
||||||
|
"""Test get_description_affixes function with various inputs."""
|
||||||
|
result = ads.get_description_affixes(config, prefix)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("config,prefix,expected", [
    # The same malformed shapes are checked with prefix=True in
    # test_get_description_affixes; here they are run with prefix=False
    # so the suffix lookup is covered as well.
    # Completely empty config
    (
        {},
        False,
        ""
    ),
    # Missing ad_defaults
    (
        {"some_other_key": {}},
        False,
        ""
    ),
    # Non-dict ad_defaults
    (
        {"ad_defaults": "invalid"},
        False,
        ""
    ),
    # Invalid type in description field
    (
        {"ad_defaults": {"description": 123}},
        False,
        ""
    )
])
def test_get_description_affixes_edge_cases(config: dict[str, Any], prefix: bool, expected: str) -> None:
    """Test malformed-config edge cases for the suffix lookup."""
    assert ads.get_description_affixes(config, prefix) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("config,expected", [
|
||||||
|
(None, ""), # Test with None
|
||||||
|
([], ""), # Test with an empty list
|
||||||
|
("string", ""), # Test with a string
|
||||||
|
(123, ""), # Test with an integer
|
||||||
|
(3.14, ""), # Test with a float
|
||||||
|
(set(), ""), # Test with an empty set
|
||||||
|
])
|
||||||
|
def test_get_description_affixes_edge_cases_non_dict(config: Any, expected: str) -> None:
|
||||||
|
"""Test get_description_affixes function with non-dict inputs."""
|
||||||
|
result = ads.get_description_affixes(config, prefix=True)
|
||||||
|
assert result == expected
|
||||||
|
|||||||
@@ -301,56 +301,110 @@ class TestAdExtractorNavigation:
|
|||||||
|
|
||||||
class TestAdExtractorContent:
|
class TestAdExtractorContent:
|
||||||
"""Tests for content extraction functionality."""
|
"""Tests for content extraction functionality."""
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def extractor(self) -> AdExtractor:
|
def extractor_with_config(self) -> AdExtractor:
|
||||||
browser_mock = MagicMock(spec = Browser)
|
"""Create extractor with specific config for testing prefix/suffix handling."""
|
||||||
config_mock = {
|
browser_mock = MagicMock(spec=Browser)
|
||||||
"ad_defaults": {
|
return AdExtractor(browser_mock, {}) # Empty config, will be overridden in tests
|
||||||
"description": {
|
|
||||||
"prefix": "Test Prefix",
|
|
||||||
"suffix": "Test Suffix"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return AdExtractor(browser_mock, config_mock)
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
async def test_extract_description_with_affixes(
|
||||||
async def test_extract_title_and_description(self, extractor: AdExtractor) -> None:
|
self,
|
||||||
"""Test basic extraction of title and description."""
|
test_extractor: AdExtractor,
|
||||||
page_mock = AsyncMock()
|
description_test_cases: list[tuple[dict[str, Any], str, str]]
|
||||||
|
) -> None:
|
||||||
|
"""Test extraction of description with various prefix/suffix configurations."""
|
||||||
|
# Mock the page
|
||||||
|
page_mock = MagicMock()
|
||||||
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
test_extractor.page = page_mock
|
||||||
|
|
||||||
category_mock = AsyncMock()
|
for config, raw_description, _ in description_test_cases: # Changed to _ since we don't use expected_description
|
||||||
category_mock.attrs = {'href': '/s-kategorie/c123'}
|
test_extractor.config = config
|
||||||
|
|
||||||
with patch.object(extractor, 'page', page_mock), \
|
with patch.multiple(test_extractor,
|
||||||
patch.object(extractor, 'web_text', new_callable = AsyncMock) as mock_web_text, \
|
web_text=AsyncMock(side_effect=[
|
||||||
patch.object(extractor, 'web_find', new_callable = AsyncMock, return_value = category_mock), \
|
"Test Title", # Title
|
||||||
patch.object(extractor, '_extract_category_from_ad_page', new_callable = AsyncMock, return_value = "17/23"), \
|
raw_description, # Raw description (without affixes)
|
||||||
patch.object(extractor, '_extract_special_attributes_from_ad_page', new_callable = AsyncMock, return_value = {}), \
|
"03.02.2025" # Creation date
|
||||||
patch.object(extractor, '_extract_pricing_info_from_ad_page', new_callable = AsyncMock, return_value = (None, "NOT_APPLICABLE")), \
|
]),
|
||||||
patch.object(extractor, '_extract_shipping_info_from_ad_page', new_callable = AsyncMock, return_value = ("NOT_APPLICABLE", None, None)), \
|
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||||
patch.object(extractor, '_extract_sell_directly_from_ad_page', new_callable = AsyncMock, return_value = False), \
|
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||||
patch.object(extractor, '_download_images_from_ad_page', new_callable = AsyncMock, return_value = []), \
|
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||||
patch.object(extractor, '_extract_contact_from_ad_page', new_callable = AsyncMock, return_value = {}):
|
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||||
|
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||||
mock_web_text.side_effect = [
|
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||||
"Test Title",
|
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||||
"Test Prefix Original Description Test Suffix",
|
):
|
||||||
"03.02.2025"
|
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||||
]
|
assert info["description"] == raw_description
|
||||||
|
|
||||||
info = await extractor._extract_ad_page_info("/some/dir", 12345)
|
|
||||||
assert isinstance(info, dict)
|
|
||||||
assert info["title"] == "Test Title"
|
|
||||||
assert info["description"].strip() == "Original Description"
|
|
||||||
assert info["created_on"] == "2025-02-03T00:00:00"
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
# pylint: disable=protected-access
|
async def test_extract_description_with_affixes_timeout(
|
||||||
async def test_extract_sell_directly(self, extractor: AdExtractor) -> None:
|
self,
|
||||||
|
test_extractor: AdExtractor
|
||||||
|
) -> None:
|
||||||
|
"""Test handling of timeout when extracting description."""
|
||||||
|
# Mock the page
|
||||||
|
page_mock = MagicMock()
|
||||||
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
test_extractor.page = page_mock
|
||||||
|
|
||||||
|
with patch.multiple(test_extractor,
|
||||||
|
web_text=AsyncMock(side_effect=[
|
||||||
|
"Test Title", # Title succeeds
|
||||||
|
TimeoutError("Timeout"), # Description times out
|
||||||
|
"03.02.2025" # Date succeeds
|
||||||
|
]),
|
||||||
|
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||||
|
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||||
|
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||||
|
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||||
|
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||||
|
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||||
|
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||||
|
assert info["description"] == ""
|
||||||
|
except TimeoutError:
|
||||||
|
# This is also acceptable - depends on how we want to handle timeouts
|
||||||
|
pass
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_extract_description_with_affixes_no_affixes(
|
||||||
|
self,
|
||||||
|
test_extractor: AdExtractor
|
||||||
|
) -> None:
|
||||||
|
"""Test extraction of description without any affixes in config."""
|
||||||
|
# Mock the page
|
||||||
|
page_mock = MagicMock()
|
||||||
|
page_mock.url = "https://www.kleinanzeigen.de/s-anzeige/test/12345"
|
||||||
|
test_extractor.page = page_mock
|
||||||
|
test_extractor.config = {"ad_defaults": {}} # Empty config
|
||||||
|
raw_description = "Original Description"
|
||||||
|
|
||||||
|
with patch.multiple(test_extractor,
|
||||||
|
web_text=AsyncMock(side_effect=[
|
||||||
|
"Test Title", # Title
|
||||||
|
raw_description, # Description without affixes
|
||||||
|
"03.02.2025" # Creation date
|
||||||
|
]),
|
||||||
|
_extract_category_from_ad_page=AsyncMock(return_value="160"),
|
||||||
|
_extract_special_attributes_from_ad_page=AsyncMock(return_value={}),
|
||||||
|
_extract_pricing_info_from_ad_page=AsyncMock(return_value=(None, "NOT_APPLICABLE")),
|
||||||
|
_extract_shipping_info_from_ad_page=AsyncMock(return_value=("NOT_APPLICABLE", None, None)),
|
||||||
|
_extract_sell_directly_from_ad_page=AsyncMock(return_value=False),
|
||||||
|
_download_images_from_ad_page=AsyncMock(return_value=[]),
|
||||||
|
_extract_contact_from_ad_page=AsyncMock(return_value={})
|
||||||
|
):
|
||||||
|
info = await test_extractor._extract_ad_page_info("/some/dir", 12345)
|
||||||
|
assert info["description"] == raw_description
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_extract_sell_directly(self, test_extractor: AdExtractor) -> None:
|
||||||
"""Test extraction of sell directly option."""
|
"""Test extraction of sell directly option."""
|
||||||
test_cases = [
|
test_cases = [
|
||||||
("Direkt kaufen", True),
|
("Direkt kaufen", True),
|
||||||
@@ -358,12 +412,12 @@ class TestAdExtractorContent:
|
|||||||
]
|
]
|
||||||
|
|
||||||
for text, expected in test_cases:
|
for text, expected in test_cases:
|
||||||
with patch.object(extractor, 'web_text', new_callable = AsyncMock, return_value = text):
|
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, return_value=text):
|
||||||
result = await extractor._extract_sell_directly_from_ad_page()
|
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||||
assert result is expected
|
assert result is expected
|
||||||
|
|
||||||
with patch.object(extractor, 'web_text', new_callable = AsyncMock, side_effect = TimeoutError):
|
with patch.object(test_extractor, 'web_text', new_callable=AsyncMock, side_effect=TimeoutError):
|
||||||
result = await extractor._extract_sell_directly_from_ad_page()
|
result = await test_extractor._extract_sell_directly_from_ad_page()
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -85,6 +85,11 @@ def create_ad_config(base_config: dict[str, Any], **overrides: Any) -> dict[str,
|
|||||||
config[key] = value
|
config[key] = value
|
||||||
else:
|
else:
|
||||||
config[key] = value
|
config[key] = value
|
||||||
|
|
||||||
|
# Only check length if description is a string
|
||||||
|
if isinstance(config.get("description"), str):
|
||||||
|
assert len(config["description"]) <= 4000, "Length of ad description including prefix and suffix exceeds 4000 chars"
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
@@ -1016,3 +1021,40 @@ class TestKleinanzeigenBotUrlConstruction:
|
|||||||
# Test ad publishing URL
|
# Test ad publishing URL
|
||||||
expected_publish_url = "https://www.kleinanzeigen.de/p-anzeige-aufgeben-schritt2.html"
|
expected_publish_url = "https://www.kleinanzeigen.de/p-anzeige-aufgeben-schritt2.html"
|
||||||
assert f"{test_bot.root_url}/p-anzeige-aufgeben-schritt2.html" == expected_publish_url
|
assert f"{test_bot.root_url}/p-anzeige-aufgeben-schritt2.html" == expected_publish_url
|
||||||
|
|
||||||
|
|
||||||
|
class TestKleinanzeigenBotPrefixSuffix:
    """Checks how the bot combines an ad description with configured prefix/suffix values."""
    # pylint: disable=protected-access

    def test_description_prefix_suffix_handling(
        self,
        test_bot: KleinanzeigenBot,
        description_test_cases: list[tuple[dict[str, Any], str, str]]
    ) -> None:
        """Run every fixture-provided (config, raw, expected) case through the affix helper."""
        for bot_config, plain_text, wanted in description_test_cases:
            test_bot.config = bot_config
            # The helper is private (double underscore), so it is looked up by its mangled name.
            build_description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")
            assert build_description({"description": plain_text, "active": True}) == wanted

    def test_description_length_validation(self, test_bot: KleinanzeigenBot) -> None:
        """Expect an AssertionError once prefix + description + suffix total 4001 characters."""
        affix_defaults = {
            "description_prefix": "P" * 1000,
            "description_suffix": "S" * 1000,
        }
        test_bot.config = {"ad_defaults": affix_defaults}

        # 1000 + 2001 + 1000 = 4001 characters, one more than the 4000 named in the error message.
        oversized_ad = {"description": "D" * 2001, "active": True}

        with pytest.raises(AssertionError) as exc_info:
            getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(oversized_ad)

        failure_text = str(exc_info.value)
        assert "Length of ad description including prefix and suffix exceeds 4000 chars" in failure_text
        assert "Description length: 4001" in failure_text
|
||||||
|
|||||||
Reference in New Issue
Block a user