mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
enh: allow per-ad overriding of global description affixes (#416)
This commit is contained in:
@@ -17,7 +17,7 @@ from ruamel.yaml import YAML
|
||||
from wcmatch import glob
|
||||
|
||||
from . import extract, resources
|
||||
from .ads import calculate_content_hash
|
||||
from .ads import calculate_content_hash, get_description_affixes
|
||||
from .utils import dicts, error_handlers, loggers, misc
|
||||
from .utils.files import abspath
|
||||
from .utils.i18n import Locale, get_current_locale, set_current_locale, pluralize
|
||||
@@ -318,11 +318,6 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
if not ad_files:
|
||||
return []
|
||||
|
||||
description_config = {
|
||||
"prefix": self.config["ad_defaults"]["description"]["prefix"] or "",
|
||||
"suffix": self.config["ad_defaults"]["description"]["suffix"] or ""
|
||||
}
|
||||
|
||||
ids = []
|
||||
use_specific_ads = False
|
||||
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
||||
@@ -356,10 +351,18 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
if not self.__check_ad_republication(ad_cfg, ad_cfg_orig, ad_file_relative):
|
||||
continue
|
||||
|
||||
ad_cfg["description"] = description_config["prefix"] + (ad_cfg["description"] or "") + description_config["suffix"]
|
||||
# Get prefix/suffix from ad config if present, otherwise use defaults
|
||||
prefix = ad_cfg.get("prefix", self.config["ad_defaults"]["description"]["prefix"] or "")
|
||||
suffix = ad_cfg.get("suffix", self.config["ad_defaults"]["description"]["suffix"] or "")
|
||||
|
||||
# Combine description parts
|
||||
ad_cfg["description"] = prefix + (ad_cfg["description"] or "") + suffix
|
||||
ad_cfg["description"] = ad_cfg["description"].replace("@", "(at)")
|
||||
ensure(len(ad_cfg["description"]) <= 4000, f"""Length of ad description including prefix and suffix exceeds 4000 chars. Description length: {
|
||||
len(ad_cfg['description'])} chars. @ {ad_file}""")
|
||||
|
||||
# Validate total length
|
||||
ensure(len(ad_cfg["description"]) <= 4000,
|
||||
f"""Length of ad description including prefix and suffix exceeds 4000 chars. Description length: {
|
||||
len(ad_cfg["description"])} chars. @ {ad_file}.""")
|
||||
|
||||
# pylint: disable=cell-var-from-loop
|
||||
def assert_one_of(path:str, allowed:Iterable[str]) -> None:
|
||||
@@ -693,7 +696,8 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
#############################
|
||||
# set description
|
||||
#############################
|
||||
await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + ad_cfg["description"].replace("`", "'") + "`")
|
||||
description = self.__get_description_with_affixes(ad_cfg)
|
||||
await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + description.replace("`", "'") + "`")
|
||||
|
||||
#############################
|
||||
# set contact zipcode
|
||||
@@ -1040,10 +1044,64 @@ class KleinanzeigenBot(WebScrapingMixin):
|
||||
else:
|
||||
LOG.error('The page with the id %d does not exist!', ad_id)
|
||||
|
||||
def __get_description_with_affixes(self, ad_cfg: dict[str, Any]) -> str:
    """Build the final ad description by wrapping the text in its prefix and suffix.

    Each affix is resolved independently; the first source that yields a
    value other than ``None`` wins:

    1. Flat ad-level key (``description_prefix`` / ``description_suffix``)
    2. Legacy nested ad-level key (``description.prefix`` / ``description.suffix``)
    3. Global flattened key (``ad_defaults.description_prefix`` / ``..._suffix``)
    4. Legacy global nested key (``ad_defaults.description.prefix`` / ``...suffix``)

    Levels 3 and 4 are delegated to ``get_description_affixes``.

    Args:
        ad_cfg: The ad configuration dictionary. Its ``description`` entry may
            be a plain string or a dict carrying the text under ``text``; any
            other type is treated as an empty description.

    Returns:
        Prefix, description text and suffix concatenated in that order.

    Note:
        The combined length is checked against a 4000 character limit through
        ``ensure`` (presumably raising when the condition is false - confirm
        against the helper's definition).
    """
    # Extract the bare description text from either supported layout
    raw_description = ad_cfg.get("description")
    if isinstance(raw_description, dict):
        body = raw_description.get("text", "")
    elif isinstance(raw_description, str):
        body = raw_description
    else:
        body = ""

    # Resolve prefix first, then suffix, walking the precedence chain for each
    affix_sources = (
        ("description_prefix", "prefix", True),
        ("description_suffix", "suffix", False),
    )
    resolved = []
    for flat_key, nested_key, is_prefix in affix_sources:
        # 1. Direct ad-level affix
        affix = ad_cfg.get(flat_key)
        if affix is None:
            # 2. Legacy nested ad-level affix
            affix = dicts.safe_get(ad_cfg, "description", nested_key)
        if affix is None:
            # 3./4. Global affix from the bot configuration
            affix = get_description_affixes(self.config, prefix=is_prefix)
        resolved.append(affix)
    head, tail = resolved

    # Stringify only at assembly time so non-string affixes are still accepted
    final_description = "".join(str(part) for part in (head, body, tail))

    # Enforce the maximum description length
    ensure(len(final_description) <= 4000,
           f"Length of ad description including prefix and suffix exceeds 4000 chars. Description length: {len(final_description)} chars.")

    return final_description
|
||||
|
||||
#############################
|
||||
# main entry point
|
||||
#############################
|
||||
|
||||
|
||||
def main(args:list[str]) -> None:
|
||||
if "version" not in args:
|
||||
print(textwrap.dedent(r"""
|
||||
|
||||
@@ -5,6 +5,7 @@ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanze
|
||||
"""
|
||||
import hashlib, json, os
|
||||
from typing import Any
|
||||
from .utils import dicts
|
||||
|
||||
|
||||
def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
||||
@@ -36,3 +37,51 @@ def calculate_content_hash(ad_cfg: dict[str, Any]) -> str:
|
||||
# Create sorted JSON string for consistent hashes
|
||||
content_str = json.dumps(content, sort_keys = True)
|
||||
return hashlib.sha256(content_str.encode()).hexdigest()
|
||||
|
||||
|
||||
def get_description_affixes(config: dict[str, Any], prefix: bool = True) -> str:
    """Return the globally configured description prefix or suffix.

    Two layouts below ``ad_defaults`` are consulted, in this order, and the
    first one holding a ``str`` value (an empty string counts) is returned:

    Flattened layout (checked first):
        ad_defaults:
          description_prefix: "Global Prefix"
          description_suffix: "Global Suffix"

    Legacy nested layout (fallback):
        ad_defaults:
          description:
            prefix: "Legacy Prefix"
            suffix: "Legacy Suffix"

    Args:
        config: Configuration dictionary containing ``ad_defaults``.
        prefix: Select the prefix when True, the suffix when False.

    Returns:
        The matching affix, or an empty string when ``config`` is not a dict
        or neither layout provides a string value.

    Example:
        >>> config = {"ad_defaults": {"description_prefix": "Hello", "description": {"prefix": "Hi"}}}
        >>> get_description_affixes(config, prefix=True)
        'Hello'
    """
    # Anything that is not a mapping cannot carry ad_defaults
    if not isinstance(config, dict):
        return ""

    kind = "prefix" if prefix else "suffix"

    # Ordered by precedence: new flattened key first, legacy nested key second
    lookup_paths = (
        ("ad_defaults", f"description_{kind}"),
        ("ad_defaults", "description", kind),
    )
    for path in lookup_paths:
        candidate = dicts.safe_get(config, *path)
        if isinstance(candidate, str):
            return candidate

    return ""
|
||||
|
||||
@@ -8,7 +8,7 @@ import urllib.request as urllib_request
|
||||
from datetime import datetime
|
||||
from typing import Any, Final
|
||||
|
||||
from .ads import calculate_content_hash
|
||||
from .ads import calculate_content_hash, get_description_affixes
|
||||
from .utils import dicts, i18n, loggers, misc, reflect
|
||||
from .utils.web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
|
||||
|
||||
@@ -236,9 +236,23 @@ class AdExtractor(WebScrapingMixin):
|
||||
|
||||
info['category'] = await self._extract_category_from_ad_page()
|
||||
info['title'] = title
|
||||
info['description'] = (await self.web_text(By.ID, 'viewad-description-text')).strip() \
|
||||
.removeprefix((self.config["ad_defaults"]["description"]["prefix"] or "").strip()) \
|
||||
.removesuffix((self.config["ad_defaults"]["description"]["suffix"] or "").strip())
|
||||
|
||||
# Get raw description text
|
||||
raw_description = (await self.web_text(By.ID, 'viewad-description-text')).strip()
|
||||
|
||||
# Get prefix and suffix from config
|
||||
prefix = get_description_affixes(self.config, prefix=True)
|
||||
suffix = get_description_affixes(self.config, prefix=False)
|
||||
|
||||
# Remove prefix and suffix if present
|
||||
description_text = raw_description
|
||||
if prefix and description_text.startswith(prefix.strip()):
|
||||
description_text = description_text[len(prefix.strip()):]
|
||||
if suffix and description_text.endswith(suffix.strip()):
|
||||
description_text = description_text[:-len(suffix.strip())]
|
||||
|
||||
info['description'] = description_text.strip()
|
||||
|
||||
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
|
||||
if "art_s" in info['special_attributes']:
|
||||
# change e.g. category "161/172" to "161/172/lautsprecher_kopfhoerer"
|
||||
|
||||
@@ -5,9 +5,9 @@ ad_files:
|
||||
ad_defaults:
|
||||
active: true
|
||||
type: OFFER # one of: OFFER, WANTED
|
||||
description:
|
||||
prefix: ""
|
||||
suffix: ""
|
||||
description_prefix: "" # prefix for the ad description
|
||||
description_suffix: "" # suffix for the ad description
|
||||
|
||||
price_type: NEGOTIABLE # one of: FIXED, NEGOTIABLE, GIVE_AWAY, NOT_APPLICABLE
|
||||
shipping_type: SHIPPING # one of: PICKUP, SHIPPING, NOT_APPLICABLE
|
||||
sell_directly: false # requires shipping_options to take effect
|
||||
|
||||
Reference in New Issue
Block a user