fix: improve ad description length validation

This commit is contained in:
sebthom
2025-04-28 13:17:09 +02:00
parent ef923a8337
commit f98251ade3
2 changed files with 42 additions and 46 deletions

View File

@@ -386,14 +386,6 @@ class KleinanzeigenBot(WebScrapingMixin):
if not should_include: if not should_include:
continue continue
# Get description with prefix/suffix from ad config if present, otherwise use defaults
description = self.__get_description_with_affixes(ad_cfg)
# Validate total length
ensure(len(description) <= MAX_DESCRIPTION_LENGTH,
f"Length of ad description including prefix and suffix exceeds 4000 chars. "
f"Description length: {len(description)} chars. @ {ad_file}.")
def assert_one_of(path:str, allowed:Iterable[str]) -> None: def assert_one_of(path:str, allowed:Iterable[str]) -> None:
# ruff: noqa: B023 function-uses-loop-variable # ruff: noqa: B023 function-uses-loop-variable
ensure(dicts.safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]") ensure(dicts.safe_get(ad_cfg, *path.split(".")) in allowed, f"-> property [{path}] must be one of: {allowed} @ [{ad_file}]")
@@ -408,7 +400,8 @@ class KleinanzeigenBot(WebScrapingMixin):
assert_one_of("type", {"OFFER", "WANTED"}) assert_one_of("type", {"OFFER", "WANTED"})
assert_min_len("title", 10) assert_min_len("title", 10)
assert_has_value("description") ensure(self.__get_description(ad_cfg, with_affixes = False), f"-> property [description] not specified @ [{ad_file}]")
self.__get_description(ad_cfg, with_affixes = True) # validates complete description
assert_one_of("price_type", {"FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"}) assert_one_of("price_type", {"FIXED", "NEGOTIABLE", "GIVE_AWAY", "NOT_APPLICABLE"})
if ad_cfg["price_type"] == "GIVE_AWAY": if ad_cfg["price_type"] == "GIVE_AWAY":
ensure(not dicts.safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]") ensure(not dicts.safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
@@ -707,7 +700,7 @@ class KleinanzeigenBot(WebScrapingMixin):
############################# #############################
# set description # set description
############################# #############################
description = self.__get_description_with_affixes(ad_cfg) description = self.__get_description(ad_cfg, with_affixes = True)
await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + description.replace("`", "'") + "`") await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + description.replace("`", "'") + "`")
############################# #############################
@@ -1104,8 +1097,8 @@ class KleinanzeigenBot(WebScrapingMixin):
else: else:
LOG.error("The page with the id %d does not exist!", ad_id) LOG.error("The page with the id %d does not exist!", ad_id)
def __get_description_with_affixes(self, ad_cfg:dict[str, Any]) -> str: def __get_description(self, ad_cfg:dict[str, Any], *, with_affixes:bool) -> str:
"""Get the complete description with prefix and suffix applied. """Get the ad description optionally with prefix and suffix applied.
Precedence (highest to lowest): Precedence (highest to lowest):
1. Direct ad-level affixes (description_prefix/suffix) 1. Direct ad-level affixes (description_prefix/suffix)
@@ -1117,7 +1110,7 @@ class KleinanzeigenBot(WebScrapingMixin):
ad_cfg: The ad configuration dictionary ad_cfg: The ad configuration dictionary
Returns: Returns:
The complete description with prefix and suffix applied The raw or complete description with prefix and suffix applied
""" """
# Get the main description text # Get the main description text
description_text = "" description_text = ""
@@ -1126,33 +1119,36 @@ class KleinanzeigenBot(WebScrapingMixin):
elif isinstance(ad_cfg.get("description"), str): elif isinstance(ad_cfg.get("description"), str):
description_text = ad_cfg["description"] description_text = ad_cfg["description"]
# Get prefix with precedence if with_affixes:
prefix = ( # Get prefix with precedence
# 1. Direct ad-level prefix prefix = (
ad_cfg.get("description_prefix") if ad_cfg.get("description_prefix") is not None # 1. Direct ad-level prefix
# 2. Legacy nested ad-level prefix ad_cfg.get("description_prefix") if ad_cfg.get("description_prefix") is not None
else dicts.safe_get(ad_cfg, "description", "prefix") # 2. Legacy nested ad-level prefix
if dicts.safe_get(ad_cfg, "description", "prefix") is not None else dicts.safe_get(ad_cfg, "description", "prefix")
# 3. Global prefix from config if dicts.safe_get(ad_cfg, "description", "prefix") is not None
else get_description_affixes(self.config, prefix = True) # 3. Global prefix from config
or "" # Default to empty string if all sources are None else get_description_affixes(self.config, prefix = True)
) or "" # Default to empty string if all sources are None
)
# Get suffix with precedence # Get suffix with precedence
suffix = ( suffix = (
# 1. Direct ad-level suffix # 1. Direct ad-level suffix
ad_cfg.get("description_suffix") if ad_cfg.get("description_suffix") is not None ad_cfg.get("description_suffix") if ad_cfg.get("description_suffix") is not None
# 2. Legacy nested ad-level suffix # 2. Legacy nested ad-level suffix
else dicts.safe_get(ad_cfg, "description", "suffix") else dicts.safe_get(ad_cfg, "description", "suffix")
if dicts.safe_get(ad_cfg, "description", "suffix") is not None if dicts.safe_get(ad_cfg, "description", "suffix") is not None
# 3. Global suffix from config # 3. Global suffix from config
else get_description_affixes(self.config, prefix = False) else get_description_affixes(self.config, prefix = False)
or "" # Default to empty string if all sources are None or "" # Default to empty string if all sources are None
) )
# Combine the parts and replace @ with (at) # Combine the parts and replace @ with (at)
final_description = str(prefix) + str(description_text) + str(suffix) final_description = str(prefix) + str(description_text) + str(suffix)
final_description = final_description.replace("@", "(at)") final_description = final_description.replace("@", "(at)")
else:
final_description = description_text
# Validate length # Validate length
ensure(len(final_description) <= MAX_DESCRIPTION_LENGTH, ensure(len(final_description) <= MAX_DESCRIPTION_LENGTH,

View File

@@ -1045,7 +1045,7 @@ class TestKleinanzeigenBotPrefixSuffix:
test_bot.config = config test_bot.config = config
ad_cfg = {"description": raw_description, "active": True} ad_cfg = {"description": raw_description, "active": True}
# Access private method using the correct name mangling # Access private method using the correct name mangling
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == expected_description assert description == expected_description
def test_description_length_validation(self, test_bot:KleinanzeigenBot) -> None: def test_description_length_validation(self, test_bot:KleinanzeigenBot) -> None:
@@ -1062,7 +1062,7 @@ class TestKleinanzeigenBotPrefixSuffix:
} }
with pytest.raises(AssertionError) as exc_info: with pytest.raises(AssertionError) as exc_info:
getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert "Length of ad description including prefix and suffix exceeds 4000 chars" in str(exc_info.value) assert "Length of ad description including prefix and suffix exceeds 4000 chars" in str(exc_info.value)
assert "Description length: 4001" in str(exc_info.value) assert "Description length: 4001" in str(exc_info.value)
@@ -1087,7 +1087,7 @@ class TestKleinanzeigenBotDescriptionHandling:
} }
# The description should be returned as-is without any prefix/suffix # The description should be returned as-is without any prefix/suffix
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Test Description" assert description == "Test Description"
def test_description_with_only_new_format_affixes(self, test_bot:KleinanzeigenBot) -> None: def test_description_with_only_new_format_affixes(self, test_bot:KleinanzeigenBot) -> None:
@@ -1104,7 +1104,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"active": True "active": True
} }
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Prefix: Test Description :Suffix" assert description == "Prefix: Test Description :Suffix"
def test_description_with_mixed_config_formats(self, test_bot:KleinanzeigenBot) -> None: def test_description_with_mixed_config_formats(self, test_bot:KleinanzeigenBot) -> None:
@@ -1125,7 +1125,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"active": True "active": True
} }
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "New Prefix: Test Description :New Suffix" assert description == "New Prefix: Test Description :New Suffix"
def test_description_with_ad_level_affixes(self, test_bot:KleinanzeigenBot) -> None: def test_description_with_ad_level_affixes(self, test_bot:KleinanzeigenBot) -> None:
@@ -1144,7 +1144,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"active": True "active": True
} }
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Ad Prefix: Test Description :Ad Suffix" assert description == "Ad Prefix: Test Description :Ad Suffix"
def test_description_with_none_values(self, test_bot:KleinanzeigenBot) -> None: def test_description_with_none_values(self, test_bot:KleinanzeigenBot) -> None:
@@ -1165,7 +1165,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"active": True "active": True
} }
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Test Description" assert description == "Test Description"
def test_description_with_email_replacement(self, test_bot:KleinanzeigenBot) -> None: def test_description_with_email_replacement(self, test_bot:KleinanzeigenBot) -> None:
@@ -1179,7 +1179,7 @@ class TestKleinanzeigenBotDescriptionHandling:
"active": True "active": True
} }
description = getattr(test_bot, "_KleinanzeigenBot__get_description_with_affixes")(ad_cfg) description = getattr(test_bot, "_KleinanzeigenBot__get_description")(ad_cfg, with_affixes = True)
assert description == "Contact: test(at)example.com" assert description == "Contact: test(at)example.com"