feat: add multi-language support

This commit is contained in:
sebthom
2024-12-27 13:04:30 +01:00
parent 0aa1975325
commit 9d54a949e7
10 changed files with 613 additions and 100 deletions

View File

@@ -24,8 +24,9 @@
**kleinanzeigen-bot** is a console based application to ease publishing of ads to [kleinanzeigen.de](https://kleinanzeigen.de). **kleinanzeigen-bot** is a console based application to ease publishing of ads to [kleinanzeigen.de](https://kleinanzeigen.de).
It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages: It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages:
- multi-language support (DE/EN)
- supports Microsoft Edge browser (Chromium based) - supports Microsoft Edge browser (Chromium based)
- does not require selenium and chromedrivers - does not require Selenium and chromedrivers
- better captcha handling - better captcha handling
- config: - config:
- use YAML or JSON for config files - use YAML or JSON for config files

View File

@@ -200,6 +200,7 @@ disable= [
"broad-except", "broad-except",
"consider-using-assignment-expr", "consider-using-assignment-expr",
"docstring-first-line-empty", "docstring-first-line-empty",
"global-statement",
"missing-docstring", "missing-docstring",
"multiple-imports", "multiple-imports",
"multiple-statements", "multiple-statements",
@@ -215,11 +216,13 @@ notes = [ "FIXME", "XXX", "TODO" ] # list of note tags to take in consideration
[tool.pylint.design] [tool.pylint.design]
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker # https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages # https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
max-args = 6 # maximum number of arguments for function / method (R0913)
max-attributes = 15 # maximum number of instance attributes for a class (R0902) max-attributes = 15 # maximum number of instance attributes for a class (R0902)
max-branches = 40 # maximum number of branch for function / method body (R0912) max-branches = 40 # maximum number of branch for function / method body (R0912)
max-locals = 30 # maximum number of local variables for function / method body (R0914) max-locals = 30 # maximum number of local variables for function / method body (R0914)
max-returns = 10 # maximum number of return / yield for function / method body (R0911) max-returns = 15 # maximum number of return / yield for function / method body (R0911)
max-statements = 150 # maximum number of statements in function / method body (R0915) max-statements = 150 # maximum number of statements in function / method body (R0915)
max-positional-arguments = 6 # maximum number of positional arguments for function / method (R0917)
max-public-methods = 30 # maximum number of public methods for a class (R0904) max-public-methods = 30 # maximum number of public methods for a class (R0904)

View File

@@ -9,6 +9,7 @@ import urllib.parse as urllib_parse
import urllib.request as urllib_request import urllib.request as urllib_request
from collections.abc import Iterable from collections.abc import Iterable
from datetime import datetime from datetime import datetime
from gettext import gettext as _
from logging.handlers import RotatingFileHandler from logging.handlers import RotatingFileHandler
from typing import Any, Final from typing import Any, Final
@@ -17,14 +18,15 @@ from ruamel.yaml import YAML
from wcmatch import glob from wcmatch import glob
from . import utils, resources, extract from . import utils, resources, extract
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, pluralize, safe_get, parse_datetime from .i18n import Locale, get_current_locale, set_current_locale, get_translating_logger, pluralize
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, safe_get, parse_datetime
from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
from ._version import __version__ from ._version import __version__
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933 # W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
LOG_ROOT:Final[logging.Logger] = logging.getLogger() LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot") LOG:Final[logging.Logger] = get_translating_logger(__name__)
LOG.setLevel(logging.INFO) LOG.setLevel(logging.INFO)
colorama.init() colorama.init()
@@ -130,7 +132,40 @@ class KleinanzeigenBot(WebScrapingMixin):
else: else:
exe = "python -m kleinanzeigen_bot" exe = "python -m kleinanzeigen_bot"
print(textwrap.dedent(f"""\ if get_current_locale().language == "de":
print(textwrap.dedent(f"""\
Verwendung: {colorama.Fore.LIGHTMAGENTA_EX}{exe} BEFEHL [OPTIONEN]{colorama.Style.RESET_ALL}
Befehle:
publish - (erneutes) Veröffentlichen von Anzeigen
verify - Überprüft die Konfigurationsdateien
delete - Löscht Anzeigen
download - Lädt eine oder mehrere Anzeigen herunter
--
help - Zeigt diese Hilfe an (Standardbefehl)
version - Zeigt die Version der Anwendung an
Optionen:
--ads=all|due|new|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due)
Mögliche Werte:
* all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval
* due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval
* new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei)
* <id(s)>: Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval
--ads=all|new|<id(s)> (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new)
Mögliche Werte:
* all: Lädt alle Anzeigen aus Ihrem Profil herunter
* new: Lädt Anzeigen aus Ihrem Profil herunter, die lokal noch nicht gespeichert sind
* <id(s)>: Gibt eine oder mehrere Anzeigen-IDs zum Herunterladen an, z. B. "--ads=1,2,3"
--force - Alias für '--ads=all'
--keep-old - Verhindert das Löschen alter Anzeigen bei erneuter Veröffentlichung
--config=<PATH> - Pfad zur YAML- oder JSON-Konfigurationsdatei (STANDARD: ./config.yaml)
--lang=en|de - Anzeigesprache (STANDARD: Systemsprache, wenn unterstützt, sonst Englisch)
--logfile=<PATH> - Pfad zur Protokolldatei (STANDARD: ./kleinanzeigen-bot.log)
-v, --verbose - Aktiviert detaillierte Ausgabe - nur nützlich zur Fehlerbehebung
"""))
else:
print(textwrap.dedent(f"""\
Usage: {colorama.Fore.LIGHTMAGENTA_EX}{exe} COMMAND [OPTIONS]{colorama.Style.RESET_ALL} Usage: {colorama.Fore.LIGHTMAGENTA_EX}{exe} COMMAND [OPTIONS]{colorama.Style.RESET_ALL}
Commands: Commands:
@@ -158,8 +193,9 @@ class KleinanzeigenBot(WebScrapingMixin):
--keep-old - don't delete old ads on republication --keep-old - don't delete old ads on republication
--config=<PATH> - path to the config YAML or JSON file (DEFAULT: ./config.yaml) --config=<PATH> - path to the config YAML or JSON file (DEFAULT: ./config.yaml)
--logfile=<PATH> - path to the logfile (DEFAULT: ./kleinanzeigen-bot.log) --logfile=<PATH> - path to the logfile (DEFAULT: ./kleinanzeigen-bot.log)
--lang=en|de - display language (DEFAULT: system language if supported, otherwise English)
-v, --verbose - enables verbose output - only useful when troubleshooting issues -v, --verbose - enables verbose output - only useful when troubleshooting issues
""")) """))
def parse_args(self, args:list[str]) -> None: def parse_args(self, args:list[str]) -> None:
try: try:
@@ -170,11 +206,12 @@ class KleinanzeigenBot(WebScrapingMixin):
"help", "help",
"keep-old", "keep-old",
"logfile=", "logfile=",
"lang=",
"verbose" "verbose"
]) ])
except getopt.error as ex: except getopt.error as ex:
LOG.error(ex.msg) LOG.error(ex.msg)
LOG.error("Use --help to display available options") LOG.error("Use --help to display available options.")
sys.exit(2) sys.exit(2)
for option, value in options: for option, value in options:
@@ -195,6 +232,8 @@ class KleinanzeigenBot(WebScrapingMixin):
self.ads_selector = "all" self.ads_selector = "all"
case "--keep-old": case "--keep-old":
self.keep_old_ads = True self.keep_old_ads = True
case "--lang":
set_current_locale(Locale.of(value))
case "-v" | "--verbose": case "-v" | "--verbose":
LOG.setLevel(logging.DEBUG) LOG.setLevel(logging.DEBUG)
logging.getLogger("nodriver").setLevel(logging.INFO) logging.getLogger("nodriver").setLevel(logging.INFO)
@@ -244,7 +283,7 @@ class KleinanzeigenBot(WebScrapingMixin):
if re.compile(r'\d+[,\d+]*').search(self.ads_selector): if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(',')]
use_specific_ads = True use_specific_ads = True
LOG.info('Start fetch task for the ad(s) with the id(s):') LOG.info('Start fetch task for the ad(s) with id(s):')
LOG.info(' | '.join([str(id_) for id_ in ids])) LOG.info(' | '.join([str(id_) for id_ in ids]))
ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml") ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml")
@@ -261,7 +300,7 @@ class KleinanzeigenBot(WebScrapingMixin):
continue continue
if use_specific_ads: if use_specific_ads:
if not ad_cfg["id"] in ids: if ad_cfg["id"] not in ids:
LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file) LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file)
continue continue
else: else:
@@ -362,7 +401,7 @@ class KleinanzeigenBot(WebScrapingMixin):
def load_config(self) -> None: def load_config(self) -> None:
config_defaults = utils.load_dict_from_module(resources, "config_defaults.yaml") config_defaults = utils.load_dict_from_module(resources, "config_defaults.yaml")
config = utils.load_dict_if_exists(self.config_file_path, "config") config = utils.load_dict_if_exists(self.config_file_path, _("config"))
if config is None: if config is None:
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path) LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
@@ -427,7 +466,7 @@ class KleinanzeigenBot(WebScrapingMixin):
try: try:
await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = 4) await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = 4)
LOG.warning("############################################") LOG.warning("############################################")
LOG.warning("# Device verification message detected. Please handle it.") LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.")
LOG.warning("############################################") LOG.warning("############################################")
await ainput("Press ENTER when done...") await ainput("Press ENTER when done...")
except TimeoutError: except TimeoutError:
@@ -460,7 +499,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.web_sleep() await self.web_sleep()
LOG.info("############################################") LOG.info("############################################")
LOG.info("DONE: Deleting %s", pluralize("ad", count)) LOG.info("DONE: Deleted %s", pluralize("ad", count))
LOG.info("############################################") LOG.info("############################################")
async def delete_ad(self, ad_cfg: dict[str, Any], delete_old_ads_by_title: bool) -> bool: async def delete_ad(self, ad_cfg: dict[str, Any], delete_old_ads_by_title: bool) -> bool:
@@ -469,8 +508,7 @@ class KleinanzeigenBot(WebScrapingMixin):
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html") await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]") csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")
csrf_token = csrf_token_elem.attrs["content"] csrf_token = csrf_token_elem.attrs["content"]
if csrf_token is None: ensure(csrf_token is not None, "Expected CSRF Token not found in HTML content!")
raise AssertionError("Expected CSRF Token not found in HTML content!")
if delete_old_ads_by_title: if delete_old_ads_by_title:
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"] published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
@@ -666,7 +704,7 @@ class KleinanzeigenBot(WebScrapingMixin):
LOG.warning("# Captcha present! Please solve the captcha.") LOG.warning("# Captcha present! Please solve the captcha.")
LOG.warning("############################################") LOG.warning("############################################")
await self.web_scroll_page_down() await self.web_scroll_page_down()
input("Press a key to continue...") input(_("Press a key to continue..."))
except TimeoutError: except TimeoutError:
pass pass
@@ -722,7 +760,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# Click continue button # Click continue button
await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Bestätigen")]]]') await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Bestätigen")]]]')
except TimeoutError as ex: except TimeoutError as ex:
raise TimeoutError("Unable to close condition dialog") from ex raise TimeoutError(_("Unable to close condition dialog!")) from ex
async def __set_category(self, category: str | None, ad_file:str) -> None: async def __set_category(self, category: str | None, ad_file:str) -> None:
# click on something to trigger automatic category detection # click on something to trigger automatic category detection
@@ -867,10 +905,10 @@ class KleinanzeigenBot(WebScrapingMixin):
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
LOG.info('Scanning your ad overview...') LOG.info('Scanning your ad overview...')
own_ad_urls = await ad_extractor.extract_own_ads_urls() own_ad_urls = await ad_extractor.extract_own_ads_urls()
LOG.info('%d ads were found!', len(own_ad_urls)) LOG.info('%s found.', pluralize("ad", len(own_ad_urls)))
if self.ads_selector == 'all': # download all of your adds if self.ads_selector == 'all': # download all of your adds
LOG.info('Start fetch task for all your ads!') LOG.info('Starting download of all ads...')
success_count = 0 success_count = 0
# call download function for each ad page # call download function for each ad page
@@ -892,7 +930,7 @@ class KleinanzeigenBot(WebScrapingMixin):
# determine ad IDs from links # determine ad IDs from links
ad_id_by_url = {url:ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls} ad_id_by_url = {url:ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls}
LOG.info('Start fetch task for your unsaved ads!') LOG.info("Starting download of not yet downloaded ads...")
new_count = 0 new_count = 0
for ad_url, ad_id in ad_id_by_url.items(): for ad_url, ad_id in ad_id_by_url.items():
# check if ad with ID already saved # check if ad with ID already saved
@@ -903,11 +941,11 @@ class KleinanzeigenBot(WebScrapingMixin):
if await ad_extractor.naviagte_to_ad_page(ad_url): if await ad_extractor.naviagte_to_ad_page(ad_url):
await ad_extractor.download_ad(ad_id) await ad_extractor.download_ad(ad_id)
new_count += 1 new_count += 1
LOG.info('%d new ad(s) were downloaded from your profile.', new_count) LOG.info('%s were downloaded from your profile.', pluralize("new ad", new_count))
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s) elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(',')]
LOG.info('Start fetch task for the ad(s) with the id(s):') LOG.info('Starting download of ad(s) with the id(s):')
LOG.info(' | '.join([str(ad_id) for ad_id in ids])) LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
for ad_id in ids: # call download routine for every id for ad_id in ids: # call download routine for every id
@@ -932,7 +970,7 @@ def main(args:list[str]) -> None:
|_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_| |_.__/ \___/ \__| |_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_| |_.__/ \___/ \__|
|___/ |___/
https://github.com/Second-Hand-Friends/kleinanzeigen-bot https://github.com/Second-Hand-Friends/kleinanzeigen-bot
"""), flush = True) """)[1:], flush = True) # [1:] removes the first empty blank line
utils.configure_console_logging() utils.configure_console_logging()

View File

@@ -9,10 +9,15 @@ import mimetypes
from datetime import datetime from datetime import datetime
from typing import Any, Final from typing import Any, Final
from .i18n import get_translating_logger, pluralize
from .utils import is_integer, parse_decimal, save_dict from .utils import is_integer, parse_decimal, save_dict
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.AdExtractor") __all__ = [
"AdExtractor",
]
LOG:Final[logging.Logger] = get_translating_logger(__name__)
class AdExtractor(WebScrapingMixin): class AdExtractor(WebScrapingMixin):
@@ -42,7 +47,7 @@ class AdExtractor(WebScrapingMixin):
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}') new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
if os.path.exists(new_base_dir): if os.path.exists(new_base_dir):
LOG.info('Deleting current folder of ad...') LOG.info('Deleting current folder of ad %s...', ad_id)
shutil.rmtree(new_base_dir) shutil.rmtree(new_base_dir)
os.mkdir(new_base_dir) os.mkdir(new_base_dir)
LOG.info('New directory for ad created at %s.', new_base_dir) LOG.info('New directory for ad created at %s.', new_base_dir)
@@ -68,7 +73,7 @@ class AdExtractor(WebScrapingMixin):
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large') image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box)) n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
LOG.info('Found %d images.', n_images) LOG.info('Found %s.', pluralize("image", n_images))
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box) img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
img_fn_prefix = 'ad_' + str(ad_id) + '__img' img_fn_prefix = 'ad_' + str(ad_id) + '__img'
@@ -97,13 +102,13 @@ class AdExtractor(WebScrapingMixin):
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})') new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div) img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
except TimeoutError: except TimeoutError:
LOG.error('NEXT button in image gallery somehow missing, abort image fetching.') LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
break break
img_nr += 1 img_nr += 1
LOG.info('Downloaded %d image(s).', dl_counter) LOG.info('Downloaded %s.', pluralize("image", dl_counter))
except TimeoutError: # some ads do not require images except TimeoutError: # some ads do not require images
LOG.warning('No image area found. Continue without downloading images.') LOG.warning('No image area found. Continuing without downloading images.')
return img_paths return img_paths
@@ -153,10 +158,10 @@ class AdExtractor(WebScrapingMixin):
parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination))) parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination)))
if n_buttons > 1: if n_buttons > 1:
multi_page = True multi_page = True
LOG.info('It seems like you have many ads!') LOG.info('It looks like you have many ads!')
else: else:
multi_page = False multi_page = False
LOG.info('It seems like all your ads fit on one overview page.') LOG.info('It looks like all your ads fit on one overview page.')
refs:list[str] = [] refs:list[str] = []
while True: # loop reference extraction until no more forward page while True: # loop reference extraction until no more forward page
@@ -208,7 +213,7 @@ class AdExtractor(WebScrapingMixin):
# close (warning) popup, if given # close (warning) popup, if given
try: try:
await self.web_find(By.ID, 'vap-ovrly-secure') await self.web_find(By.ID, 'vap-ovrly-secure')
LOG.warning('A popup appeared.') LOG.warning('A popup appeared!')
await self.web_click(By.CLASS_NAME, 'mfp-close') await self.web_click(By.CLASS_NAME, 'mfp-close')
await self.web_sleep() await self.web_sleep()
except TimeoutError: except TimeoutError:

View File

@@ -0,0 +1,206 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import ctypes, gettext, inspect, locale, logging, os, sys
from collections.abc import Sized
from typing import Any, Final, NamedTuple
from . import resources, utils # pylint: disable=cyclic-import
# Public API of this module. Lists every name that other modules of the package
# import from here, so that `__all__` agrees with actual usage.
__all__ = [
    "Locale",
    "get_current_locale",
    "get_translating_logger",
    "pluralize",
    "set_current_locale",
]
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
# Plain (non-translating) logger for messages emitted by this module itself.
LOG:Final[logging.Logger] = logging.getLogger(__name__)
class Locale(NamedTuple):
language:str # Language code (e.g., "en", "de")
region:str | None = None # Region code (e.g., "US", "DE")
encoding:str = "UTF-8" # Encoding format (e.g., "UTF-8")
def __str__(self) -> str:
"""
>>> str(Locale("en", "US", "UTF-8"))
'en_US.UTF-8'
>>> str(Locale("en", "US"))
'en_US.UTF-8'
>>> str(Locale("en"))
'en.UTF-8'
>>> str(Locale("de", None, "UTF-8"))
'de.UTF-8'
"""
region_part = f"_{self.region}" if self.region else ""
encoding_part = f".{self.encoding}" if self.encoding else ""
return f"{self.language}{region_part}{encoding_part}"
@staticmethod
def of(locale_string: str) -> 'Locale':
"""
>>> Locale.of("en_US.UTF-8")
Locale(language='en', region='US', encoding='UTF-8')
>>> Locale.of("de.UTF-8")
Locale(language='de', region=None, encoding='UTF-8')
>>> Locale.of("de_DE")
Locale(language='de', region='DE', encoding='UTF-8')
>>> Locale.of("en")
Locale(language='en', region=None, encoding='UTF-8')
>>> Locale.of("en.UTF-8")
Locale(language='en', region=None, encoding='UTF-8')
"""
parts = locale_string.split(".")
language_and_region = parts[0]
encoding = parts[1].upper() if len(parts) > 1 else "UTF-8"
parts = language_and_region.split("_")
language = parts[0]
region = parts[1].upper() if len(parts) > 1 else None
return Locale(language = language, region = region, encoding = encoding)
def _detect_locale() -> Locale:
    """
    Determines the locale to use when none was selected explicitly.

    Lookup order:
    1. the LANG environment variable, if set and non-empty
    2. on Windows only: the user's default UI language queried via ctypes
    3. fallback: en_US.UTF-8

    Returns:
        the detected Locale, e.g. Locale("en", "US", "UTF-8")
    """
    lang = os.environ.get("LANG")
    if lang:
        return Locale.of(lang)

    if os.name == "nt":  # Windows
        try:
            ui_language_id = ctypes.windll.kernel32.GetUserDefaultUILanguage()  # type: ignore[attr-defined,unused-ignore] # mypy
            lang = locale.windows_locale.get(ui_language_id, "en_US")
        except Exception:
            # best effort: detection problems must not prevent the application from starting
            LOG.warning("Error detecting language on Windows", exc_info = True)

    if lang:
        return Locale.of(lang)
    return Locale("en", "US", "UTF-8")
# Locale currently in effect; initialised from the environment at import time
# and replaced by set_current_locale().
_CURRENT_LOCALE: Locale = _detect_locale()
# Cached translation table; None means "not loaded yet", translate() fills it on first use.
_TRANSLATIONS: dict[str, Any] | None = None
def translate(text:object, caller: inspect.FrameInfo | None) -> str:
text = str(text)
if not caller:
return text
global _TRANSLATIONS
if _TRANSLATIONS is None:
try:
_TRANSLATIONS = utils.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
except FileNotFoundError:
_TRANSLATIONS = {}
if not _TRANSLATIONS:
return text
module_name = caller.frame.f_globals.get('__name__') # pylint: disable=redefined-outer-name
file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
if module_name and module_name.endswith(f".{file_basename}"):
module_name = module_name[:-(len(file_basename) + 1)]
file_key = f"{file_basename}.py" if module_name == file_basename else f"{module_name}/{file_basename}.py"
translation = utils.safe_get(_TRANSLATIONS,
file_key,
caller.function,
text
)
return translation if translation else text
# Replace gettext.gettext with a wrapper that additionally runs the message
# through translate(), using the calling code as lookup context.
_original_gettext = gettext.gettext
gettext.gettext = lambda message: translate(_original_gettext(message), utils.get_caller())

# Modules that were imported earlier may already hold a direct reference to the
# original function (e.g. via "from gettext import gettext as _"); re-point those.
# Iterate over a snapshot: sys.modules may change size while it is being walked
# (side effects of attribute access or other threads), which would otherwise
# raise "RuntimeError: dictionary changed size during iteration".
for module_name, module in list(sys.modules.items()):
    if module is None or module_name in sys.builtin_module_names:
        continue
    if getattr(module, '_', None) is _original_gettext:
        setattr(module, '_', gettext.gettext)
    if getattr(module, 'gettext', None) is _original_gettext:
        setattr(module, 'gettext', gettext.gettext)
def get_translating_logger(name: str | None = None) -> logging.Logger:
class TranslatingLogger(logging.Logger):
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
if level != logging.DEBUG: # debug messages should not be translated
msg = translate(msg, utils.get_caller(2))
super()._log(level, msg, *args, **kwargs)
logging.setLoggerClass(TranslatingLogger)
return logging.getLogger(name)
def get_current_locale() -> Locale:
    """
    Returns the locale currently in effect (the module-level `_CURRENT_LOCALE`).
    """
    return _CURRENT_LOCALE
def set_current_locale(new_locale:Locale) -> None:
    """
    Makes `new_locale` the locale in effect.

    If the language differs from the previous one, the cached translation table
    is discarded so that it gets reloaded on the next lookup.
    """
    global _CURRENT_LOCALE, _TRANSLATIONS
    language_changed = _CURRENT_LOCALE.language != new_locale.language
    _CURRENT_LOCALE = new_locale
    if language_changed:
        _TRANSLATIONS = None
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
    """
    Returns the translated noun in singular or plural form depending on `count`.

    The noun is first passed through translate() (using the caller of this function as
    lookup context) and then pluralized with simplified rules for the current language
    (German if the current language is "de", English otherwise).

    :param noun: the noun in singular form
    :param count: a number, or a sized collection whose length is used as the number
    :param prefix_with_count: if True the result is prefixed with the count and a space
    :return: e.g. "1 field", "2 fields" or "fields"

    >>> pluralize("field", 1)
    '1 field'
    >>> pluralize("field", 2)
    '2 fields'
    >>> pluralize("field", 2, prefix_with_count = False)
    'fields'
    """
    noun = translate(noun, utils.get_caller())

    if isinstance(count, Sized):
        count = len(count)

    prefix = f"{count} " if prefix_with_count else ""

    if count == 1:
        return f"{prefix}{noun}"

    # German
    if _CURRENT_LOCALE.language == "de":
        # Special cases
        irregular_plurals = {
            "Attribute": "Attribute",
            "Bild": "Bilder",
            "Feld": "Felder",
        }
        if noun in irregular_plurals:
            return f"{prefix}{irregular_plurals[noun]}"

        # Compound nouns ending with an irregular noun, e.g. Titelbild -> Titelbilder.
        # Both sides are lower-cased: the dictionary keys are capitalized while the
        # tail of a compound noun is written in lower case.
        noun_lower = noun.lower()
        for singular_suffix, plural_suffix in irregular_plurals.items():
            if noun_lower.endswith(singular_suffix.lower()):
                pluralized = noun[:-len(singular_suffix)] + plural_suffix.lower()
                return f"{prefix}{pluralized}"

        # Very simplified German rules
        if noun.endswith("ei"):
            return f"{prefix}{noun}en"  # Datei -> Dateien
        if noun.endswith("e"):
            return f"{prefix}{noun}n"  # Blume -> Blumen
        if noun.endswith(("el", "er", "en")):
            return f"{prefix}{noun}"  # Keller -> Keller
        if noun[-1] in "aeiou":
            return f"{prefix}{noun}s"  # Auto -> Autos
        return f"{prefix}{noun}e"  # Hund -> Hunde

    # English
    if len(noun) < 2:
        return f"{prefix}{noun}s"
    if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
        return f"{prefix}{noun}es"
    if noun.endswith('y') and noun[-2].lower() not in "aeiou":
        return f"{prefix}{noun[:-1]}ies"
    return f"{prefix}{noun}s"

View File

@@ -0,0 +1,201 @@
#################################################
getopt.py:
#################################################
do_longs:
"option --%s requires argument": "Option --%s benötigt ein Argument"
"option --%s must not have an argument": "Option --%s darf kein Argument haben"
long_has_args:
"option --%s not recognized": "Option --%s unbekannt"
"option --%s not a unique prefix": "Option --%s ist kein eindeutiger Prefix"
do_shorts:
"option -%s requires argument": "Option -%s benötigt ein Argument"
short_has_arg:
"option -%s not recognized": "Option -%s unbekannt"
#################################################
kleinanzeigen_bot/__init__.py:
#################################################
run:
"DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden."
'You provided no ads selector. Defaulting to "due".': 'Es wurde kein Anzeigen-Selektor angegeben. Es wird "due" verwendet.'
"DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden."
"DONE: No ads to delete found.": "FERTIG: Keine zu löschnenden Anzeigen gefunden."
'You provided no ads selector. Defaulting to "new".': 'Es wurde kein Anzeigen-Selektor angegeben. Es wird "new" verwendet.'
"Unknown command: %s" : "Unbekannter Befehl: %s"
show_help:
"Usage:": "Verwendung:"
"COMMAND [OPTIONS]" : "BEFEHL [OPTIONEN]"
"Commands:": "Befehle"
parse_args:
"Use --help to display available options.": "Mit --help können die verfügbaren Optionen angezeigt werden."
"More than one command given: %s": "Mehr als ein Befehl angegeben: %s"
configure_file_logging:
"Logging to [%s]...": "Protokollierung in [%s]..."
"App version: %s": "App Version: %s"
"Python version: %s": "Python Version: %s"
load_ads:
"Searching for ad config files...": "Suche nach Anzeigendateien..."
" -> found %s": "-> %s gefunden"
"ad config file": "Anzeigendatei"
"Start fetch task for the ad(s) with id(s):": "Starte Abrufaufgabe für die Anzeige(n) mit ID(s):"
" -> SKIPPED: inactive ad [%s]": " -> ÜBERSPRUNGEN: inaktive Anzeige [%s]"
" -> SKIPPED: ad [%s] is not in list of given ids.": " -> ÜBERSPRUNGEN: Anzeige [%s] ist nicht in der Liste der angegebenen IDs."
" -> SKIPPED: ad [%s] is not new. already has an id assigned.":
" -> ÜBERSPRUNGEN: Anzeige [%s] ist nicht neu. Eine ID wurde bereits zugewiesen."
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days":
" -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Eine erneute Veröffentlichung ist nur alle %s Tage erforderlich."
"Loaded %s": "%s geladen"
"ad": "Anzeige"
load_config:
" -> found %s": "-> %s gefunden"
"category": "Kategorie"
"config": "Konfiguration"
login:
"Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..."
"Already logged in as [%s]. Skipping login.": "Bereits eingeloggt als [%s]. Überspringe Anmeldung."
"Opening login page...": "Öffne Anmeldeseite..."
"Captcha present! Please solve the captcha.": "Captcha vorhanden! Bitte lösen Sie das Captcha."
handle_after_login_logic:
"# Device verification message detected. Please follow the instruction displayed in the Browser.":
"# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen."
"Press ENTER when done...": "EINGABETASTE drücken, wenn erledigt..."
"Handling GDPR disclaimer...": "Verarbeite DSGVO-Hinweis..."
delete_ads:
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
"DONE: Deleted %s": "FERTIG: %s gelöscht"
"ad": "Anzeige"
delete_ad:
"Deleting ad '%s' if already present...": "Lösche Anzeige '%s', falls bereits vorhanden..."
"Expected CSRF Token not found in HTML content!": "Erwartetes CSRF-Token wurde im HTML-Inhalt nicht gefunden!"
publish_ads:
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"
"ad": "Anzeige"
publish_ad:
"Publishing ad '%s'...": "Veröffentliche Anzeige '%s'..."
"Failed to set shipping attribute for type '%s'!": "Fehler beim setzen des Versandattributs für den Typ '%s'!"
"# Captcha present! Please solve the captcha.": "# Captcha vorhanden! Bitte lösen Sie das Captcha."
"Press a key to continue...": "Eine Taste drücken, um fortzufahren..."
" -> SUCCESS: ad published with ID %s": " -> ERFOLG: Anzeige mit ID %s veröffentlicht"
__set_condition:
"Unable to close condition dialog!": "Kann den Dialog für Artikelzustand nicht schließen!"
__upload_images:
" -> found %s": "-> %s gefunden"
"image": "Bild"
" -> uploading image [%s]": " -> Lade Bild [%s] hoch"
download_ads:
"Scanning your ad overview...": "Scanne Anzeigenübersicht..."
'%s found!': '%s gefunden.'
"ad": "Anzeige"
"Starting download of all ads...": "Starte den Download aller Anzeigen..."
'%d of %d ads were downloaded from your profile.': '%d von %d Anzeigen wurden aus Ihrem Profil heruntergeladen.'
"Starting download of not yet downloaded ads...": "Starte den Download noch nicht heruntergeladener Anzeigen..."
'The ad with id %d has already been saved.': 'Die Anzeige mit der ID %d wurde bereits gespeichert.'
'%s were downloaded from your profile.': '%s wurden aus Ihrem Profil heruntergeladen.'
"new ad": "neue Anzeige"
'Starting download of ad(s) with the id(s):': 'Starte Download der Anzeige(n) mit den ID(s):'
'Downloaded ad with id %d': 'Anzeige mit der ID %d heruntergeladen'
'The page with the id %d does not exist!': 'Die Seite mit der ID %d existiert nicht!'
#################################################
kleinanzeigen_bot/extract.py:
#################################################
download_ad:
"Created ads directory at ./%s.": "Verzeichnis für Anzeigen erstellt unter ./%s."
"Deleting current folder of ad %s...": "Lösche aktuellen Ordner der Anzeige %s..."
"New directory for ad created at %s.": "Neues Verzeichnis für Anzeige erstellt unter %s."
_download_images_from_ad_page:
"Found %s.": "%s gefunden."
"NEXT button in image gallery is missing, aborting image fetching.":
"NEXT-Schaltfläche in der Bildergalerie fehlt, Bildabruf abgebrochen."
"Downloaded %s.": "%s heruntergeladen."
"No image area found. Continue without downloading images.":
"Kein Bildbereich gefunden. Fahre fort ohne Bilder herunterzuladen."
extract_ad_id_from_ad_url:
"The ad ID could not be extracted from the given URL %s":
"Die Anzeigen-ID konnte nicht aus der angegebenen URL extrahiert werden: %s"
extract_own_ads_urls:
"There are currently no ads on your profile!": "Derzeit gibt es keine Anzeigen auf deinem Profil!"
"It looks like you have many ads!": "Es scheint viele Anzeigen zu geben!"
"It looks like all your ads fit on one overview page.": "Alle Anzeigen scheinen auf eine Übersichtsseite zu passen."
"Last ad overview page explored.": "Letzte Anzeigenübersichtsseite gesichtet."
naviagte_to_ad_page:
"There is no ad under the given ID.": "Es gibt keine Anzeige unter der angegebenen ID."
"A popup appeared!": "Ein Popup ist erschienen!"
_extract_ad_page_info:
'Extracting information from ad with title \"%s\"': 'Extrahiere Informationen aus der Anzeige mit dem Titel "%s"'
_extract_contact_from_ad_page:
'No street given in the contact.': 'Keine Straße in den Kontaktdaten angegeben.'
#################################################
kleinanzeigen_bot/utils.py:
#################################################
format:
"ERROR": "FEHLER"
"WARNING": "WARNUNG"
"CRITICAL": "KRITISCH"
load_dict_if_exists:
"Loading %s[%s]...": "Lade %s[%s]..."
" from ": " aus "
'Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml':
'Nicht unterstützter Dateityp. Der Dateiname "%s" muss mit *.json, *.yaml oder *.yml enden.'
save_dict:
"Saving [%s]...": "Speichere [%s]..."
on_sigint:
"Aborted on user request.": "Auf Benutzerwunsch abgebrochen."
#################################################
kleinanzeigen_bot/web_scraping_mixin.py:
#################################################
create_browser_session:
"Creating Browser session...": "Erstelle Browsersitzung..."
" -> Browser binary location: %s": " -> Speicherort der Browser-Binärdatei: %s"
"Using existing browser process at %s:%s": "Verwende bestehenden Browser-Prozess unter %s:%s"
"New Browser session is %s": "Neue Browsersitzung ist %s"
" -> Browser profile name: %s": " -> Browser-Profilname: %s"
" -> Custom Browser argument: %s": " -> Benutzerdefiniertes Browser-Argument: %s"
" -> Browser user data dir: %s": " -> Benutzerdatenverzeichnis des Browsers: %s"
" -> Setting chrome prefs [%s]...": " -> Setze Chrome-Einstellungen [%s]..."
" -> Adding Browser extension: [%s]": " -> Füge Browser-Erweiterung hinzu: [%s]"
get_compatible_browser:
"Installed browser for OS %s could not be detected": "Installierter Browser für OS %s konnte nicht erkannt werden"
"Installed browser could not be detected": "Installierter Browser konnte nicht erkannt werden"
web_check:
"Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s"
web_find:
"Unsupported selector type: %s": "Nicht unterstützter Selektortyp: %s"
web_find_all:
"Unsupported selector type: %s": "Nicht unterstützter Selektortyp: %s"
web_sleep:
" ... pausing for %d ms ...": " ... pausiere für %d ms ..."

View File

@@ -3,18 +3,20 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import asyncio, copy, decimal, json, logging, os, re, socket, sys, traceback, time import asyncio, copy, decimal, inspect, json, logging, os, re, socket, sys, traceback, time
from importlib.resources import read_text as get_resource_as_string from importlib.resources import read_text as get_resource_as_string
from collections.abc import Callable, Sized from collections.abc import Callable
from datetime import datetime from datetime import datetime
from gettext import gettext as _
from types import FrameType, ModuleType, TracebackType from types import FrameType, ModuleType, TracebackType
from typing import Any, Final, TypeVar from typing import Any, Final, TypeVar
import coloredlogs import coloredlogs
from ruamel.yaml import YAML from ruamel.yaml import YAML
from .i18n import get_translating_logger
LOG_ROOT:Final[logging.Logger] = logging.getLogger() LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.utils") LOG:Final[logging.Logger] = get_translating_logger(__name__)
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions # https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
T = TypeVar('T') T = TypeVar('T')
@@ -45,7 +47,7 @@ def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864 if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
if condition: if condition:
return return
raise AssertionError(error_message) raise AssertionError(_(error_message))
if timeout < 0: if timeout < 0:
raise AssertionError("[timeout] must be >= 0") raise AssertionError("[timeout] must be >= 0")
@@ -56,10 +58,21 @@ def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout
while not condition(): # type: ignore[operator] while not condition(): # type: ignore[operator]
elapsed = time.time() - start_at elapsed = time.time() - start_at
if elapsed >= timeout: if elapsed >= timeout:
raise AssertionError(error_message) raise AssertionError(_(error_message))
time.sleep(poll_requency) time.sleep(poll_requency)
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
stack = inspect.stack()
try:
for frame in stack[depth + 1:]:
if frame.function and frame.function != "<lambda>":
return frame
return None
finally:
del stack # Clean up the stack to avoid reference cycles
def is_frozen() -> bool: def is_frozen() -> bool:
""" """
>>> is_frozen() >>> is_frozen()
@@ -140,17 +153,30 @@ def safe_get(a_map:dict[Any, Any], *keys:str) -> Any:
def configure_console_logging() -> None: def configure_console_logging() -> None:
class LevelTranslatingFormatter(coloredlogs.ColoredFormatter):  # type: ignore
    """
    Formatter that replaces the log level name in the formatted message
    with the result of passing it through ``_()``.
    """

    def format(self, record:logging.LogRecord) -> str:
        """
        Formats the record via the parent formatter and, for records above DEBUG level,
        substitutes the first occurrence of the level name with its translation.

        :param record: the log record to format
        :return: the formatted message
        """
        formatted:str = super().format(record)

        # records at DEBUG level or below are left untouched
        if record.levelno <= logging.DEBUG:
            return formatted

        translated_level = _(record.levelname)
        if translated_level == record.levelname:
            return formatted  # no translation available

        return formatted.replace(record.levelname, translated_level, 1)
formatter = LevelTranslatingFormatter("[%(levelname)s] %(message)s")
stdout_log = logging.StreamHandler(sys.stderr) stdout_log = logging.StreamHandler(sys.stderr)
stdout_log.setLevel(logging.DEBUG) stdout_log.setLevel(logging.DEBUG)
stdout_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s")) stdout_log.addFilter(type("", (logging.Filter,), {
stdout_log.addFilter(type("", (logging.Filter,), { # pyright: ignore
"filter": lambda rec: rec.levelno <= logging.INFO "filter": lambda rec: rec.levelno <= logging.INFO
})) }))
stdout_log.setFormatter(formatter)
LOG_ROOT.addHandler(stdout_log) LOG_ROOT.addHandler(stdout_log)
stderr_log = logging.StreamHandler(sys.stderr) stderr_log = logging.StreamHandler(sys.stderr)
stderr_log.setLevel(logging.WARNING) stderr_log.setLevel(logging.WARNING)
stderr_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s")) stderr_log.setFormatter(formatter)
LOG_ROOT.addHandler(stderr_log) LOG_ROOT.addHandler(stderr_log)
@@ -175,29 +201,6 @@ def on_sigint(_sig:int, _frame:FrameType | None) -> None:
sys.exit(0) sys.exit(0)
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
"""
>>> pluralize("field", 1)
'1 field'
>>> pluralize("field", 2)
'2 fields'
>>> pluralize("field", 2, prefix_with_count = False)
'fields'
"""
if isinstance(count, Sized):
count = len(count)
prefix = f"{count} " if prefix_with_count else ""
if count == 1:
return f"{prefix}{noun}"
if noun.endswith('s') or noun.endswith('sh') or noun.endswith('ch') or noun.endswith('x') or noun.endswith('z'):
return f"{prefix}{noun}es"
if noun.endswith('y'):
return f"{prefix}{noun[:-1]}ies"
return f"{prefix}{noun}s"
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]: def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
""" """
:raises FileNotFoundError :raises FileNotFoundError
@@ -209,12 +212,12 @@ def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None: def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
filepath = os.path.abspath(filepath) abs_filepath = os.path.abspath(filepath)
LOG.info("Loading %s[%s]...", content_label and content_label + " from " or "", filepath) LOG.info("Loading %s[%s]...", content_label and content_label + _(" from ") or "", abs_filepath)
_, file_ext = os.path.splitext(filepath) __, file_ext = os.path.splitext(filepath)
if file_ext not in [".json", ".yaml", ".yml"]: if file_ext not in (".json", ".yaml", ".yml"):
raise ValueError(f'Unsupported file type. The file name "{filepath}" must end with *.json, *.yaml, or *.yml') raise ValueError(_('Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml') % filepath)
if not os.path.exists(filepath): if not os.path.exists(filepath):
return None return None
@@ -229,9 +232,9 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "
""" """
LOG.debug("Loading %s[%s.%s]...", content_label and content_label + " from " or "", module.__name__, filename) LOG.debug("Loading %s[%s.%s]...", content_label and content_label + " from " or "", module.__name__, filename)
_, file_ext = os.path.splitext(filename) __, file_ext = os.path.splitext(filename)
if file_ext not in (".json", ".yaml", ".yml"): if file_ext not in (".json", ".yaml", ".yml"):
raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml') raise ValueError(f'Unsupported file type. The filename "{filename}" must end with *.json, *.yaml, or *.yml')
content = get_resource_as_string(module, filename) # pylint: disable=deprecated-method content = get_resource_as_string(module, filename) # pylint: disable=deprecated-method
return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy

View File

@@ -5,6 +5,7 @@ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanze
""" """
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time
from collections.abc import Callable, Coroutine, Iterable from collections.abc import Callable, Coroutine, Iterable
from gettext import gettext as _
from typing import cast, Any, Final from typing import cast, Any, Final
try: try:
@@ -18,12 +19,9 @@ from nodriver.core.config import Config
from nodriver.core.element import Element from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page from nodriver.core.tab import Tab as Page
from .i18n import get_translating_logger
from .utils import ensure, is_port_open, T from .utils import ensure, is_port_open, T
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
__all__ = [ __all__ = [
"Browser", "Browser",
"BrowserConfig", "BrowserConfig",
@@ -31,9 +29,11 @@ __all__ = [
"Element", "Element",
"Page", "Page",
"Is", "Is",
"WebScrapingMixin" "WebScrapingMixin",
] ]
LOG:Final[logging.Logger] = get_translating_logger(__name__)
class By(enum.Enum): class By(enum.Enum):
ID = enum.auto() ID = enum.auto()
@@ -77,7 +77,7 @@ class WebScrapingMixin:
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.") ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
else: else:
self.browser_config.binary_location = self.get_compatible_browser() self.browser_config.binary_location = self.get_compatible_browser()
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location) LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
######################################################## ########################################################
# check if an existing browser instance shall be used... # check if an existing browser instance shall be used...
@@ -92,9 +92,9 @@ class WebScrapingMixin:
if remote_port > 0: if remote_port > 0:
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port) LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
if not is_port_open(remote_host, remote_port): ensure(is_port_open(remote_host, remote_port),
raise AssertionError(f"Browser process not reachable at {remote_host}:{remote_port}. " f"Browser process not reachable at {remote_host}:{remote_port}. " +
+ f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml") f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
cfg = Config( cfg = Config(
browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
) )
@@ -140,7 +140,7 @@ class WebScrapingMixin:
browser_args.append(f"--profile-directory={self.browser_config.profile_name}") browser_args.append(f"--profile-directory={self.browser_config.profile_name}")
for browser_arg in self.browser_config.arguments: for browser_arg in self.browser_config.arguments:
LOG.info(" -> Custom Chrome argument: %s", browser_arg) LOG.info(" -> Custom Browser argument: %s", browser_arg)
browser_args.append(browser_arg) browser_args.append(browser_arg)
if not LOG.isEnabledFor(logging.DEBUG): if not LOG.isEnabledFor(logging.DEBUG):
@@ -163,8 +163,8 @@ class WebScrapingMixin:
os.makedirs(profile_dir, exist_ok = True) os.makedirs(profile_dir, exist_ok = True)
prefs_file = os.path.join(profile_dir, "Preferences") prefs_file = os.path.join(profile_dir, "Preferences")
if not os.path.exists(prefs_file): if not os.path.exists(prefs_file):
LOG.info("-> Setting chrome prefs [%s]...", prefs_file) LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
with open(prefs_file, "w", encoding='UTF-8') as fd: with open(prefs_file, "w", encoding = 'UTF-8') as fd:
json.dump({ json.dump({
"credentials_enable_service": False, "credentials_enable_service": False,
"enable_do_not_track": True, "enable_do_not_track": True,
@@ -195,7 +195,7 @@ class WebScrapingMixin:
# load extensions # load extensions
for crx_extension in self.browser_config.extensions: for crx_extension in self.browser_config.extensions:
LOG.info(" -> Adding extension: [%s]", crx_extension) LOG.info(" -> Adding Browser extension: [%s]", crx_extension)
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.") ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
cfg.add_extension(crx_extension) cfg.add_extension(crx_extension)
@@ -250,15 +250,15 @@ class WebScrapingMixin:
] ]
case _ as os_name: case _ as os_name:
raise AssertionError(f"Installed browser for OS [{os_name}] could not be detected") raise AssertionError(_("Installed browser for OS %s could not be detected") % os_name)
for browser_path in browser_paths: for browser_path in browser_paths:
if browser_path and os.path.isfile(browser_path): if browser_path and os.path.isfile(browser_path):
return browser_path return browser_path
raise AssertionError("Installed browser could not be detected") raise AssertionError(_("Installed browser could not be detected"))
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any,Any,T | Never]], *, async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
timeout:int | float = 5, timeout_error_message: str = "") -> T: timeout:int | float = 5, timeout_error_message: str = "") -> T:
""" """
Blocks/waits until the given condition is met. Blocks/waits until the given condition is met.
@@ -307,6 +307,7 @@ class WebScrapingMixin:
&& element.offsetHeight > 0 && element.offsetHeight > 0
} }
""")) """))
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout) elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
match attr: match attr:
@@ -329,7 +330,7 @@ class WebScrapingMixin:
return false return false
} }
""")) """))
raise AssertionError(f"Unsupported attribute: {attr}") raise AssertionError(_("Unsupported attribute: %s") % attr)
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element: async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element:
""" """
@@ -380,21 +381,19 @@ class WebScrapingMixin:
timeout = timeout, timeout = timeout,
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.") timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.")
case By.TEXT: case By.TEXT:
if parent: ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await( return await self.web_await(
lambda: self.page.find_element_by_text(selector_value, True), lambda: self.page.find_element_by_text(selector_value, True),
timeout = timeout, timeout = timeout,
timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.") timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.")
case By.XPATH: case By.XPATH:
if parent: ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await( return await self.web_await(
lambda: self.page.find_element_by_text(selector_value, True), lambda: self.page.find_element_by_text(selector_value, True),
timeout = timeout, timeout = timeout,
timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.") timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.")
raise AssertionError(f"Unsupported selector type: {selector_type}") raise AssertionError(_("Unsupported selector type: %s") % selector_type)
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]: async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]:
""" """
@@ -420,21 +419,19 @@ class WebScrapingMixin:
timeout = timeout, timeout = timeout,
timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.") timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.")
case By.TEXT: case By.TEXT:
if parent: ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await( return await self.web_await(
lambda: self.page.find_elements_by_text(selector_value), lambda: self.page.find_elements_by_text(selector_value),
timeout = timeout, timeout = timeout,
timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.") timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.")
case By.XPATH: case By.XPATH:
if parent: ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await( return await self.web_await(
lambda: self.page.find_elements_by_text(selector_value), lambda: self.page.find_elements_by_text(selector_value),
timeout = timeout, timeout = timeout,
timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.") timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.")
raise AssertionError(f"Unsupported selector type: {selector_type}") raise AssertionError(_("Unsupported selector type: %s") % selector_type)
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element: async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element:
""" """
@@ -503,7 +500,7 @@ class WebScrapingMixin:
content: responseText content: responseText
}} }}
}})) }}))
""", await_promise=True)) """, await_promise = True))
if isinstance(valid_response_codes, int): if isinstance(valid_response_codes, int):
valid_response_codes = [valid_response_codes] valid_response_codes = [valid_response_codes]
ensure( ensure(

58
tests/test_i18n.py Normal file
View File

@@ -0,0 +1,58 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import pytest
from _pytest.monkeypatch import MonkeyPatch # pylint: disable=import-private-name
from kleinanzeigen_bot import i18n
@pytest.mark.parametrize("lang, expected", [
    (None, ("en", "US", "UTF-8")),  # LANG not set -> expected default is ("en", "US", "UTF-8")
    ("fr", ("fr", None, "UTF-8")),  # language code only
    ("fr_CA", ("fr", "CA", "UTF-8")),  # language + region, no encoding
    ("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")),  # language + region + encoding
])
def test_detect_locale(monkeypatch: MonkeyPatch, lang: str | None, expected: i18n.Locale) -> None:
    """
    Verifies the result of i18n._detect_locale() for various values of the LANG environment variable.
    """
    # Prepare the environment: either define LANG or make sure it is absent.
    if lang is not None:
        monkeypatch.setenv("LANG", lang)
    else:
        monkeypatch.delenv("LANG", raising = False)

    # Detect the locale and compare it with the expectation.
    result = i18n._detect_locale()  # pylint: disable=protected-access
    assert result == expected, f"For LANG={lang}, expected {expected} but got {result}"
@pytest.mark.parametrize("lang, noun, count, prefix_with_count, expected", [
    ("en", "field", 1, True, "1 field"),
    ("en", "field", 2, True, "2 fields"),
    ("en", "field", 2, False, "fields"),
    ("en", "attribute", 2, False, "attributes"),
    ("en", "bus", 2, False, "buses"),
    ("en", "city", 2, False, "cities"),
    ("de", "Feld", 1, True, "1 Feld"),
    ("de", "Feld", 2, True, "2 Felder"),
    ("de", "Feld", 2, False, "Felder"),
    ("de", "Anzeige", 2, False, "Anzeigen"),
    ("de", "Attribute", 2, False, "Attribute"),
    ("de", "Bild", 2, False, "Bilder"),
    ("de", "Datei", 2, False, "Dateien"),
    ("de", "Kategorie", 2, False, "Kategorien")
])
def test_pluralize(
    lang:str,
    noun:str,
    count:int,
    prefix_with_count:bool,
    expected: str
) -> None:
    """
    Verifies the result of i18n.pluralize() for English and German nouns.

    The current locale is a module-level global of i18n, therefore the previously
    active locale is restored afterwards so that this test does not influence
    the locale seen by tests that run later.
    """
    previous_locale = i18n.get_current_locale()
    i18n.set_current_locale(i18n.Locale(lang, "US", "UTF_8"))
    try:
        result = i18n.pluralize(noun, count, prefix_with_count)
        assert result == expected, f"For LANG={lang}, expected {expected} but got {result}"
    finally:
        i18n.set_current_locale(previous_locale)

View File

@@ -8,12 +8,13 @@ from typing import cast
import nodriver, pytest import nodriver, pytest
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin
from kleinanzeigen_bot.utils import ensure from kleinanzeigen_bot.utils import ensure
from kleinanzeigen_bot.i18n import get_translating_logger
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin
if os.environ.get("CI"): if os.environ.get("CI"):
logging.getLogger("kleinanzeigen_bot").setLevel(logging.DEBUG) get_translating_logger("kleinanzeigen_bot").setLevel(logging.DEBUG)
logging.getLogger("nodriver").setLevel(logging.DEBUG) get_translating_logger("nodriver").setLevel(logging.DEBUG)
async def atest_init() -> None: async def atest_init() -> None: