mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
feat: add multi-language support
This commit is contained in:
@@ -24,8 +24,9 @@
|
|||||||
**kleinanzeigen-bot** is a console based application to ease publishing of ads to [kleinanzeigen.de](https://kleinanzeigen.de).
|
**kleinanzeigen-bot** is a console based application to ease publishing of ads to [kleinanzeigen.de](https://kleinanzeigen.de).
|
||||||
|
|
||||||
It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages:
|
It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages:
|
||||||
|
- multi-language support (DE/EN)
|
||||||
- supports Microsoft Edge browser (Chromium based)
|
- supports Microsoft Edge browser (Chromium based)
|
||||||
- does not require selenium and chromedrivers
|
- does not require Selenium and chromedrivers
|
||||||
- better captcha handling
|
- better captcha handling
|
||||||
- config:
|
- config:
|
||||||
- use YAML or JSON for config files
|
- use YAML or JSON for config files
|
||||||
|
|||||||
@@ -200,6 +200,7 @@ disable= [
|
|||||||
"broad-except",
|
"broad-except",
|
||||||
"consider-using-assignment-expr",
|
"consider-using-assignment-expr",
|
||||||
"docstring-first-line-empty",
|
"docstring-first-line-empty",
|
||||||
|
"global-statement",
|
||||||
"missing-docstring",
|
"missing-docstring",
|
||||||
"multiple-imports",
|
"multiple-imports",
|
||||||
"multiple-statements",
|
"multiple-statements",
|
||||||
@@ -215,11 +216,13 @@ notes = [ "FIXME", "XXX", "TODO" ] # list of note tags to take in consideration
|
|||||||
[tool.pylint.design]
|
[tool.pylint.design]
|
||||||
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
|
# https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#design-checker
|
||||||
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
|
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html#design-checker-messages
|
||||||
|
max-args = 6 # maximum number of arguments for function / method (R0913)
|
||||||
max-attributes = 15 # maximum number of instance attributes for a class (R0902)
|
max-attributes = 15 # maximum number of instance attributes for a class (R0902)
|
||||||
max-branches = 40 # maximum number of branch for function / method body (R0912)
|
max-branches = 40 # maximum number of branch for function / method body (R0912)
|
||||||
max-locals = 30 # maximum number of local variables for function / method body (R0914)
|
max-locals = 30 # maximum number of local variables for function / method body (R0914)
|
||||||
max-returns = 10 # maximum number of return / yield for function / method body (R0911)
|
max-returns = 15 # maximum number of return / yield for function / method body (R0911)
|
||||||
max-statements = 150 # maximum number of statements in function / method body (R0915)
|
max-statements = 150 # maximum number of statements in function / method body (R0915)
|
||||||
|
max-positional-arguments = 6 # maximum number of positional arguments for function / method (R0917)
|
||||||
max-public-methods = 30 # maximum number of public methods for a class (R0904)
|
max-public-methods = 30 # maximum number of public methods for a class (R0904)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import urllib.parse as urllib_parse
|
|||||||
import urllib.request as urllib_request
|
import urllib.request as urllib_request
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from gettext import gettext as _
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
from typing import Any, Final
|
from typing import Any, Final
|
||||||
|
|
||||||
@@ -17,14 +18,15 @@ from ruamel.yaml import YAML
|
|||||||
from wcmatch import glob
|
from wcmatch import glob
|
||||||
|
|
||||||
from . import utils, resources, extract
|
from . import utils, resources, extract
|
||||||
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, pluralize, safe_get, parse_datetime
|
from .i18n import Locale, get_current_locale, set_current_locale, get_translating_logger, pluralize
|
||||||
|
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, safe_get, parse_datetime
|
||||||
from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
|
from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
|
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
|
||||||
|
|
||||||
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
||||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot")
|
LOG:Final[logging.Logger] = get_translating_logger(__name__)
|
||||||
LOG.setLevel(logging.INFO)
|
LOG.setLevel(logging.INFO)
|
||||||
|
|
||||||
colorama.init()
|
colorama.init()
|
||||||
@@ -130,7 +132,40 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
else:
|
else:
|
||||||
exe = "python -m kleinanzeigen_bot"
|
exe = "python -m kleinanzeigen_bot"
|
||||||
|
|
||||||
print(textwrap.dedent(f"""\
|
if get_current_locale().language == "de":
|
||||||
|
print(textwrap.dedent(f"""\
|
||||||
|
Verwendung: {colorama.Fore.LIGHTMAGENTA_EX}{exe} BEFEHL [OPTIONEN]{colorama.Style.RESET_ALL}
|
||||||
|
|
||||||
|
Befehle:
|
||||||
|
publish - (erneutes) Veröffentlichen von Anzeigen
|
||||||
|
verify - Überprüft der Konfigurationsdateien
|
||||||
|
delete - Löscht Anzeigen
|
||||||
|
download - Lädt eine oder mehrere Anzeigen herunter
|
||||||
|
--
|
||||||
|
help - Zeigt diese Hilfe an (Standardbefehl)
|
||||||
|
version - Zeigt die Version der Anwendung an
|
||||||
|
|
||||||
|
Optionen:
|
||||||
|
--ads=all|due|new|<id(s)> (publish) - Gibt an, welche Anzeigen (erneut) veröffentlicht werden sollen (STANDARD: due)
|
||||||
|
Mögliche Werte:
|
||||||
|
* all: Veröffentlicht alle Anzeigen erneut, ignoriert republication_interval
|
||||||
|
* due: Veröffentlicht alle neuen Anzeigen und erneut entsprechend dem republication_interval
|
||||||
|
* new: Veröffentlicht nur neue Anzeigen (d.h. Anzeigen ohne ID in der Konfigurationsdatei)
|
||||||
|
* <id(s)>: Gibt eine oder mehrere Anzeigen-IDs an, die veröffentlicht werden sollen, z. B. "--ads=1,2,3", ignoriert republication_interval
|
||||||
|
--ads=all|new|<id(s)> (download) - Gibt an, welche Anzeigen heruntergeladen werden sollen (STANDARD: new)
|
||||||
|
Mögliche Werte:
|
||||||
|
* all: Lädt alle Anzeigen aus Ihrem Profil herunter
|
||||||
|
* new: Lädt Anzeigen aus Ihrem Profil herunter, die lokal noch nicht gespeichert sind
|
||||||
|
* <id(s)>: Gibt eine oder mehrere Anzeigen-IDs zum Herunterladen an, z. B. "--ads=1,2,3"
|
||||||
|
--force - Alias für '--ads=all'
|
||||||
|
--keep-old - Verhindert das Löschen alter Anzeigen bei erneuter Veröffentlichung
|
||||||
|
--config=<PATH> - Pfad zur YAML- oder JSON-Konfigurationsdatei (STANDARD: ./config.yaml)
|
||||||
|
--lang=en|de - Anzeigesprache (STANDARD: Systemsprache, wenn unterstützt, sonst Englisch)
|
||||||
|
--logfile=<PATH> - Pfad zur Protokolldatei (STANDARD: ./kleinanzeigen-bot.log)
|
||||||
|
-v, --verbose - Aktiviert detaillierte Ausgabe – nur nützlich zur Fehlerbehebung
|
||||||
|
"""))
|
||||||
|
else:
|
||||||
|
print(textwrap.dedent(f"""\
|
||||||
Usage: {colorama.Fore.LIGHTMAGENTA_EX}{exe} COMMAND [OPTIONS]{colorama.Style.RESET_ALL}
|
Usage: {colorama.Fore.LIGHTMAGENTA_EX}{exe} COMMAND [OPTIONS]{colorama.Style.RESET_ALL}
|
||||||
|
|
||||||
Commands:
|
Commands:
|
||||||
@@ -158,8 +193,9 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
--keep-old - don't delete old ads on republication
|
--keep-old - don't delete old ads on republication
|
||||||
--config=<PATH> - path to the config YAML or JSON file (DEFAULT: ./config.yaml)
|
--config=<PATH> - path to the config YAML or JSON file (DEFAULT: ./config.yaml)
|
||||||
--logfile=<PATH> - path to the logfile (DEFAULT: ./kleinanzeigen-bot.log)
|
--logfile=<PATH> - path to the logfile (DEFAULT: ./kleinanzeigen-bot.log)
|
||||||
|
--lang=en|de - Displaylanguage (STANDARD: Systemlangauge if supported, otherwise English)
|
||||||
-v, --verbose - enables verbose output - only useful when troubleshooting issues
|
-v, --verbose - enables verbose output - only useful when troubleshooting issues
|
||||||
"""))
|
"""))
|
||||||
|
|
||||||
def parse_args(self, args:list[str]) -> None:
|
def parse_args(self, args:list[str]) -> None:
|
||||||
try:
|
try:
|
||||||
@@ -170,11 +206,12 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
"help",
|
"help",
|
||||||
"keep-old",
|
"keep-old",
|
||||||
"logfile=",
|
"logfile=",
|
||||||
|
"lang=",
|
||||||
"verbose"
|
"verbose"
|
||||||
])
|
])
|
||||||
except getopt.error as ex:
|
except getopt.error as ex:
|
||||||
LOG.error(ex.msg)
|
LOG.error(ex.msg)
|
||||||
LOG.error("Use --help to display available options")
|
LOG.error("Use --help to display available options.")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
for option, value in options:
|
for option, value in options:
|
||||||
@@ -195,6 +232,8 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
self.ads_selector = "all"
|
self.ads_selector = "all"
|
||||||
case "--keep-old":
|
case "--keep-old":
|
||||||
self.keep_old_ads = True
|
self.keep_old_ads = True
|
||||||
|
case "--lang":
|
||||||
|
set_current_locale(Locale.of(value))
|
||||||
case "-v" | "--verbose":
|
case "-v" | "--verbose":
|
||||||
LOG.setLevel(logging.DEBUG)
|
LOG.setLevel(logging.DEBUG)
|
||||||
logging.getLogger("nodriver").setLevel(logging.INFO)
|
logging.getLogger("nodriver").setLevel(logging.INFO)
|
||||||
@@ -244,7 +283,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
if re.compile(r'\d+[,\d+]*').search(self.ads_selector):
|
||||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
ids = [int(n) for n in self.ads_selector.split(',')]
|
||||||
use_specific_ads = True
|
use_specific_ads = True
|
||||||
LOG.info('Start fetch task for the ad(s) with the id(s):')
|
LOG.info('Start fetch task for the ad(s) with id(s):')
|
||||||
LOG.info(' | '.join([str(id_) for id_ in ids]))
|
LOG.info(' | '.join([str(id_) for id_ in ids]))
|
||||||
|
|
||||||
ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml")
|
ad_fields = utils.load_dict_from_module(resources, "ad_fields.yaml")
|
||||||
@@ -261,7 +300,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if use_specific_ads:
|
if use_specific_ads:
|
||||||
if not ad_cfg["id"] in ids:
|
if ad_cfg["id"] not in ids:
|
||||||
LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file)
|
LOG.info(" -> SKIPPED: ad [%s] is not in list of given ids.", ad_file)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@@ -362,7 +401,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
|
|
||||||
def load_config(self) -> None:
|
def load_config(self) -> None:
|
||||||
config_defaults = utils.load_dict_from_module(resources, "config_defaults.yaml")
|
config_defaults = utils.load_dict_from_module(resources, "config_defaults.yaml")
|
||||||
config = utils.load_dict_if_exists(self.config_file_path, "config")
|
config = utils.load_dict_if_exists(self.config_file_path, _("config"))
|
||||||
|
|
||||||
if config is None:
|
if config is None:
|
||||||
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
|
LOG.warning("Config file %s does not exist. Creating it with default values...", self.config_file_path)
|
||||||
@@ -427,7 +466,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
try:
|
try:
|
||||||
await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = 4)
|
await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = 4)
|
||||||
LOG.warning("############################################")
|
LOG.warning("############################################")
|
||||||
LOG.warning("# Device verification message detected. Please handle it.")
|
LOG.warning("# Device verification message detected. Please follow the instruction displayed in the Browser.")
|
||||||
LOG.warning("############################################")
|
LOG.warning("############################################")
|
||||||
await ainput("Press ENTER when done...")
|
await ainput("Press ENTER when done...")
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
@@ -460,7 +499,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
await self.web_sleep()
|
await self.web_sleep()
|
||||||
|
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
LOG.info("DONE: Deleting %s", pluralize("ad", count))
|
LOG.info("DONE: Deleted %s", pluralize("ad", count))
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
|
|
||||||
async def delete_ad(self, ad_cfg: dict[str, Any], delete_old_ads_by_title: bool) -> bool:
|
async def delete_ad(self, ad_cfg: dict[str, Any], delete_old_ads_by_title: bool) -> bool:
|
||||||
@@ -469,8 +508,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
||||||
csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")
|
csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")
|
||||||
csrf_token = csrf_token_elem.attrs["content"]
|
csrf_token = csrf_token_elem.attrs["content"]
|
||||||
if csrf_token is None:
|
ensure(csrf_token is not None, "Expected CSRF Token not found in HTML content!")
|
||||||
raise AssertionError("Expected CSRF Token not found in HTML content!")
|
|
||||||
|
|
||||||
if delete_old_ads_by_title:
|
if delete_old_ads_by_title:
|
||||||
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
|
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
|
||||||
@@ -666,7 +704,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
LOG.warning("# Captcha present! Please solve the captcha.")
|
LOG.warning("# Captcha present! Please solve the captcha.")
|
||||||
LOG.warning("############################################")
|
LOG.warning("############################################")
|
||||||
await self.web_scroll_page_down()
|
await self.web_scroll_page_down()
|
||||||
input("Press a key to continue...")
|
input(_("Press a key to continue..."))
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -722,7 +760,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
# Click continue button
|
# Click continue button
|
||||||
await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Bestätigen")]]]')
|
await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Bestätigen")]]]')
|
||||||
except TimeoutError as ex:
|
except TimeoutError as ex:
|
||||||
raise TimeoutError("Unable to close condition dialog") from ex
|
raise TimeoutError(_("Unable to close condition dialog!")) from ex
|
||||||
|
|
||||||
async def __set_category(self, category: str | None, ad_file:str) -> None:
|
async def __set_category(self, category: str | None, ad_file:str) -> None:
|
||||||
# click on something to trigger automatic category detection
|
# click on something to trigger automatic category detection
|
||||||
@@ -867,10 +905,10 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
||||||
LOG.info('Scanning your ad overview...')
|
LOG.info('Scanning your ad overview...')
|
||||||
own_ad_urls = await ad_extractor.extract_own_ads_urls()
|
own_ad_urls = await ad_extractor.extract_own_ads_urls()
|
||||||
LOG.info('%d ads were found!', len(own_ad_urls))
|
LOG.info('%s found.', pluralize("ad", len(own_ad_urls)))
|
||||||
|
|
||||||
if self.ads_selector == 'all': # download all of your adds
|
if self.ads_selector == 'all': # download all of your adds
|
||||||
LOG.info('Start fetch task for all your ads!')
|
LOG.info('Starting download of all ads...')
|
||||||
|
|
||||||
success_count = 0
|
success_count = 0
|
||||||
# call download function for each ad page
|
# call download function for each ad page
|
||||||
@@ -892,7 +930,7 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
# determine ad IDs from links
|
# determine ad IDs from links
|
||||||
ad_id_by_url = {url:ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls}
|
ad_id_by_url = {url:ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls}
|
||||||
|
|
||||||
LOG.info('Start fetch task for your unsaved ads!')
|
LOG.info("Starting download of not yet downloaded ads...")
|
||||||
new_count = 0
|
new_count = 0
|
||||||
for ad_url, ad_id in ad_id_by_url.items():
|
for ad_url, ad_id in ad_id_by_url.items():
|
||||||
# check if ad with ID already saved
|
# check if ad with ID already saved
|
||||||
@@ -903,11 +941,11 @@ class KleinanzeigenBot(WebScrapingMixin):
|
|||||||
if await ad_extractor.naviagte_to_ad_page(ad_url):
|
if await ad_extractor.naviagte_to_ad_page(ad_url):
|
||||||
await ad_extractor.download_ad(ad_id)
|
await ad_extractor.download_ad(ad_id)
|
||||||
new_count += 1
|
new_count += 1
|
||||||
LOG.info('%d new ad(s) were downloaded from your profile.', new_count)
|
LOG.info('%s were downloaded from your profile.', pluralize("new ad", new_count))
|
||||||
|
|
||||||
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
|
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
|
||||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
ids = [int(n) for n in self.ads_selector.split(',')]
|
||||||
LOG.info('Start fetch task for the ad(s) with the id(s):')
|
LOG.info('Starting download of ad(s) with the id(s):')
|
||||||
LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
|
LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
|
||||||
|
|
||||||
for ad_id in ids: # call download routine for every id
|
for ad_id in ids: # call download routine for every id
|
||||||
@@ -932,7 +970,7 @@ def main(args:list[str]) -> None:
|
|||||||
|_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_| |_.__/ \___/ \__|
|
|_|\_\_|\___|_|_| |_|\__,_|_| |_/___\___|_|\__, |\___|_| |_| |_.__/ \___/ \__|
|
||||||
|___/
|
|___/
|
||||||
https://github.com/Second-Hand-Friends/kleinanzeigen-bot
|
https://github.com/Second-Hand-Friends/kleinanzeigen-bot
|
||||||
"""), flush = True)
|
""")[1:], flush = True) # [1:] removes the first empty blank line
|
||||||
|
|
||||||
utils.configure_console_logging()
|
utils.configure_console_logging()
|
||||||
|
|
||||||
|
|||||||
@@ -9,10 +9,15 @@ import mimetypes
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, Final
|
from typing import Any, Final
|
||||||
|
|
||||||
|
from .i18n import get_translating_logger, pluralize
|
||||||
from .utils import is_integer, parse_decimal, save_dict
|
from .utils import is_integer, parse_decimal, save_dict
|
||||||
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
|
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
|
||||||
|
|
||||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.AdExtractor")
|
__all__ = [
|
||||||
|
"AdExtractor",
|
||||||
|
]
|
||||||
|
|
||||||
|
LOG:Final[logging.Logger] = get_translating_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class AdExtractor(WebScrapingMixin):
|
class AdExtractor(WebScrapingMixin):
|
||||||
@@ -42,7 +47,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
|
|
||||||
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
|
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
|
||||||
if os.path.exists(new_base_dir):
|
if os.path.exists(new_base_dir):
|
||||||
LOG.info('Deleting current folder of ad...')
|
LOG.info('Deleting current folder of ad %s...', ad_id)
|
||||||
shutil.rmtree(new_base_dir)
|
shutil.rmtree(new_base_dir)
|
||||||
os.mkdir(new_base_dir)
|
os.mkdir(new_base_dir)
|
||||||
LOG.info('New directory for ad created at %s.', new_base_dir)
|
LOG.info('New directory for ad created at %s.', new_base_dir)
|
||||||
@@ -68,7 +73,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
|
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
|
||||||
|
|
||||||
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
|
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
|
||||||
LOG.info('Found %d images.', n_images)
|
LOG.info('Found %s.', pluralize("image", n_images))
|
||||||
|
|
||||||
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
|
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
|
||||||
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
|
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
|
||||||
@@ -97,13 +102,13 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
||||||
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
|
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.error('NEXT button in image gallery somehow missing, abort image fetching.')
|
LOG.error('NEXT button in image gallery somehow missing, aborting image fetching.')
|
||||||
break
|
break
|
||||||
img_nr += 1
|
img_nr += 1
|
||||||
LOG.info('Downloaded %d image(s).', dl_counter)
|
LOG.info('Downloaded %s.', pluralize("image", dl_counter))
|
||||||
|
|
||||||
except TimeoutError: # some ads do not require images
|
except TimeoutError: # some ads do not require images
|
||||||
LOG.warning('No image area found. Continue without downloading images.')
|
LOG.warning('No image area found. Continuing without downloading images.')
|
||||||
|
|
||||||
return img_paths
|
return img_paths
|
||||||
|
|
||||||
@@ -153,10 +158,10 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination)))
|
parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination)))
|
||||||
if n_buttons > 1:
|
if n_buttons > 1:
|
||||||
multi_page = True
|
multi_page = True
|
||||||
LOG.info('It seems like you have many ads!')
|
LOG.info('It looks like you have many ads!')
|
||||||
else:
|
else:
|
||||||
multi_page = False
|
multi_page = False
|
||||||
LOG.info('It seems like all your ads fit on one overview page.')
|
LOG.info('It looks like all your ads fit on one overview page.')
|
||||||
|
|
||||||
refs:list[str] = []
|
refs:list[str] = []
|
||||||
while True: # loop reference extraction until no more forward page
|
while True: # loop reference extraction until no more forward page
|
||||||
@@ -208,7 +213,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
# close (warning) popup, if given
|
# close (warning) popup, if given
|
||||||
try:
|
try:
|
||||||
await self.web_find(By.ID, 'vap-ovrly-secure')
|
await self.web_find(By.ID, 'vap-ovrly-secure')
|
||||||
LOG.warning('A popup appeared.')
|
LOG.warning('A popup appeared!')
|
||||||
await self.web_click(By.CLASS_NAME, 'mfp-close')
|
await self.web_click(By.CLASS_NAME, 'mfp-close')
|
||||||
await self.web_sleep()
|
await self.web_sleep()
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
|
|||||||
206
src/kleinanzeigen_bot/i18n.py
Normal file
206
src/kleinanzeigen_bot/i18n.py
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
"""
|
||||||
|
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
"""
|
||||||
|
import ctypes, gettext, inspect, locale, logging, os, sys
|
||||||
|
from collections.abc import Sized
|
||||||
|
from typing import Any, Final, NamedTuple
|
||||||
|
|
||||||
|
from . import resources, utils # pylint: disable=cyclic-import
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Locale",
|
||||||
|
"get_translating_logger",
|
||||||
|
]
|
||||||
|
|
||||||
|
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
||||||
|
LOG:Final[logging.Logger] = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Locale(NamedTuple):
|
||||||
|
|
||||||
|
language:str # Language code (e.g., "en", "de")
|
||||||
|
region:str | None = None # Region code (e.g., "US", "DE")
|
||||||
|
encoding:str = "UTF-8" # Encoding format (e.g., "UTF-8")
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
"""
|
||||||
|
>>> str(Locale("en", "US", "UTF-8"))
|
||||||
|
'en_US.UTF-8'
|
||||||
|
>>> str(Locale("en", "US"))
|
||||||
|
'en_US.UTF-8'
|
||||||
|
>>> str(Locale("en"))
|
||||||
|
'en.UTF-8'
|
||||||
|
>>> str(Locale("de", None, "UTF-8"))
|
||||||
|
'de.UTF-8'
|
||||||
|
"""
|
||||||
|
region_part = f"_{self.region}" if self.region else ""
|
||||||
|
encoding_part = f".{self.encoding}" if self.encoding else ""
|
||||||
|
return f"{self.language}{region_part}{encoding_part}"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def of(locale_string: str) -> 'Locale':
|
||||||
|
"""
|
||||||
|
>>> Locale.of("en_US.UTF-8")
|
||||||
|
Locale(language='en', region='US', encoding='UTF-8')
|
||||||
|
>>> Locale.of("de.UTF-8")
|
||||||
|
Locale(language='de', region=None, encoding='UTF-8')
|
||||||
|
>>> Locale.of("de_DE")
|
||||||
|
Locale(language='de', region='DE', encoding='UTF-8')
|
||||||
|
>>> Locale.of("en")
|
||||||
|
Locale(language='en', region=None, encoding='UTF-8')
|
||||||
|
>>> Locale.of("en.UTF-8")
|
||||||
|
Locale(language='en', region=None, encoding='UTF-8')
|
||||||
|
"""
|
||||||
|
parts = locale_string.split(".")
|
||||||
|
language_and_region = parts[0]
|
||||||
|
encoding = parts[1].upper() if len(parts) > 1 else "UTF-8"
|
||||||
|
|
||||||
|
parts = language_and_region.split("_")
|
||||||
|
language = parts[0]
|
||||||
|
region = parts[1].upper() if len(parts) > 1 else None
|
||||||
|
|
||||||
|
return Locale(language = language, region = region, encoding = encoding)
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_locale() -> Locale:
    """
    Determines the locale to use from the environment.

    - If the LANG environment variable is set to a non-empty value, it is parsed.
    - Otherwise, on Windows (os.name == "nt"), the default UI language is queried
      via the Windows API (ctypes) and mapped to a locale name, using "en_US" for
      unknown language ids.
    - If nothing could be determined, en_US.UTF-8 is used.

    Returns:
        the detected Locale, e.g. Locale("en", "US", "UTF-8")
    """
    detected = os.environ.get("LANG", None)
    if detected:
        return Locale.of(detected)

    if os.name == "nt":  # Windows
        try:
            ui_language_id = ctypes.windll.kernel32.GetUserDefaultUILanguage()  # type: ignore[attr-defined,unused-ignore] # mypy
            detected = locale.windows_locale.get(ui_language_id, "en_US")
        except Exception:
            # best effort only: fall through to the default below
            LOG.warning("Error detecting language on Windows", exc_info = True)

    if detected:
        return Locale.of(detected)
    return Locale("en", "US", "UTF-8")
|
||||||
|
|
||||||
|
|
||||||
|
# Locale currently in effect; detected once at import time and replaced via set_current_locale().
_CURRENT_LOCALE: Locale = _detect_locale()
# Cached translation table for the current language: None means "not loaded yet" (translate() loads it on demand),
# an empty dict means no translation resource was found for the language.
_TRANSLATIONS: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def translate(text:object, caller: inspect.FrameInfo | None) -> str:
    """
    Translates the given text into the language of the current locale.

    The translation table is loaded on first use from the resource
    "translations.<language>.yaml" and is looked up by the caller's source file,
    the caller's function name and the text itself.

    :param text: the message to translate; converted with str() first
    :param caller: frame info of the code that issued the message; if falsy the
        text is returned untranslated
    :return: the translated text, or the original text if no (non-empty)
        translation is available
    """
    global _TRANSLATIONS

    message = str(text)
    if not caller:
        return message

    if _TRANSLATIONS is None:
        try:
            _TRANSLATIONS = utils.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
        except FileNotFoundError:
            # no translation resource for this language: remember that, so we don't retry on every call
            _TRANSLATIONS = {}

    if not _TRANSLATIONS:
        return message

    # build the lookup key "<package>/<file>.py" (or just "<file>.py") from the caller's module
    package_name = caller.frame.f_globals.get('__name__')
    file_stem = os.path.splitext(os.path.basename(caller.filename))[0]
    if package_name and package_name.endswith(f".{file_stem}"):
        # strip the trailing ".<file>" so that only the package part remains
        package_name = package_name[:-(len(file_stem) + 1)]

    if package_name == file_stem:
        file_key = f"{file_stem}.py"
    else:
        file_key = f"{package_name}/{file_stem}.py"

    # presumably safe_get returns a falsy value when any key along the path is missing - verify in utils
    translated = utils.safe_get(_TRANSLATIONS, file_key, caller.function, message)
    return translated or message
|
||||||
|
|
||||||
|
|
||||||
|
# Replace gettext.gettext with a wrapper that additionally runs the message through translate().
# The pristine function is kept so the wrapper can delegate to it and so that existing aliases
# of it can be recognized below.
_original_gettext = gettext.gettext
gettext.gettext = lambda message: translate(_original_gettext(message), utils.get_caller())

# Re-bind `_` / `gettext` in modules that imported the original function before this module was loaded
# (e.g. via `from gettext import gettext as _`).
# Iterate over a snapshot of sys.modules: attribute lookups on a module may trigger imports,
# which would change the dict during iteration and raise a RuntimeError.
for module_name, module in list(sys.modules.items()):
    if module is None or module_name in sys.builtin_module_names:
        continue
    for _alias in ('_', 'gettext'):
        if getattr(module, _alias, None) is _original_gettext:
            setattr(module, _alias, gettext.gettext)
|
||||||
|
|
||||||
|
|
||||||
|
def get_translating_logger(name: str | None = None) -> logging.Logger:
|
||||||
|
|
||||||
|
class TranslatingLogger(logging.Logger):
|
||||||
|
|
||||||
|
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
|
||||||
|
if level != logging.DEBUG: # debug messages should not be translated
|
||||||
|
msg = translate(msg, utils.get_caller(2))
|
||||||
|
super()._log(level, msg, *args, **kwargs)
|
||||||
|
|
||||||
|
logging.setLoggerClass(TranslatingLogger)
|
||||||
|
return logging.getLogger(name)
|
||||||
|
|
||||||
|
|
||||||
|
def get_current_locale() -> Locale:
    """
    Returns the locale currently in effect, i.e. the value of the module-level _CURRENT_LOCALE.
    """
    return _CURRENT_LOCALE
|
||||||
|
|
||||||
|
|
||||||
|
def set_current_locale(new_locale:Locale) -> None:
    """
    Makes the given locale the current one.

    If its language differs from the previous locale's language, the cached
    translation table is discarded so that it is reloaded on the next translate() call.

    :param new_locale: the locale to activate
    """
    global _CURRENT_LOCALE, _TRANSLATIONS

    language_changed = _CURRENT_LOCALE.language != new_locale.language
    _CURRENT_LOCALE = new_locale
    if language_changed:
        _TRANSLATIONS = None
|
||||||
|
|
||||||
|
|
||||||
|
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
    """
    Translates `noun` for the calling function and returns it in singular or
    plural form depending on `count`, using simplified German or English rules.

    :param noun: singular noun (untranslated)
    :param count: the quantity, or a sized collection whose length is the quantity
    :param prefix_with_count: if True the result is prefixed with "<count> "
    :return: the (optionally prefixed) noun, pluralized unless count is 1

    >>> pluralize("field", 1)
    '1 field'
    >>> pluralize("field", 2)
    '2 fields'
    >>> pluralize("field", 2, prefix_with_count = False)
    'fields'
    """
    noun = translate(noun, utils.get_caller())

    if isinstance(count, Sized):
        count = len(count)

    prefix = f"{count} " if prefix_with_count else ""

    if count == 1:
        return f"{prefix}{noun}"

    # German
    if _CURRENT_LOCALE.language == "de":
        # Special cases
        irregular_plurals = {
            "Attribute": "Attribute",
            "Bild": "Bilder",
            "Feld": "Felder",
        }
        if noun in irregular_plurals:
            return f"{prefix}{irregular_plurals[noun]}"

        # Compound nouns ending in an irregular noun, e.g. Eingabefeld -> Eingabefelder.
        # Both sides are lower-cased: the dict keys are capitalised, so comparing the
        # lower-cased noun against them unchanged could never match.
        noun_lower = noun.lower()
        for singular_suffix, plural_suffix in irregular_plurals.items():
            if noun_lower.endswith(singular_suffix.lower()):
                pluralized = noun[:-len(singular_suffix)] + plural_suffix.lower()
                return f"{prefix}{pluralized}"

        # Very simplified German rules
        if noun.endswith("ei"):
            return f"{prefix}{noun}en"  # Datei -> Dateien
        if noun.endswith("e"):
            return f"{prefix}{noun}n"  # Blume -> Blumen
        if noun.endswith(("el", "er", "en")):
            return f"{prefix}{noun}"  # Keller -> Keller
        # endswith() instead of noun[-1] so that an empty noun does not raise IndexError
        if noun.endswith(("a", "e", "i", "o", "u")):
            return f"{prefix}{noun}s"  # Auto -> Autos
        return f"{prefix}{noun}e"  # Hund -> Hunde

    # English
    if len(noun) < 2:
        return f"{prefix}{noun}s"
    if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
        return f"{prefix}{noun}es"
    # only consonant + y becomes -ies (city -> cities, but day -> days)
    if noun.endswith('y') and noun[-2].lower() not in "aeiou":
        return f"{prefix}{noun[:-1]}ies"
    return f"{prefix}{noun}s"
|
||||||
201
src/kleinanzeigen_bot/resources/translations.de.yaml
Normal file
201
src/kleinanzeigen_bot/resources/translations.de.yaml
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
#################################################
|
||||||
|
getopt.py:
|
||||||
|
#################################################
|
||||||
|
do_longs:
|
||||||
|
"option --%s requires argument": "Option --%s benötigt ein Argument"
|
||||||
|
"option --%s must not have an argument": "Option --%s darf kein Argument haben"
|
||||||
|
long_has_args:
|
||||||
|
"option --%s not recognized": "Option --%s unbekannt"
|
||||||
|
"option --%s not a unique prefix": "Option --%s ist kein eindeutiger Prefix"
|
||||||
|
do_shorts:
|
||||||
|
"option -%s requires argument": "Option -%s benötigt ein Argument"
|
||||||
|
short_has_arg:
|
||||||
|
"option -%s not recognized": "Option -%s unbekannt"
|
||||||
|
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
kleinanzeigen_bot/__init__.py:
|
||||||
|
#################################################
|
||||||
|
run:
|
||||||
|
"DONE: No configuration errors found.": "FERTIG: Keine Konfigurationsfehler gefunden."
|
||||||
|
'You provided no ads selector. Defaulting to "due".': 'Es wurde kein Anzeigen-Selektor angegeben. Es wird "due" verwendet.'
|
||||||
|
"DONE: No new/outdated ads found.": "FERTIG: Keine neuen/veralteten Anzeigen gefunden."
|
||||||
|
"DONE: No ads to delete found.": "FERTIG: Keine zu löschenden Anzeigen gefunden."
|
||||||
|
'You provided no ads selector. Defaulting to "new".': 'Es wurde kein Anzeigen-Selektor angegeben. Es wird "new" verwendet.'
|
||||||
|
"Unknown command: %s" : "Unbekannter Befehl: %s"
|
||||||
|
|
||||||
|
show_help:
|
||||||
|
"Usage:": "Verwendung:"
|
||||||
|
"COMMAND [OPTIONS]" : "BEFEHL [OPTIONEN]"
|
||||||
|
"Commands:": "Befehle:"
|
||||||
|
|
||||||
|
parse_args:
|
||||||
|
"Use --help to display available options.": "Mit --help können die verfügbaren Optionen angezeigt werden."
|
||||||
|
"More than one command given: %s": "Mehr als ein Befehl angegeben: %s"
|
||||||
|
|
||||||
|
configure_file_logging:
|
||||||
|
"Logging to [%s]...": "Protokollierung in [%s]..."
|
||||||
|
"App version: %s": "App Version: %s"
|
||||||
|
"Python version: %s": "Python Version: %s"
|
||||||
|
|
||||||
|
load_ads:
|
||||||
|
"Searching for ad config files...": "Suche nach Anzeigendateien..."
|
||||||
|
" -> found %s": " -> %s gefunden"
|
||||||
|
"ad config file": "Anzeigendatei"
|
||||||
|
"Start fetch task for the ad(s) with id(s):": "Starte Abrufaufgabe für die Anzeige(n) mit ID(s):"
|
||||||
|
" -> SKIPPED: inactive ad [%s]": " -> ÜBERSPRUNGEN: inaktive Anzeige [%s]"
|
||||||
|
" -> SKIPPED: ad [%s] is not in list of given ids.": " -> ÜBERSPRUNGEN: Anzeige [%s] ist nicht in der Liste der angegebenen IDs."
|
||||||
|
" -> SKIPPED: ad [%s] is not new. already has an id assigned.":
|
||||||
|
" -> ÜBERSPRUNGEN: Anzeige [%s] ist nicht neu. Eine ID wurde bereits zugewiesen."
|
||||||
|
" -> SKIPPED: ad [%s] was last published %d days ago. republication is only required every %s days":
|
||||||
|
" -> ÜBERSPRUNGEN: Anzeige [%s] wurde zuletzt vor %d Tagen veröffentlicht. Eine erneute Veröffentlichung ist nur alle %s Tage erforderlich."
|
||||||
|
"Loaded %s": "%s geladen"
|
||||||
|
"ad": "Anzeige"
|
||||||
|
|
||||||
|
load_config:
|
||||||
|
" -> found %s": " -> %s gefunden"
|
||||||
|
"category": "Kategorie"
|
||||||
|
"config": "Konfiguration"
|
||||||
|
|
||||||
|
login:
|
||||||
|
"Checking if already logged in...": "Überprüfe, ob bereits eingeloggt..."
|
||||||
|
"Already logged in as [%s]. Skipping login.": "Bereits eingeloggt als [%s]. Überspringe Anmeldung."
|
||||||
|
"Opening login page...": "Öffne Anmeldeseite..."
|
||||||
|
"Captcha present! Please solve the captcha.": "Captcha vorhanden! Bitte lösen Sie das Captcha."
|
||||||
|
|
||||||
|
handle_after_login_logic:
|
||||||
|
"# Device verification message detected. Please follow the instruction displayed in the Browser.":
|
||||||
|
"# Nachricht zur Geräteverifizierung erkannt. Bitte den Anweisungen im Browser folgen."
|
||||||
|
"Press ENTER when done...": "EINGABETASTE drücken, wenn erledigt..."
|
||||||
|
"Handling GDPR disclaimer...": "Verarbeite DSGVO-Hinweis..."
|
||||||
|
|
||||||
|
delete_ads:
|
||||||
|
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
|
||||||
|
"DONE: Deleted %s": "FERTIG: %s gelöscht"
|
||||||
|
"ad": "Anzeige"
|
||||||
|
|
||||||
|
delete_ad:
|
||||||
|
"Deleting ad '%s' if already present...": "Lösche Anzeige '%s', falls bereits vorhanden..."
|
||||||
|
"Expected CSRF Token not found in HTML content!": "Erwartetes CSRF-Token wurde im HTML-Inhalt nicht gefunden!"
|
||||||
|
|
||||||
|
publish_ads:
|
||||||
|
"Processing %s/%s: '%s' from [%s]...": "Verarbeite %s/%s: '%s' von [%s]..."
|
||||||
|
"DONE: (Re-)published %s": "FERTIG: %s (erneut) veröffentlicht"
|
||||||
|
"ad": "Anzeige"
|
||||||
|
|
||||||
|
publish_ad:
|
||||||
|
"Publishing ad '%s'...": "Veröffentliche Anzeige '%s'..."
|
||||||
|
"Failed to set shipping attribute for type '%s'!": "Fehler beim Setzen des Versandattributs für den Typ '%s'!"
|
||||||
|
"# Captcha present! Please solve the captcha.": "# Captcha vorhanden! Bitte lösen Sie das Captcha."
|
||||||
|
"Press a key to continue...": "Eine Taste drücken, um fortzufahren..."
|
||||||
|
" -> SUCCESS: ad published with ID %s": " -> ERFOLG: Anzeige mit ID %s veröffentlicht"
|
||||||
|
|
||||||
|
__set_condition:
|
||||||
|
"Unable to close condition dialog!": "Kann den Dialog für Artikelzustand nicht schließen!"
|
||||||
|
|
||||||
|
__upload_images:
|
||||||
|
" -> found %s": " -> %s gefunden"
|
||||||
|
"image": "Bild"
|
||||||
|
" -> uploading image [%s]": " -> Lade Bild [%s] hoch"
|
||||||
|
|
||||||
|
download_ads:
|
||||||
|
"Scanning your ad overview...": "Scanne Anzeigenübersicht..."
|
||||||
|
'%s found!': '%s gefunden.'
|
||||||
|
"ad": "Anzeige"
|
||||||
|
"Starting download of all ads...": "Starte den Download aller Anzeigen..."
|
||||||
|
'%d of %d ads were downloaded from your profile.': '%d von %d Anzeigen wurden aus Ihrem Profil heruntergeladen.'
|
||||||
|
"Starting download of not yet downloaded ads...": "Starte den Download noch nicht heruntergeladener Anzeigen..."
|
||||||
|
'The ad with id %d has already been saved.': 'Die Anzeige mit der ID %d wurde bereits gespeichert.'
|
||||||
|
'%s were downloaded from your profile.': '%s wurden aus Ihrem Profil heruntergeladen.'
|
||||||
|
"new ad": "neue Anzeige"
|
||||||
|
'Starting download of ad(s) with the id(s):': 'Starte Download der Anzeige(n) mit den ID(s):'
|
||||||
|
'Downloaded ad with id %d': 'Anzeige mit der ID %d heruntergeladen'
|
||||||
|
'The page with the id %d does not exist!': 'Die Seite mit der ID %d existiert nicht!'
|
||||||
|
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
kleinanzeigen_bot/extract.py:
|
||||||
|
#################################################
|
||||||
|
download_ad:
|
||||||
|
"Created ads directory at ./%s.": "Verzeichnis für Anzeigen erstellt unter ./%s."
|
||||||
|
"Deleting current folder of ad %s...": "Lösche aktuellen Ordner der Anzeige %s..."
|
||||||
|
"New directory for ad created at %s.": "Neues Verzeichnis für Anzeige erstellt unter %s."
|
||||||
|
|
||||||
|
_download_images_from_ad_page:
|
||||||
|
"Found %s.": "%s gefunden."
|
||||||
|
"NEXT button in image gallery is missing, aborting image fetching.":
|
||||||
|
"NEXT-Schaltfläche in der Bildergalerie fehlt, Bildabruf abgebrochen."
|
||||||
|
"Downloaded %s.": "%s heruntergeladen."
|
||||||
|
"No image area found. Continue without downloading images.":
|
||||||
|
"Kein Bildbereich gefunden. Fahre fort ohne Bilder herunterzuladen."
|
||||||
|
|
||||||
|
extract_ad_id_from_ad_url:
|
||||||
|
"The ad ID could not be extracted from the given URL %s":
|
||||||
|
"Die Anzeigen-ID konnte nicht aus der angegebenen URL extrahiert werden: %s"
|
||||||
|
|
||||||
|
extract_own_ads_urls:
|
||||||
|
"There are currently no ads on your profile!": "Derzeit gibt es keine Anzeigen auf deinem Profil!"
|
||||||
|
"It looks like you have many ads!": "Es scheint viele Anzeigen zu geben!"
|
||||||
|
"It looks like all your ads fit on one overview page.": "Alle Anzeigen scheinen auf eine Übersichtsseite zu passen."
|
||||||
|
"Last ad overview page explored.": "Letzte Anzeigenübersichtsseite gesichtet."
|
||||||
|
|
||||||
|
naviagte_to_ad_page:
|
||||||
|
"There is no ad under the given ID.": "Es gibt keine Anzeige unter der angegebenen ID."
|
||||||
|
"A popup appeared!": "Ein Popup ist erschienen!"
|
||||||
|
|
||||||
|
_extract_ad_page_info:
|
||||||
|
'Extracting information from ad with title "%s"': 'Extrahiere Informationen aus der Anzeige mit dem Titel "%s"'
|
||||||
|
|
||||||
|
_extract_contact_from_ad_page:
|
||||||
|
'No street given in the contact.': 'Keine Straße in den Kontaktdaten angegeben.'
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
kleinanzeigen_bot/utils.py:
|
||||||
|
#################################################
|
||||||
|
format:
|
||||||
|
"ERROR": "FEHLER"
|
||||||
|
"WARNING": "WARNUNG"
|
||||||
|
"CRITICAL": "KRITISCH"
|
||||||
|
|
||||||
|
load_dict_if_exists:
|
||||||
|
"Loading %s[%s]...": "Lade %s[%s]..."
|
||||||
|
" from ": " aus "
|
||||||
|
'Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml':
|
||||||
|
'Nicht unterstützter Dateityp. Der Dateiname "%s" muss mit *.json, *.yaml oder *.yml enden.'
|
||||||
|
|
||||||
|
save_dict:
|
||||||
|
"Saving [%s]...": "Speichere [%s]..."
|
||||||
|
|
||||||
|
on_sigint:
|
||||||
|
"Aborted on user request.": "Auf Benutzerwunsch abgebrochen."
|
||||||
|
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
kleinanzeigen_bot/web_scraping_mixin.py:
|
||||||
|
#################################################
|
||||||
|
create_browser_session:
|
||||||
|
"Creating Browser session...": "Erstelle Browsersitzung..."
|
||||||
|
" -> Browser binary location: %s": " -> Speicherort der Browser-Binärdatei: %s"
|
||||||
|
"Using existing browser process at %s:%s": "Verwende bestehenden Browser-Prozess unter %s:%s"
|
||||||
|
"New Browser session is %s": "Neue Browsersitzung ist %s"
|
||||||
|
" -> Browser profile name: %s": " -> Browser-Profilname: %s"
|
||||||
|
" -> Custom Browser argument: %s": " -> Benutzerdefiniertes Browser-Argument: %s"
|
||||||
|
" -> Browser user data dir: %s": " -> Benutzerdatenverzeichnis des Browsers: %s"
|
||||||
|
" -> Setting chrome prefs [%s]...": " -> Setze Chrome-Einstellungen [%s]..."
|
||||||
|
" -> Adding Browser extension: [%s]": " -> Füge Browser-Erweiterung hinzu: [%s]"
|
||||||
|
|
||||||
|
get_compatible_browser:
|
||||||
|
"Installed browser for OS %s could not be detected": "Installierter Browser für OS %s konnte nicht erkannt werden"
|
||||||
|
"Installed browser could not be detected": "Installierter Browser konnte nicht erkannt werden"
|
||||||
|
|
||||||
|
web_check:
|
||||||
|
"Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s"
|
||||||
|
|
||||||
|
web_find:
|
||||||
|
"Unsupported selector type: %s": "Nicht unterstützter Selektortyp: %s"
|
||||||
|
|
||||||
|
web_find_all:
|
||||||
|
"Unsupported selector type: %s": "Nicht unterstützter Selektortyp: %s"
|
||||||
|
|
||||||
|
web_sleep:
|
||||||
|
" ... pausing for %d ms ...": " ... pausiere für %d ms ..."
|
||||||
@@ -3,18 +3,20 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
import asyncio, copy, decimal, json, logging, os, re, socket, sys, traceback, time
|
import asyncio, copy, decimal, inspect, json, logging, os, re, socket, sys, traceback, time
|
||||||
from importlib.resources import read_text as get_resource_as_string
|
from importlib.resources import read_text as get_resource_as_string
|
||||||
from collections.abc import Callable, Sized
|
from collections.abc import Callable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from gettext import gettext as _
|
||||||
from types import FrameType, ModuleType, TracebackType
|
from types import FrameType, ModuleType, TracebackType
|
||||||
from typing import Any, Final, TypeVar
|
from typing import Any, Final, TypeVar
|
||||||
|
|
||||||
import coloredlogs
|
import coloredlogs
|
||||||
from ruamel.yaml import YAML
|
from ruamel.yaml import YAML
|
||||||
|
from .i18n import get_translating_logger
|
||||||
|
|
||||||
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
|
||||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.utils")
|
LOG:Final[logging.Logger] = get_translating_logger(__name__)
|
||||||
|
|
||||||
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
|
# https://mypy.readthedocs.io/en/stable/generics.html#generic-functions
|
||||||
T = TypeVar('T')
|
T = TypeVar('T')
|
||||||
@@ -45,7 +47,7 @@ def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout
|
|||||||
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
|
if not isinstance(condition, Callable): # type: ignore[arg-type] # https://github.com/python/mypy/issues/6864
|
||||||
if condition:
|
if condition:
|
||||||
return
|
return
|
||||||
raise AssertionError(error_message)
|
raise AssertionError(_(error_message))
|
||||||
|
|
||||||
if timeout < 0:
|
if timeout < 0:
|
||||||
raise AssertionError("[timeout] must be >= 0")
|
raise AssertionError("[timeout] must be >= 0")
|
||||||
@@ -56,10 +58,21 @@ def ensure(condition:Any | bool | Callable[[], bool], error_message:str, timeout
|
|||||||
while not condition(): # type: ignore[operator]
|
while not condition(): # type: ignore[operator]
|
||||||
elapsed = time.time() - start_at
|
elapsed = time.time() - start_at
|
||||||
if elapsed >= timeout:
|
if elapsed >= timeout:
|
||||||
raise AssertionError(error_message)
|
raise AssertionError(_(error_message))
|
||||||
time.sleep(poll_requency)
|
time.sleep(poll_requency)
|
||||||
|
|
||||||
|
|
||||||
|
def get_caller(depth: int = 1) -> inspect.FrameInfo | None:
|
||||||
|
stack = inspect.stack()
|
||||||
|
try:
|
||||||
|
for frame in stack[depth + 1:]:
|
||||||
|
if frame.function and frame.function != "<lambda>":
|
||||||
|
return frame
|
||||||
|
return None
|
||||||
|
finally:
|
||||||
|
del stack # Clean up the stack to avoid reference cycles
|
||||||
|
|
||||||
|
|
||||||
def is_frozen() -> bool:
|
def is_frozen() -> bool:
|
||||||
"""
|
"""
|
||||||
>>> is_frozen()
|
>>> is_frozen()
|
||||||
@@ -140,17 +153,30 @@ def safe_get(a_map:dict[Any, Any], *keys:str) -> Any:
|
|||||||
|
|
||||||
|
|
||||||
def configure_console_logging() -> None:
|
def configure_console_logging() -> None:
|
||||||
|
|
||||||
|
class LevelTranslatingFormatter(coloredlogs.ColoredFormatter): # type: ignore
|
||||||
|
|
||||||
|
def format(self, record:logging.LogRecord) -> str:
|
||||||
|
msg:str = super().format(record)
|
||||||
|
if record.levelno > logging.DEBUG:
|
||||||
|
levelname = _(record.levelname)
|
||||||
|
if levelname != record.levelname:
|
||||||
|
msg = msg.replace(record.levelname, levelname, 1)
|
||||||
|
return msg
|
||||||
|
|
||||||
|
formatter = LevelTranslatingFormatter("[%(levelname)s] %(message)s")
|
||||||
|
|
||||||
stdout_log = logging.StreamHandler(sys.stderr)
|
stdout_log = logging.StreamHandler(sys.stderr)
|
||||||
stdout_log.setLevel(logging.DEBUG)
|
stdout_log.setLevel(logging.DEBUG)
|
||||||
stdout_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s"))
|
stdout_log.addFilter(type("", (logging.Filter,), {
|
||||||
stdout_log.addFilter(type("", (logging.Filter,), { # pyright: ignore
|
|
||||||
"filter": lambda rec: rec.levelno <= logging.INFO
|
"filter": lambda rec: rec.levelno <= logging.INFO
|
||||||
}))
|
}))
|
||||||
|
stdout_log.setFormatter(formatter)
|
||||||
LOG_ROOT.addHandler(stdout_log)
|
LOG_ROOT.addHandler(stdout_log)
|
||||||
|
|
||||||
stderr_log = logging.StreamHandler(sys.stderr)
|
stderr_log = logging.StreamHandler(sys.stderr)
|
||||||
stderr_log.setLevel(logging.WARNING)
|
stderr_log.setLevel(logging.WARNING)
|
||||||
stderr_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s"))
|
stderr_log.setFormatter(formatter)
|
||||||
LOG_ROOT.addHandler(stderr_log)
|
LOG_ROOT.addHandler(stderr_log)
|
||||||
|
|
||||||
|
|
||||||
@@ -175,29 +201,6 @@ def on_sigint(_sig:int, _frame:FrameType | None) -> None:
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
|
|
||||||
"""
|
|
||||||
>>> pluralize("field", 1)
|
|
||||||
'1 field'
|
|
||||||
>>> pluralize("field", 2)
|
|
||||||
'2 fields'
|
|
||||||
>>> pluralize("field", 2, prefix_with_count = False)
|
|
||||||
'fields'
|
|
||||||
"""
|
|
||||||
if isinstance(count, Sized):
|
|
||||||
count = len(count)
|
|
||||||
|
|
||||||
prefix = f"{count} " if prefix_with_count else ""
|
|
||||||
|
|
||||||
if count == 1:
|
|
||||||
return f"{prefix}{noun}"
|
|
||||||
if noun.endswith('s') or noun.endswith('sh') or noun.endswith('ch') or noun.endswith('x') or noun.endswith('z'):
|
|
||||||
return f"{prefix}{noun}es"
|
|
||||||
if noun.endswith('y'):
|
|
||||||
return f"{prefix}{noun[:-1]}ies"
|
|
||||||
return f"{prefix}{noun}s"
|
|
||||||
|
|
||||||
|
|
||||||
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
|
def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
:raises FileNotFoundError
|
:raises FileNotFoundError
|
||||||
@@ -209,12 +212,12 @@ def load_dict(filepath:str, content_label:str = "") -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
|
def load_dict_if_exists(filepath:str, content_label:str = "") -> dict[str, Any] | None:
|
||||||
filepath = os.path.abspath(filepath)
|
abs_filepath = os.path.abspath(filepath)
|
||||||
LOG.info("Loading %s[%s]...", content_label and content_label + " from " or "", filepath)
|
LOG.info("Loading %s[%s]...", content_label and content_label + _(" from ") or "", abs_filepath)
|
||||||
|
|
||||||
_, file_ext = os.path.splitext(filepath)
|
__, file_ext = os.path.splitext(filepath)
|
||||||
if file_ext not in [".json", ".yaml", ".yml"]:
|
if file_ext not in (".json", ".yaml", ".yml"):
|
||||||
raise ValueError(f'Unsupported file type. The file name "{filepath}" must end with *.json, *.yaml, or *.yml')
|
raise ValueError(_('Unsupported file type. The filename "%s" must end with *.json, *.yaml, or *.yml') % filepath)
|
||||||
|
|
||||||
if not os.path.exists(filepath):
|
if not os.path.exists(filepath):
|
||||||
return None
|
return None
|
||||||
@@ -229,9 +232,9 @@ def load_dict_from_module(module:ModuleType, filename:str, content_label:str = "
|
|||||||
"""
|
"""
|
||||||
LOG.debug("Loading %s[%s.%s]...", content_label and content_label + " from " or "", module.__name__, filename)
|
LOG.debug("Loading %s[%s.%s]...", content_label and content_label + " from " or "", module.__name__, filename)
|
||||||
|
|
||||||
_, file_ext = os.path.splitext(filename)
|
__, file_ext = os.path.splitext(filename)
|
||||||
if file_ext not in (".json", ".yaml", ".yml"):
|
if file_ext not in (".json", ".yaml", ".yml"):
|
||||||
raise ValueError(f'Unsupported file type. The file name "{filename}" must end with *.json, *.yaml, or *.yml')
|
raise ValueError(f'Unsupported file type. The filename "{filename}" must end with *.json, *.yaml, or *.yml')
|
||||||
|
|
||||||
content = get_resource_as_string(module, filename) # pylint: disable=deprecated-method
|
content = get_resource_as_string(module, filename) # pylint: disable=deprecated-method
|
||||||
return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy
|
return json.loads(content) if filename.endswith(".json") else YAML().load(content) # type: ignore[no-any-return] # mypy
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanze
|
|||||||
"""
|
"""
|
||||||
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time
|
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time
|
||||||
from collections.abc import Callable, Coroutine, Iterable
|
from collections.abc import Callable, Coroutine, Iterable
|
||||||
|
from gettext import gettext as _
|
||||||
from typing import cast, Any, Final
|
from typing import cast, Any, Final
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -18,12 +19,9 @@ from nodriver.core.config import Config
|
|||||||
from nodriver.core.element import Element
|
from nodriver.core.element import Element
|
||||||
from nodriver.core.tab import Tab as Page
|
from nodriver.core.tab import Tab as Page
|
||||||
|
|
||||||
|
from .i18n import get_translating_logger
|
||||||
from .utils import ensure, is_port_open, T
|
from .utils import ensure, is_port_open, T
|
||||||
|
|
||||||
|
|
||||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Browser",
|
"Browser",
|
||||||
"BrowserConfig",
|
"BrowserConfig",
|
||||||
@@ -31,9 +29,11 @@ __all__ = [
|
|||||||
"Element",
|
"Element",
|
||||||
"Page",
|
"Page",
|
||||||
"Is",
|
"Is",
|
||||||
"WebScrapingMixin"
|
"WebScrapingMixin",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
LOG:Final[logging.Logger] = get_translating_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class By(enum.Enum):
|
class By(enum.Enum):
|
||||||
ID = enum.auto()
|
ID = enum.auto()
|
||||||
@@ -77,7 +77,7 @@ class WebScrapingMixin:
|
|||||||
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
||||||
else:
|
else:
|
||||||
self.browser_config.binary_location = self.get_compatible_browser()
|
self.browser_config.binary_location = self.get_compatible_browser()
|
||||||
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)
|
LOG.info(" -> Browser binary location: %s", self.browser_config.binary_location)
|
||||||
|
|
||||||
########################################################
|
########################################################
|
||||||
# check if an existing browser instance shall be used...
|
# check if an existing browser instance shall be used...
|
||||||
@@ -92,9 +92,9 @@ class WebScrapingMixin:
|
|||||||
|
|
||||||
if remote_port > 0:
|
if remote_port > 0:
|
||||||
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
|
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
|
||||||
if not is_port_open(remote_host, remote_port):
|
ensure(is_port_open(remote_host, remote_port),
|
||||||
raise AssertionError(f"Browser process not reachable at {remote_host}:{remote_port}. "
|
f"Browser process not reachable at {remote_host}:{remote_port}. " +
|
||||||
+ f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
|
f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
|
||||||
cfg = Config(
|
cfg = Config(
|
||||||
browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
|
browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
|
||||||
)
|
)
|
||||||
@@ -140,7 +140,7 @@ class WebScrapingMixin:
|
|||||||
browser_args.append(f"--profile-directory={self.browser_config.profile_name}")
|
browser_args.append(f"--profile-directory={self.browser_config.profile_name}")
|
||||||
|
|
||||||
for browser_arg in self.browser_config.arguments:
|
for browser_arg in self.browser_config.arguments:
|
||||||
LOG.info(" -> Custom Chrome argument: %s", browser_arg)
|
LOG.info(" -> Custom Browser argument: %s", browser_arg)
|
||||||
browser_args.append(browser_arg)
|
browser_args.append(browser_arg)
|
||||||
|
|
||||||
if not LOG.isEnabledFor(logging.DEBUG):
|
if not LOG.isEnabledFor(logging.DEBUG):
|
||||||
@@ -163,8 +163,8 @@ class WebScrapingMixin:
|
|||||||
os.makedirs(profile_dir, exist_ok = True)
|
os.makedirs(profile_dir, exist_ok = True)
|
||||||
prefs_file = os.path.join(profile_dir, "Preferences")
|
prefs_file = os.path.join(profile_dir, "Preferences")
|
||||||
if not os.path.exists(prefs_file):
|
if not os.path.exists(prefs_file):
|
||||||
LOG.info("-> Setting chrome prefs [%s]...", prefs_file)
|
LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
|
||||||
with open(prefs_file, "w", encoding='UTF-8') as fd:
|
with open(prefs_file, "w", encoding = 'UTF-8') as fd:
|
||||||
json.dump({
|
json.dump({
|
||||||
"credentials_enable_service": False,
|
"credentials_enable_service": False,
|
||||||
"enable_do_not_track": True,
|
"enable_do_not_track": True,
|
||||||
@@ -195,7 +195,7 @@ class WebScrapingMixin:
|
|||||||
|
|
||||||
# load extensions
|
# load extensions
|
||||||
for crx_extension in self.browser_config.extensions:
|
for crx_extension in self.browser_config.extensions:
|
||||||
LOG.info(" -> Adding extension: [%s]", crx_extension)
|
LOG.info(" -> Adding Browser extension: [%s]", crx_extension)
|
||||||
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
||||||
cfg.add_extension(crx_extension)
|
cfg.add_extension(crx_extension)
|
||||||
|
|
||||||
@@ -250,15 +250,15 @@ class WebScrapingMixin:
|
|||||||
]
|
]
|
||||||
|
|
||||||
case _ as os_name:
|
case _ as os_name:
|
||||||
raise AssertionError(f"Installed browser for OS [{os_name}] could not be detected")
|
raise AssertionError(_("Installed browser for OS %s could not be detected") % os_name)
|
||||||
|
|
||||||
for browser_path in browser_paths:
|
for browser_path in browser_paths:
|
||||||
if browser_path and os.path.isfile(browser_path):
|
if browser_path and os.path.isfile(browser_path):
|
||||||
return browser_path
|
return browser_path
|
||||||
|
|
||||||
raise AssertionError("Installed browser could not be detected")
|
raise AssertionError(_("Installed browser could not be detected"))
|
||||||
|
|
||||||
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any,Any,T | Never]], *,
|
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any, Any, T | Never]], *,
|
||||||
timeout:int | float = 5, timeout_error_message: str = "") -> T:
|
timeout:int | float = 5, timeout_error_message: str = "") -> T:
|
||||||
"""
|
"""
|
||||||
Blocks/waits until the given condition is met.
|
Blocks/waits until the given condition is met.
|
||||||
@@ -307,6 +307,7 @@ class WebScrapingMixin:
|
|||||||
&& element.offsetHeight > 0
|
&& element.offsetHeight > 0
|
||||||
}
|
}
|
||||||
"""))
|
"""))
|
||||||
|
|
||||||
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
|
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
|
||||||
|
|
||||||
match attr:
|
match attr:
|
||||||
@@ -329,7 +330,7 @@ class WebScrapingMixin:
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
"""))
|
"""))
|
||||||
raise AssertionError(f"Unsupported attribute: {attr}")
|
raise AssertionError(_("Unsupported attribute: %s") % attr)
|
||||||
|
|
||||||
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element:
|
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element:
|
||||||
"""
|
"""
|
||||||
@@ -380,21 +381,19 @@ class WebScrapingMixin:
|
|||||||
timeout = timeout,
|
timeout = timeout,
|
||||||
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.")
|
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.")
|
||||||
case By.TEXT:
|
case By.TEXT:
|
||||||
if parent:
|
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
|
||||||
return await self.web_await(
|
return await self.web_await(
|
||||||
lambda: self.page.find_element_by_text(selector_value, True),
|
lambda: self.page.find_element_by_text(selector_value, True),
|
||||||
timeout = timeout,
|
timeout = timeout,
|
||||||
timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.")
|
timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.")
|
||||||
case By.XPATH:
|
case By.XPATH:
|
||||||
if parent:
|
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
|
||||||
return await self.web_await(
|
return await self.web_await(
|
||||||
lambda: self.page.find_element_by_text(selector_value, True),
|
lambda: self.page.find_element_by_text(selector_value, True),
|
||||||
timeout = timeout,
|
timeout = timeout,
|
||||||
timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.")
|
timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.")
|
||||||
|
|
||||||
raise AssertionError(f"Unsupported selector type: {selector_type}")
|
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
|
||||||
|
|
||||||
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]:
|
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]:
|
||||||
"""
|
"""
|
||||||
@@ -420,21 +419,19 @@ class WebScrapingMixin:
|
|||||||
timeout = timeout,
|
timeout = timeout,
|
||||||
timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.")
|
timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.")
|
||||||
case By.TEXT:
|
case By.TEXT:
|
||||||
if parent:
|
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
|
||||||
return await self.web_await(
|
return await self.web_await(
|
||||||
lambda: self.page.find_elements_by_text(selector_value),
|
lambda: self.page.find_elements_by_text(selector_value),
|
||||||
timeout = timeout,
|
timeout = timeout,
|
||||||
timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.")
|
timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.")
|
||||||
case By.XPATH:
|
case By.XPATH:
|
||||||
if parent:
|
ensure(not parent, f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
|
||||||
return await self.web_await(
|
return await self.web_await(
|
||||||
lambda: self.page.find_elements_by_text(selector_value),
|
lambda: self.page.find_elements_by_text(selector_value),
|
||||||
timeout = timeout,
|
timeout = timeout,
|
||||||
timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.")
|
timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.")
|
||||||
|
|
||||||
raise AssertionError(f"Unsupported selector type: {selector_type}")
|
raise AssertionError(_("Unsupported selector type: %s") % selector_type)
|
||||||
|
|
||||||
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element:
|
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element:
|
||||||
"""
|
"""
|
||||||
@@ -503,7 +500,7 @@ class WebScrapingMixin:
|
|||||||
content: responseText
|
content: responseText
|
||||||
}}
|
}}
|
||||||
}}))
|
}}))
|
||||||
""", await_promise=True))
|
""", await_promise = True))
|
||||||
if isinstance(valid_response_codes, int):
|
if isinstance(valid_response_codes, int):
|
||||||
valid_response_codes = [valid_response_codes]
|
valid_response_codes = [valid_response_codes]
|
||||||
ensure(
|
ensure(
|
||||||
|
|||||||
58
tests/test_i18n.py
Normal file
58
tests/test_i18n.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
"""
|
||||||
|
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
from _pytest.monkeypatch import MonkeyPatch # pylint: disable=import-private-name
|
||||||
|
from kleinanzeigen_bot import i18n
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("lang, expected", [
    (None, ("en", "US", "UTF-8")),  # LANG not set: the fallback ("en", "US", "UTF-8") is expected
    ("fr", ("fr", None, "UTF-8")),  # language code only
    ("fr_CA", ("fr", "CA", "UTF-8")),  # language + region, no encoding
    ("pt_BR.iso8859-1", ("pt", "BR", "ISO8859-1")),  # language + region + encoding
])
def test_detect_locale(monkeypatch: MonkeyPatch, lang: str | None, expected: i18n.Locale) -> None:
    """
    Checks the locale returned by i18n._detect_locale() for several values of the LANG environment variable.

    :param monkeypatch: pytest fixture used to set or remove LANG for the duration of the test
    :param lang: value to assign to LANG, or None to make sure LANG is not set at all
    :param expected: the (language, region, encoding) triple the detected locale must equal
    """
    # Bring the LANG environment variable into the state this case requires.
    if lang is not None:
        monkeypatch.setenv("LANG", lang)
    else:
        monkeypatch.delenv("LANG", raising = False)

    # Detect the locale and compare it with the expectation.
    result = i18n._detect_locale()  # pylint: disable=protected-access
    assert result == expected, f"For LANG={lang}, expected {expected} but got {result}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("lang, noun, count, prefix_with_count, expected", [
    ("en", "field", 1, True, "1 field"),
    ("en", "field", 2, True, "2 fields"),
    ("en", "field", 2, False, "fields"),
    ("en", "attribute", 2, False, "attributes"),
    ("en", "bus", 2, False, "buses"),
    ("en", "city", 2, False, "cities"),
    ("de", "Feld", 1, True, "1 Feld"),
    ("de", "Feld", 2, True, "2 Felder"),
    ("de", "Feld", 2, False, "Felder"),
    ("de", "Anzeige", 2, False, "Anzeigen"),
    ("de", "Attribute", 2, False, "Attribute"),
    ("de", "Bild", 2, False, "Bilder"),
    ("de", "Datei", 2, False, "Dateien"),
    ("de", "Kategorie", 2, False, "Kategorien")
])
def test_pluralize(
    lang:str,
    noun:str,
    count:int,
    prefix_with_count:bool,
    expected: str
) -> None:
    """
    Verifies the output of i18n.pluralize() for English and German nouns.

    :param lang: language code of the locale that is made current before pluralize() is called
    :param noun: the singular noun handed to pluralize()
    :param count: the quantity handed to pluralize()
    :param prefix_with_count: third positional argument of pluralize(); the cases with True expect
        the count in front of the noun, the cases with False expect the noun only
    :param expected: the exact string pluralize() must return
    """
    # The encoding is spelled "UTF-8", matching the locale tuples used elsewhere in this test module.
    # NOTE(review): the locale set here is not restored after the test; presumably it is shared
    # module state in i18n - confirm that no later test relies on the previously active locale.
    i18n.set_current_locale(i18n.Locale(lang, "US", "UTF-8"))

    result = i18n.pluralize(noun, count, prefix_with_count)
    assert result == expected, f"For LANG={lang}, expected {expected} but got {result}"
|
||||||
@@ -8,12 +8,13 @@ from typing import cast
|
|||||||
|
|
||||||
import nodriver, pytest
|
import nodriver, pytest
|
||||||
|
|
||||||
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin
|
|
||||||
from kleinanzeigen_bot.utils import ensure
|
from kleinanzeigen_bot.utils import ensure
|
||||||
|
from kleinanzeigen_bot.i18n import get_translating_logger
|
||||||
|
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin
|
||||||
|
|
||||||
if os.environ.get("CI"):
|
if os.environ.get("CI"):
|
||||||
logging.getLogger("kleinanzeigen_bot").setLevel(logging.DEBUG)
|
get_translating_logger("kleinanzeigen_bot").setLevel(logging.DEBUG)
|
||||||
logging.getLogger("nodriver").setLevel(logging.DEBUG)
|
get_translating_logger("nodriver").setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
async def atest_init() -> None:
|
async def atest_init() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user