Files
kleinanzeigen-bot/src/kleinanzeigen_bot/i18n.py

208 lines
7.0 KiB
Python

"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import ctypes, gettext, inspect, locale, logging, os, sys
from collections.abc import Sized
from typing import Any, Final, NamedTuple
from . import resources, utils # pylint: disable=cyclic-import
__all__ = [
"Locale",
"get_translating_logger",
]
LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger(__name__)
class Locale(NamedTuple):
language:str # Language code (e.g., "en", "de")
region:str | None = None # Region code (e.g., "US", "DE")
encoding:str = "UTF-8" # Encoding format (e.g., "UTF-8")
def __str__(self) -> str:
"""
>>> str(Locale("en", "US", "UTF-8"))
'en_US.UTF-8'
>>> str(Locale("en", "US"))
'en_US.UTF-8'
>>> str(Locale("en"))
'en.UTF-8'
>>> str(Locale("de", None, "UTF-8"))
'de.UTF-8'
"""
region_part = f"_{self.region}" if self.region else ""
encoding_part = f".{self.encoding}" if self.encoding else ""
return f"{self.language}{region_part}{encoding_part}"
@staticmethod
def of(locale_string: str) -> 'Locale':
"""
>>> Locale.of("en_US.UTF-8")
Locale(language='en', region='US', encoding='UTF-8')
>>> Locale.of("de.UTF-8")
Locale(language='de', region=None, encoding='UTF-8')
>>> Locale.of("de_DE")
Locale(language='de', region='DE', encoding='UTF-8')
>>> Locale.of("en")
Locale(language='en', region=None, encoding='UTF-8')
>>> Locale.of("en.UTF-8")
Locale(language='en', region=None, encoding='UTF-8')
"""
parts = locale_string.split(".")
language_and_region = parts[0]
encoding = parts[1].upper() if len(parts) > 1 else "UTF-8"
parts = language_and_region.split("_")
language = parts[0]
region = parts[1].upper() if len(parts) > 1 else None
return Locale(language = language, region = region, encoding = encoding)
def _detect_locale() -> Locale:
"""
Detects the system language, returning a tuple of (language, region, encoding).
- On macOS/Linux, it uses the LANG environment variable.
- On Windows, it uses the Windows API via ctypes to get the default UI language.
Returns:
(language, region, encoding): e.g. ("en", "US", "UTF-8")
"""
lang = os.environ.get("LANG", None)
if not lang and os.name == "nt": # Windows
try:
lang = locale.windows_locale.get(ctypes.windll.kernel32.GetUserDefaultUILanguage(), "en_US") # type: ignore[attr-defined,unused-ignore] # mypy
except Exception:
LOG.warning("Error detecting language on Windows", exc_info = True)
return Locale.of(lang) if lang else Locale("en", "US", "UTF-8")
_CURRENT_LOCALE: Locale = _detect_locale()
_TRANSLATIONS: dict[str, Any] | None = None
def translate(text:object, caller: inspect.FrameInfo | None) -> str:
text = str(text)
if not caller:
return text
global _TRANSLATIONS
if _TRANSLATIONS is None:
try:
_TRANSLATIONS = utils.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
except FileNotFoundError:
_TRANSLATIONS = {}
if not _TRANSLATIONS:
return text
module_name = caller.frame.f_globals.get('__name__') # pylint: disable=redefined-outer-name
file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
if module_name and module_name.endswith(f".{file_basename}"):
module_name = module_name[:-(len(file_basename) + 1)]
file_key = f"{file_basename}.py" if module_name == file_basename else f"{module_name}/{file_basename}.py"
translation = utils.safe_get(_TRANSLATIONS,
file_key,
caller.function,
text
)
return translation if translation else text
_original_gettext = gettext.gettext
gettext.gettext = lambda message: translate(_original_gettext(message), utils.get_caller())
for module_name, module in sys.modules.items():
if module is None or module_name in sys.builtin_module_names:
continue
if hasattr(module, '_') and getattr(module, '_') is _original_gettext:
setattr(module, '_', gettext.gettext)
if hasattr(module, 'gettext') and getattr(module, 'gettext') is _original_gettext:
setattr(module, 'gettext', gettext.gettext)
def get_translating_logger(name: str | None = None) -> logging.Logger:
class TranslatingLogger(logging.Logger):
def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
if level != logging.DEBUG: # debug messages should not be translated
msg = translate(msg, utils.get_caller(2))
super()._log(level, msg, *args, **kwargs)
logging.setLoggerClass(TranslatingLogger)
return logging.getLogger(name)
def get_current_locale() -> Locale:
return _CURRENT_LOCALE
def set_current_locale(new_locale:Locale) -> None:
global _CURRENT_LOCALE, _TRANSLATIONS
if new_locale.language != _CURRENT_LOCALE.language:
_TRANSLATIONS = None
_CURRENT_LOCALE = new_locale
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
"""
>>> set_current_locale(Locale("en")) # Setup for doctests
>>> pluralize("field", 1)
'1 field'
>>> pluralize("field", 2)
'2 fields'
>>> pluralize("field", 2, prefix_with_count = False)
'fields'
"""
noun = translate(noun, utils.get_caller())
if isinstance(count, Sized):
count = len(count)
prefix = f"{count} " if prefix_with_count else ""
if count == 1:
return f"{prefix}{noun}"
# German
if _CURRENT_LOCALE.language == "de":
# Special cases
irregular_plurals = {
"Attribute": "Attribute",
"Bild": "Bilder",
"Feld": "Felder",
}
if noun in irregular_plurals:
return f"{prefix}{irregular_plurals[noun]}"
for singular_suffix, plural_suffix in irregular_plurals.items():
if noun.lower().endswith(singular_suffix):
pluralized = noun[:-len(singular_suffix)] + plural_suffix.lower()
return f"{prefix}{pluralized}"
# Very simplified German rules
if noun.endswith("ei"):
return f"{prefix}{noun}en" # Datei -> Dateien
if noun.endswith("e"):
return f"{prefix}{noun}n" # Blume -> Blumen
if noun.endswith(("el", "er", "en")):
return f"{prefix}{noun}" # Keller -> Keller
if noun[-1] in "aeiou":
return f"{prefix}{noun}s" # Auto -> Autos
return f"{prefix}{noun}e" # Hund -> Hunde
# English
if len(noun) < 2:
return f"{prefix}{noun}s"
if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
return f"{prefix}{noun}es"
if noun.endswith('y') and noun[-2].lower() not in "aeiou":
return f"{prefix}{noun[:-1]}ies"
return f"{prefix}{noun}s"