kleinanzeigen-bot/src/kleinanzeigen_bot/i18n.py

"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import ctypes, gettext, inspect, locale, logging, os, sys
from collections.abc import Sized
from typing import Any, Final, NamedTuple

from . import resources, utils  # pylint: disable=cyclic-import

__all__ = [
    "Locale",
    "get_translating_logger",
]

LOG_ROOT:Final[logging.Logger] = logging.getLogger()
LOG:Final[logging.Logger] = logging.getLogger(__name__)


class Locale(NamedTuple):

    language:str  # Language code (e.g., "en", "de")
    region:str | None = None  # Region code (e.g., "US", "DE")
    encoding:str = "UTF-8"  # Encoding format (e.g., "UTF-8")

    def __str__(self) -> str:
        """
        >>> str(Locale("en", "US", "UTF-8"))
        'en_US.UTF-8'
        >>> str(Locale("en", "US"))
        'en_US.UTF-8'
        >>> str(Locale("en"))
        'en.UTF-8'
        >>> str(Locale("de", None, "UTF-8"))
        'de.UTF-8'
        """
        region_part = f"_{self.region}" if self.region else ""
        encoding_part = f".{self.encoding}" if self.encoding else ""
        return f"{self.language}{region_part}{encoding_part}"

    @staticmethod
    def of(locale_string: str) -> 'Locale':
        """
        >>> Locale.of("en_US.UTF-8")
        Locale(language='en', region='US', encoding='UTF-8')
        >>> Locale.of("de.UTF-8")
        Locale(language='de', region=None, encoding='UTF-8')
        >>> Locale.of("de_DE")
        Locale(language='de', region='DE', encoding='UTF-8')
        >>> Locale.of("en")
        Locale(language='en', region=None, encoding='UTF-8')
        >>> Locale.of("en.UTF-8")
        Locale(language='en', region=None, encoding='UTF-8')
        """
        parts = locale_string.split(".")
        language_and_region = parts[0]
        encoding = parts[1].upper() if len(parts) > 1 else "UTF-8"

        parts = language_and_region.split("_")
        language = parts[0]
        region = parts[1].upper() if len(parts) > 1 else None

        return Locale(language = language, region = region, encoding = encoding)


def _detect_locale() -> Locale:
    """
    Determines the locale of the system this process runs on.

    - The LANG environment variable is consulted first (on every platform).
    - If LANG is unset/empty and the OS is Windows, the default UI language is queried
      from the Windows API via ctypes.
    - If nothing could be determined, en_US.UTF-8 is assumed.

    Returns:
        the detected Locale, e.g. Locale("en", "US", "UTF-8")
    """
    detected = os.environ.get("LANG")

    if os.name == "nt" and not detected:  # Windows
        try:
            ui_language_id = ctypes.windll.kernel32.GetUserDefaultUILanguage()  # type: ignore[attr-defined,unused-ignore] # mypy
            detected = locale.windows_locale.get(ui_language_id, "en_US")
        except Exception:
            # best effort only: fall through to the default locale below
            LOG.warning("Error detecting language on Windows", exc_info = True)

    if detected:
        return Locale.of(detected)
    return Locale("en", "US", "UTF-8")


# Locale used for translation and pluralization; detected once at import time,
# can be replaced via set_current_locale().
_CURRENT_LOCALE: Locale = _detect_locale()
# Translation table for the current language. None means "not loaded yet": translate()
# loads it on first use and stores an empty dict if no translation file exists.
_TRANSLATIONS: dict[str, Any] | None = None


def translate(text:object, caller: inspect.FrameInfo | None) -> str:
    text = str(text)
    if not caller:
        return text

    global _TRANSLATIONS
    if _TRANSLATIONS is None:
        try:
            _TRANSLATIONS = utils.load_dict_from_module(resources, f"translations.{_CURRENT_LOCALE[0]}.yaml")
        except FileNotFoundError:
            _TRANSLATIONS = {}

    if not _TRANSLATIONS:
        return text

    module_name = caller.frame.f_globals.get('__name__')  # pylint: disable=redefined-outer-name
    file_basename = os.path.splitext(os.path.basename(caller.filename))[0]
    if module_name and module_name.endswith(f".{file_basename}"):
        module_name = module_name[:-(len(file_basename) + 1)]
    file_key = f"{file_basename}.py" if module_name == file_basename else f"{module_name}/{file_basename}.py"
    translation = utils.safe_get(_TRANSLATIONS,
        file_key,
        caller.function,
        text
    )
    return translation if translation else text


# Monkey-patch gettext so that every message returned by gettext.gettext() is additionally
# passed through translate(). Modules that already bound the original function to their own
# namespace (as `_` or `gettext`) hold a separate reference, so those are rebound as well.
_original_gettext = gettext.gettext
gettext.gettext = lambda message: translate(_original_gettext(message), utils.get_caller())
# Iterate over a snapshot: attribute access on a module may trigger lazy imports which
# would change the size of sys.modules during iteration and raise a RuntimeError.
for module_name, module in list(sys.modules.items()):
    if module is None or module_name in sys.builtin_module_names:
        continue
    if getattr(module, '_', None) is _original_gettext:
        setattr(module, '_', gettext.gettext)
    if getattr(module, 'gettext', None) is _original_gettext:
        setattr(module, 'gettext', gettext.gettext)


def get_translating_logger(name: str | None = None) -> logging.Logger:

    class TranslatingLogger(logging.Logger):

        def _log(self, level: int, msg: object, *args: Any, **kwargs: Any) -> None:
            if level != logging.DEBUG:  # debug messages should not be translated
                msg = translate(msg, utils.get_caller(2))
            super()._log(level, msg, *args, **kwargs)

    logging.setLoggerClass(TranslatingLogger)
    return logging.getLogger(name)


def get_current_locale() -> Locale:
    """
    Returns the locale currently stored in the module-level _CURRENT_LOCALE.

    Returns:
        the active Locale, i.e. the one detected at import time unless it was
        replaced via set_current_locale()
    """
    return _CURRENT_LOCALE


def set_current_locale(new_locale:Locale) -> None:
    """
    Replaces the active locale.

    If the language differs from the one of the previously active locale, the cached
    translations are discarded so that they get loaded again on the next translate() call.

    Args:
        new_locale: the locale to activate
    """
    global _CURRENT_LOCALE, _TRANSLATIONS
    language_changed = _CURRENT_LOCALE.language != new_locale.language
    _CURRENT_LOCALE = new_locale
    if language_changed:
        _TRANSLATIONS = None  # forces a reload for the new language


def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
    """
    Returns the translated noun in its singular or plural form, depending on the given count.

    The noun is first passed through translate() with the caller of this function as lookup
    context. If the current locale's language is "de", strongly simplified German plural rules
    are applied, otherwise English rules.

    Args:
        noun: the singular form of the noun
        count: the quantity, or a sized collection whose length is used as the quantity
        prefix_with_count: if True, the result is prefixed with the count followed by a space

    Returns:
        the singular form if count is 1, otherwise the plural form

    >>> set_current_locale(Locale("en"))  # Setup for doctests
    >>> pluralize("field", 1)
    '1 field'
    >>> pluralize("field", 2)
    '2 fields'
    >>> pluralize("field", 2, prefix_with_count = False)
    'fields'
    """
    # must be called from this frame so that get_caller() sees the same call stack depth
    noun = translate(noun, utils.get_caller())

    if isinstance(count, Sized):
        count = len(count)

    prefix = f"{count} " if prefix_with_count else ""

    if count == 1:
        return f"{prefix}{noun}"

    # German
    if _CURRENT_LOCALE.language == "de":
        # Special cases
        irregular_plurals = {
            "Attribute": "Attribute",
            "Bild": "Bilder",
            "Feld": "Felder",
        }
        # exact match first to keep the capitalization of the stand-alone noun
        if noun in irregular_plurals:
            return f"{prefix}{irregular_plurals[noun]}"
        # compound nouns ending with an irregular noun, e.g. Profilbild -> Profilbilder;
        # both sides are lower-cased, otherwise the capitalized keys could never match
        noun_lower = noun.lower()
        for singular_suffix, plural_suffix in irregular_plurals.items():
            if noun_lower.endswith(singular_suffix.lower()):
                pluralized = noun[:-len(singular_suffix)] + plural_suffix.lower()
                return f"{prefix}{pluralized}"

        # Very simplified German rules
        if noun.endswith("ei"):
            return f"{prefix}{noun}en"  # Datei -> Dateien
        if noun.endswith("e"):
            return f"{prefix}{noun}n"  # Blume -> Blumen
        if noun.endswith(("el", "er", "en")):
            return f"{prefix}{noun}"  # Keller -> Keller
        # endswith() instead of noun[-1] so that an empty noun does not raise an IndexError
        if noun.endswith(("a", "e", "i", "o", "u")):
            return f"{prefix}{noun}s"  # Auto -> Autos
        return f"{prefix}{noun}e"  # Hund -> Hunde

    # English
    if len(noun) < 2:
        return f"{prefix}{noun}s"
    if noun.endswith(('s', 'sh', 'ch', 'x', 'z')):
        return f"{prefix}{noun}es"
    if noun.endswith('y') and noun[-2].lower() not in "aeiou":
        return f"{prefix}{noun[:-1]}ies"  # entry -> entries
    return f"{prefix}{noun}s"