refact: apply consistent formatting

2026-03-12 02:31:45 +01:00 · 2025-04-27 23:54:22 +02:00
parent fe33a0e461
commit ef923a8337
21 changed files with 1020 additions and 709 deletions
--- a/scripts/post_autopep8.py
+++ b/scripts/post_autopep8.py
@@ -0,0 +1,317 @@
+# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
+import ast, logging, re, sys  # isort: skip
+from pathlib import Path
+from typing import Final, List, Protocol, Tuple
+
+from typing_extensions import override
+
+# Configure root logging: INFO level, records rendered as "LEVEL: message"
+logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
+LOG:Final[logging.Logger] = logging.getLogger(__name__)
+
+
+class FormatterRule(Protocol):
+    """
+    A formatting pass: `apply` receives the parsed AST, the file's lines and its path, and returns the resulting lines.
+    """
+
+    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
+        ...
+
+
+class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
+    """
+    Removes whitespace between the colon (:) and the type annotation in variable and function parameter declarations.
+
+    This rule enforces `a:int` instead of `a: int`; it is the opposite behavior of autopep8 rule E231.
+
+    Example:
+        # Before
+        def foo(a: int, b:  str) -> None:
+            pass
+
+        # After
+        def foo(a:int, b:str) -> None:
+            pass
+    """
+
+    @override
+    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
+        """Returns `lines` without whitespace after annotation colons; `tree` must be the AST of `lines`."""
+        # annotation start columns (as reported by ast) grouped by 0-based line index
+        ann_cols:dict[int, List[int]] = {}
+        for node in ast.walk(tree):
+            if isinstance(node, (ast.arg, ast.AnnAssign)) and node.annotation is not None:
+                ann_cols.setdefault(node.annotation.lineno - 1, []).append(node.annotation.col_offset)
+
+        if not ann_cols:
+            return lines
+
+        new_lines:List[str] = []
+        for idx, line in enumerate(lines):
+            if idx not in ann_cols or line.lstrip().startswith("#"):
+                new_lines.append(line)
+                continue
+
+            chars = list(line)
+            raw = line.encode("utf-8")
+            # right-to-left, so deletions never shift the columns that are still to be processed
+            for byte_col in sorted(ann_cols[idx], reverse = True):
+                # ast reports col_offset in UTF-8 bytes; translate it into a character index
+                col = len(raw[:byte_col].decode("utf-8", errors = "ignore"))
+                colon_idx = "".join(chars[:col]).rfind(":")
+                if colon_idx == -1:
+                    continue
+                j = colon_idx + 1
+                while j < len(chars) and chars[j].isspace():
+                    del chars[j]
+            new_lines.append("".join(chars))
+
+        return new_lines
+
+
+class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
+    """
+    Ensures that the '=' sign in default values for function parameters and keyword arguments in function calls
+    is surrounded by exactly one space on each side.
+
+    This rule enforces `a:int = 3` instead of `a:int=3`, and `x = 42` instead of `x=42` or `x =42`.
+    It is the opposite behavior of autopep8 rule E251.
+
+    Example:
+        # Before
+        def foo(a:int=3, b:str=  "bar"):
+            pass
+
+        foo(x=42, y = "hello")
+
+        # After
+        def foo(a:int = 3, b:str = "bar"):
+            pass
+
+        foo(x = 42, y = "hello")
+    """
+
+    @override
+    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
+        """
+        Returns `lines` with exactly one space on each side of every default-value / keyword-argument '='.
+        `tree` must be the AST parsed from exactly these lines; `path` is not used by this rule.
+        """
+        # start columns (as reported by ast) of the values following an '=', grouped by 0-based line index
+        value_cols:dict[int, List[int]] = {}
+        for node in ast.walk(tree):
+            values:List[ast.expr] = []
+            # --- Defaults in function definitions, async defs & lambdas ---
+            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
+                # positional defaults
+                values.extend(node.args.defaults)
+                # keyword-only defaults; None marks a keyword-only parameter without default.
+                # Lambdas can declare them too, e.g. `lambda *, a=1: a`.
+                values.extend(d for d in node.args.kw_defaults if d is not None)
+
+            # --- Keyword arguments in calls ---
+            if isinstance(node, ast.Call):
+                # `**kwargs` entries have no name (arg is None) and no '=' sign
+                values.extend(kw.value for kw in node.keywords if kw.arg is not None)
+
+            for value in values:
+                value_cols.setdefault(value.lineno - 1, []).append(value.col_offset)
+
+        if not value_cols:
+            return lines
+
+        new_lines:List[str] = []
+        for line_idx, line in enumerate(lines):
+            if line_idx not in value_cols or line.lstrip().startswith("#"):
+                new_lines.append(line)
+                continue
+
+            chars = list(line)
+            raw = line.encode("utf-8")
+            # right-to-left, so edits never shift the columns that are still to be processed
+            for byte_col in sorted(value_cols[line_idx], reverse = True):
+                # ast reports col_offset in UTF-8 bytes; translate it into a character index
+                col = len(raw[:byte_col].decode("utf-8", errors = "ignore"))
+                equal_sign_idx = "".join(chars[:col]).rfind("=")
+                if equal_sign_idx == -1:
+                    continue
+
+                # remove spaces before '='
+                left_index = equal_sign_idx - 1
+                while left_index >= 0 and chars[left_index].isspace():
+                    del chars[left_index]
+                    equal_sign_idx -= 1
+                    left_index -= 1
+
+                # remove spaces after '='
+                right_index = equal_sign_idx + 1
+                while right_index < len(chars) and chars[right_index].isspace():
+                    del chars[right_index]
+
+                # insert single spaces
+                chars.insert(equal_sign_idx, " ")
+                chars.insert(equal_sign_idx + 2, " ")
+            new_lines.append("".join(chars))
+
+        return new_lines
+
+
+class PreferDoubleQuotesRule(FormatterRule):
+    """
+    Ensures string literals use double quotes unless the content contains a double quote.
+
+    Literals inside f-strings and literals that do not end on the line they start on are left unchanged.
+
+    Example:
+        # Before
+        foo = 'hello'
+        bar = 'a "quote" inside'
+
+        # After
+        foo = "hello"
+        bar = 'a "quote" inside'  # kept as-is, because it contains a double quote
+    """
+
+    @override
+    def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
+        """Returns a copy of `lines` with single-quoted literals re-delimited; `tree` must be the AST of `lines`."""
+        new_lines = lines.copy()
+
+        # Build a parent map for f-string detection
+        parent_map:dict[ast.AST, ast.AST] = {}
+        for parent in ast.walk(tree):
+            for child in ast.iter_child_nodes(parent):
+                parent_map[child] = parent
+
+        def is_in_fstring(node:ast.AST) -> bool:
+            p = parent_map.get(node)
+            while p:
+                if isinstance(p, ast.JoinedStr):
+                    return True
+                p = parent_map.get(p)
+            return False
+
+        # Regex to locate a single- or triple-quoted literal that ends on the same line:
+        #   (?P<prefix>[rRbuUfF]*)        optional string flags (r, b, u, f, etc.), case-insensitive
+        #   (?P<quote>'{3}|'(?!''))       the opening delimiter: either three single-quotes (''') or one (');
+        #                                 a lone quote followed by two more is the start of an unterminated
+        #                                 (multi-line) ''' literal and must not be read as an empty string
+        #   (?P<content>(?:\\.|[^\\])*?)  the literal's content, non-greedy up to the next same delimiter;
+        #                                 a backslash always consumes the following character, so an
+        #                                 escaped quote (\') does not terminate the literal
+        #   (?P=quote)                    the matching closing delimiter (same length as the opener)
+        literal_re = re.compile(
+            r"(?P<prefix>[rRbuUfF]*)(?P<quote>'{3}|'(?!''))(?P<content>(?:\\.|[^\\])*?)(?P=quote)",
+            re.DOTALL,
+        )
+
+        for node in ast.walk(tree):
+            # only handle simple string constants
+            if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
+                continue
+
+            # skip anything inside an f-string, at any depth
+            if is_in_fstring(node):
+                continue
+
+            starting_line_number = getattr(node, "lineno", None)
+            starting_col_offset = getattr(node, "col_offset", None)
+            if starting_line_number is None or starting_col_offset is None:
+                continue
+
+            start_line = starting_line_number - 1
+            raw = new_lines[start_line]
+            # ast reports col_offset in UTF-8 bytes, so translate it into a character index. Earlier edits
+            # on this line only swapped ' for " (same byte length), so no further shift is needed.
+            idx = len(raw.encode("utf-8")[:starting_col_offset].decode("utf-8", errors = "ignore"))
+            if idx >= len(raw) or raw[idx] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
+                continue
+
+            # match literal at that column
+            m = literal_re.match(raw[idx:])
+            if not m:
+                continue
+
+            prefix = m.group("prefix")
+            quote = m.group("quote")  # either "'" or "'''"
+            content = m.group("content")  # what's inside
+
+            # skip if content has a double-quote already
+            if '"' in content:
+                continue
+
+            # build new literal with the same prefix and content, but double-quote delimiter;
+            # the content cannot contain '"' here, so nothing needs to be escaped
+            delim = '"' * len(quote)
+            new_literal = f"{prefix}{delim}{content}{delim}"
+
+            literal_len = m.end()  # how many chars we're replacing
+            before = raw[:idx]
+            after = raw[idx + literal_len:]
+            new_lines[start_line] = before + new_literal + after
+
+        return new_lines
+
+
+FORMATTER_RULES:List[FormatterRule] = [  # format_file applies the rules in this order
+    NoSpaceAfterColonInTypeAnnotationRule(),
+    EqualSignSpacingInDefaultsAndNamedArgsRule(),
+    PreferDoubleQuotesRule(),
+]
+
+
+def format_file(path:Path) -> None:
+    """Applies all FORMATTER_RULES to the file at `path`; rewrites it only if the text changed and still parses."""
+    # Read without newline conversion
+    with path.open("r", encoding = "utf-8", newline = "") as rf:
+        original_text = rf.read()
+
+    # Initial parse (positions are logged with %s because a SyntaxError's lineno/offset may be None)
+    try:
+        tree = ast.parse(original_text)
+    except SyntaxError as e:
+        LOG.error(
+            "Syntax error parsing %s[%s:%s]: %r -> %s",
+            path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
+        )
+        return
+
+    # Split at \r\n, \r, \n only: str.splitlines() also splits at \f, \u2028 etc. and would drift from ast line numbers
+    lines = re.findall(r"[^\r\n]*(?:\r\n|\r|\n)|[^\r\n]+", original_text)
+    formatted_text = original_text
+    for rule in FORMATTER_RULES:
+        lines = rule.apply(tree, lines, path)
+        formatted_text = "".join(lines)
+
+        # Re-parse the updated text; a rule that broke the syntax aborts the run and leaves the file untouched
+        try:
+            tree = ast.parse(formatted_text)
+        except SyntaxError as e:
+            LOG.error(
+                "Syntax error after %s at %s[%s:%s]: %r -> %s",
+                rule.__class__.__name__, path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
+            )
+            return
+
+    if formatted_text != original_text:
+        with path.open("w", encoding = "utf-8", newline = "") as wf:
+            wf.write(formatted_text)
+        LOG.info("Formatted [%s].", path)
+
+
+if __name__ == "__main__":
+    # Each command line argument is a directory whose *.py files are formatted in place.
+    dir_args = sys.argv[1:]
+    if not dir_args:
+        print(f"Usage: python {Path(sys.argv[0])} <directory1> [<directory2> ...]")
+        sys.exit(1)
+
+    for root in map(Path, dir_args):
+        if root.exists():
+            for py_file in root.rglob("*.py"):
+                format_file(py_file)
+        else:
+            LOG.warning("Directory [%s] does not exist, skipping...", root)