refact: apply consistent formatting

This commit is contained in:
sebthom
2025-04-27 23:54:22 +02:00
parent fe33a0e461
commit ef923a8337
21 changed files with 1020 additions and 709 deletions

317
scripts/post_autopep8.py Normal file
View File

@@ -0,0 +1,317 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import ast, logging, re, sys # isort: skip
from pathlib import Path
from typing import Final, List, Protocol, Tuple
from typing_extensions import override
# Configure basic logging
logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
LOG:Final[logging.Logger] = logging.getLogger(__name__)
class FormatterRule(Protocol):
"""
A code processor that can modify source lines based on the AST.
"""
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
...
class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
"""
Removes whitespace between the colon (:) and the type annotation in variable and function parameter declarations.
This rule enforces `a:int` instead of `a: int`.
It is the opposite behavior of autopep8 rule E231.
Example:
# Before
def foo(a: int, b : str) -> None:
pass
# After
def foo(a:int, b:str) -> None:
pass
"""
@override
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
ann_positions:List[Tuple[int, int]] = []
for node in ast.walk(tree):
if isinstance(node, ast.arg) and node.annotation is not None:
ann_positions.append((node.annotation.lineno - 1, node.annotation.col_offset))
elif isinstance(node, ast.AnnAssign) and node.annotation is not None:
ann = node.annotation
ann_positions.append((ann.lineno - 1, ann.col_offset))
if not ann_positions:
return lines
new_lines:List[str] = []
for idx, line in enumerate(lines):
if line.lstrip().startswith("#"):
new_lines.append(line)
continue
chars = list(line)
offsets = [col for (lin, col) in ann_positions if lin == idx]
for col in sorted(offsets, reverse = True):
prefix = "".join(chars[:col])
colon_idx = prefix.rfind(":")
if colon_idx == -1:
continue
j = colon_idx + 1
while j < len(chars) and chars[j].isspace():
del chars[j]
new_lines.append("".join(chars))
return new_lines
class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
"""
Ensures that the '=' sign in default values for function parameters and keyword arguments in function calls
is surrounded by exactly one space on each side.
This rule enforces `a:int = 3` instead of `a:int=3`, and `x = 42` instead of `x=42` or `x =42`.
It is the opposite behavior of autopep8 rule E251.
Example:
# Before
def foo(a:int=3, b :str= "bar"):
pass
foo(x=42,y = "hello")
# After
def foo(a:int = 3, b:str = "bar"):
pass
foo(x = 42, y = "hello")
"""
@override
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
equals_positions:List[Tuple[int, int]] = []
for node in ast.walk(tree):
# --- Defaults in function definitions, async defs & lambdas ---
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
# positional defaults
equals_positions.extend(
(d.lineno - 1, d.col_offset)
for d in node.args.defaults
if d is not None
)
# keyword-only defaults (only on defs, not lambdas)
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
equals_positions.extend(
(d.lineno - 1, d.col_offset)
for d in node.args.kw_defaults
if d is not None
)
# --- Keyword arguments in calls ---
if isinstance(node, ast.Call):
equals_positions.extend(
(kw.value.lineno - 1, kw.value.col_offset)
for kw in node.keywords
if kw.arg is not None
)
if not equals_positions:
return lines
new_lines:List[str] = []
for line_idx, line in enumerate(lines):
if line.lstrip().startswith("#"):
new_lines.append(line)
continue
chars = list(line)
equals_offsets = [col for (lineno, col) in equals_positions if lineno == line_idx]
for col in sorted(equals_offsets, reverse = True):
prefix = "".join(chars[:col])
equal_sign_idx = prefix.rfind("=")
if equal_sign_idx == -1:
continue
# remove spaces before '='
left_index = equal_sign_idx - 1
while left_index >= 0 and chars[left_index].isspace():
del chars[left_index]
equal_sign_idx -= 1
left_index -= 1
# remove spaces after '='
right_index = equal_sign_idx + 1
while right_index < len(chars) and chars[right_index].isspace():
del chars[right_index]
# insert single spaces
chars.insert(equal_sign_idx, " ")
chars.insert(equal_sign_idx + 2, " ")
new_lines.append("".join(chars))
return new_lines
class PreferDoubleQuotesRule(FormatterRule):
"""
Ensures string literals use double quotes unless the content contains a double quote.
Example:
# Before
foo = 'hello'
bar = 'a "quote" inside'
# After
foo = "hello"
bar = 'a "quote" inside' # kept as-is, because it contains a double quote
"""
@override
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
new_lines = lines.copy()
# Track how much each line has shifted so far
line_shifts:dict[int, int] = dict.fromkeys(range(len(lines)), 0)
# Build a parent map for f-string detection
parent_map:dict[ast.AST, ast.AST] = {}
for parent in ast.walk(tree):
for child in ast.iter_child_nodes(parent):
parent_map[child] = parent
def is_in_fstring(node:ast.AST) -> bool:
p = parent_map.get(node)
while p:
if isinstance(p, ast.JoinedStr):
return True
p = parent_map.get(p)
return False
# Regex to locate a single- or triple-quoted literal:
# (?P<prefix>[rRbuUfF]*) optional string flags (r, b, u, f, etc.), case-insensitive
# (?P<quote>'{3}|') the opening delimiter: either three single-quotes (''') or one ('),
# but never two in a row (so we won't mis-interpret adjacent quotes)
# (?P<content>.*?) the literal's content, non-greedy up to the next same delimiter
# (?P=quote) the matching closing delimiter (same length as the opener)
literal_re = re.compile(
r"(?P<prefix>[rRbuUfF]*)(?P<quote>'{3}|')(?P<content>.*?)(?P=quote)",
re.DOTALL,
)
for node in ast.walk(tree):
# only handle simple string constants
if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
continue
# skip anything inside an f-string, at any depth
if is_in_fstring(node):
continue
starting_line_number = getattr(node, "lineno", None)
starting_col_offset = getattr(node, "col_offset", None)
if starting_line_number is None or starting_col_offset is None:
continue
start_line = starting_line_number - 1
shift = line_shifts[start_line]
raw = new_lines[start_line]
# apply shift so we match against current edited line
idx = starting_col_offset + shift
if idx >= len(raw) or raw[idx] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
continue
# match literal at that column
m = literal_re.match(raw[idx:])
if not m:
continue
prefix = m.group("prefix")
quote = m.group("quote") # either "'" or "'''"
content = m.group("content") # what's inside
# skip if content has a double-quote already
if '"' in content:
continue
# build new literal with the same prefix, but doublequote delimiter
delim = '"' * len(quote)
escaped = content.replace(delim, "\\" + delim)
new_literal = f"{prefix}{delim}{escaped}{delim}"
literal_len = m.end() # how many chars we're replacing
before = raw[:idx]
after = raw[idx + literal_len:]
new_lines[start_line] = before + new_literal + after
# record shift delta for any further edits on this line
line_shifts[start_line] += len(new_literal) - literal_len
return new_lines
FORMATTER_RULES:List[FormatterRule] = [
NoSpaceAfterColonInTypeAnnotationRule(),
EqualSignSpacingInDefaultsAndNamedArgsRule(),
PreferDoubleQuotesRule(),
]
def format_file(path:Path) -> None:
# Read without newline conversion
with path.open("r", encoding = "utf-8", newline = "") as rf:
original_text = rf.read()
# Initial parse
try:
tree = ast.parse(original_text)
except SyntaxError as e:
LOG.error(
"Syntax error parsing %s[%d:%d]: %r -> %s",
path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
)
return
lines = original_text.splitlines(keepends = True)
formatted_text = original_text
success = True
for rule in FORMATTER_RULES:
lines = rule.apply(tree, lines, path)
formatted_text = "".join(lines)
# Re-parse the updated text
try:
tree = ast.parse(formatted_text)
except SyntaxError as e:
LOG.error(
"Syntax error after %s at %s[%d:%d]: %r -> %s",
rule.__class__.__name__, path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
)
success = False
break
if success and formatted_text != original_text:
with path.open("w", encoding = "utf-8", newline = "") as wf:
wf.write(formatted_text)
LOG.info("Formatted [%s].", path)
if __name__ == "__main__":
if len(sys.argv) < 2: # noqa: PLR2004 Magic value used in comparison
script_path = Path(sys.argv[0])
print(f"Usage: python {script_path} <directory1> [<directory2> ...]")
sys.exit(1)
for dir_arg in sys.argv[1:]:
root = Path(dir_arg)
if not root.exists():
LOG.warning("Directory [%s] does not exist, skipping...", root)
continue
for py_file in root.rglob("*.py"):
format_file(py_file)