Files
kleinanzeigen-bot/scripts/post_autopep8.py
2025-04-28 12:55:28 +02:00

318 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
import ast, logging, re, sys # isort: skip
from pathlib import Path
from typing import Final, List, Protocol, Tuple
from typing_extensions import override
# Configure basic logging
logging.basicConfig(level = logging.INFO, format = "%(levelname)s: %(message)s")
LOG:Final[logging.Logger] = logging.getLogger(__name__)
class FormatterRule(Protocol):
"""
A code processor that can modify source lines based on the AST.
"""
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
...
class NoSpaceAfterColonInTypeAnnotationRule(FormatterRule):
"""
Removes whitespace between the colon (:) and the type annotation in variable and function parameter declarations.
This rule enforces `a:int` instead of `a: int`.
It is the opposite behavior of autopep8 rule E231.
Example:
# Before
def foo(a: int, b : str) -> None:
pass
# After
def foo(a:int, b:str) -> None:
pass
"""
@override
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
ann_positions:List[Tuple[int, int]] = []
for node in ast.walk(tree):
if isinstance(node, ast.arg) and node.annotation is not None:
ann_positions.append((node.annotation.lineno - 1, node.annotation.col_offset))
elif isinstance(node, ast.AnnAssign) and node.annotation is not None:
ann = node.annotation
ann_positions.append((ann.lineno - 1, ann.col_offset))
if not ann_positions:
return lines
new_lines:List[str] = []
for idx, line in enumerate(lines):
if line.lstrip().startswith("#"):
new_lines.append(line)
continue
chars = list(line)
offsets = [col for (lin, col) in ann_positions if lin == idx]
for col in sorted(offsets, reverse = True):
prefix = "".join(chars[:col])
colon_idx = prefix.rfind(":")
if colon_idx == -1:
continue
j = colon_idx + 1
while j < len(chars) and chars[j].isspace():
del chars[j]
new_lines.append("".join(chars))
return new_lines
class EqualSignSpacingInDefaultsAndNamedArgsRule(FormatterRule):
"""
Ensures that the '=' sign in default values for function parameters and keyword arguments in function calls
is surrounded by exactly one space on each side.
This rule enforces `a:int = 3` instead of `a:int=3`, and `x = 42` instead of `x=42` or `x =42`.
It is the opposite behavior of autopep8 rule E251.
Example:
# Before
def foo(a:int=3, b :str= "bar"):
pass
foo(x=42,y = "hello")
# After
def foo(a:int = 3, b:str = "bar"):
pass
foo(x = 42, y = "hello")
"""
@override
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
equals_positions:List[Tuple[int, int]] = []
for node in ast.walk(tree):
# --- Defaults in function definitions, async defs & lambdas ---
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.Lambda)):
# positional defaults
equals_positions.extend(
(d.lineno - 1, d.col_offset)
for d in node.args.defaults
if d is not None
)
# keyword-only defaults (only on defs, not lambdas)
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
equals_positions.extend(
(d.lineno - 1, d.col_offset)
for d in node.args.kw_defaults
if d is not None
)
# --- Keyword arguments in calls ---
if isinstance(node, ast.Call):
equals_positions.extend(
(kw.value.lineno - 1, kw.value.col_offset)
for kw in node.keywords
if kw.arg is not None
)
if not equals_positions:
return lines
new_lines:List[str] = []
for line_idx, line in enumerate(lines):
if line.lstrip().startswith("#"):
new_lines.append(line)
continue
chars = list(line)
equals_offsets = [col for (lineno, col) in equals_positions if lineno == line_idx]
for col in sorted(equals_offsets, reverse = True):
prefix = "".join(chars[:col])
equal_sign_idx = prefix.rfind("=")
if equal_sign_idx == -1:
continue
# remove spaces before '='
left_index = equal_sign_idx - 1
while left_index >= 0 and chars[left_index].isspace():
del chars[left_index]
equal_sign_idx -= 1
left_index -= 1
# remove spaces after '='
right_index = equal_sign_idx + 1
while right_index < len(chars) and chars[right_index].isspace():
del chars[right_index]
# insert single spaces
chars.insert(equal_sign_idx, " ")
chars.insert(equal_sign_idx + 2, " ")
new_lines.append("".join(chars))
return new_lines
class PreferDoubleQuotesRule(FormatterRule):
"""
Ensures string literals use double quotes unless the content contains a double quote.
Example:
# Before
foo = 'hello'
bar = 'a "quote" inside'
# After
foo = "hello"
bar = 'a "quote" inside' # kept as-is, because it contains a double quote
"""
@override
def apply(self, tree:ast.AST, lines:List[str], path:Path) -> List[str]:
new_lines = lines.copy()
# Track how much each line has shifted so far
line_shifts:dict[int, int] = dict.fromkeys(range(len(lines)), 0)
# Build a parent map for f-string detection
parent_map:dict[ast.AST, ast.AST] = {}
for parent in ast.walk(tree):
for child in ast.iter_child_nodes(parent):
parent_map[child] = parent
def is_in_fstring(node:ast.AST) -> bool:
p = parent_map.get(node)
while p:
if isinstance(p, ast.JoinedStr):
return True
p = parent_map.get(p)
return False
# Regex to locate a single- or triple-quoted literal:
# (?P<prefix>[rRbuUfF]*) optional string flags (r, b, u, f, etc.), case-insensitive
# (?P<quote>'{3}|') the opening delimiter: either three single-quotes (''') or one ('),
# but never two in a row (so we won't mis-interpret adjacent quotes)
# (?P<content>.*?) the literal's content, non-greedy up to the next same delimiter
# (?P=quote) the matching closing delimiter (same length as the opener)
literal_re = re.compile(
r"(?P<prefix>[rRbuUfF]*)(?P<quote>'{3}|')(?P<content>.*?)(?P=quote)",
re.DOTALL,
)
for node in ast.walk(tree):
# only handle simple string constants
if not (isinstance(node, ast.Constant) and isinstance(node.value, str)):
continue
# skip anything inside an f-string, at any depth
if is_in_fstring(node):
continue
starting_line_number = getattr(node, "lineno", None)
starting_col_offset = getattr(node, "col_offset", None)
if starting_line_number is None or starting_col_offset is None:
continue
start_line = starting_line_number - 1
shift = line_shifts[start_line]
raw = new_lines[start_line]
# apply shift so we match against current edited line
idx = starting_col_offset + shift
if idx >= len(raw) or raw[idx] not in ("'", "r", "u", "b", "f", "R", "U", "B", "F"):
continue
# match literal at that column
m = literal_re.match(raw[idx:])
if not m:
continue
prefix = m.group("prefix")
quote = m.group("quote") # either "'" or "'''"
content = m.group("content") # what's inside
# skip if content has a double-quote already
if '"' in content:
continue
# build new literal with the same prefix, but doublequote delimiter
delim = '"' * len(quote)
escaped = content.replace(delim, "\\" + delim)
new_literal = f"{prefix}{delim}{escaped}{delim}"
literal_len = m.end() # how many chars we're replacing
before = raw[:idx]
after = raw[idx + literal_len:]
new_lines[start_line] = before + new_literal + after
# record shift delta for any further edits on this line
line_shifts[start_line] += len(new_literal) - literal_len
return new_lines
FORMATTER_RULES:List[FormatterRule] = [
NoSpaceAfterColonInTypeAnnotationRule(),
EqualSignSpacingInDefaultsAndNamedArgsRule(),
PreferDoubleQuotesRule(),
]
def format_file(path:Path) -> None:
# Read without newline conversion
with path.open("r", encoding = "utf-8", newline = "") as rf:
original_text = rf.read()
# Initial parse
try:
tree = ast.parse(original_text)
except SyntaxError as e:
LOG.error(
"Syntax error parsing %s[%d:%d]: %r -> %s",
path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
)
return
lines = original_text.splitlines(keepends = True)
formatted_text = original_text
success = True
for rule in FORMATTER_RULES:
lines = rule.apply(tree, lines, path)
formatted_text = "".join(lines)
# Re-parse the updated text
try:
tree = ast.parse(formatted_text)
except SyntaxError as e:
LOG.error(
"Syntax error after %s at %s[%d:%d]: %r -> %s",
rule.__class__.__name__, path, e.lineno, e.offset, (e.text or "").rstrip(), e.msg
)
success = False
break
if success and formatted_text != original_text:
with path.open("w", encoding = "utf-8", newline = "") as wf:
wf.write(formatted_text)
LOG.info("Formatted [%s].", path)
if __name__ == "__main__":
if len(sys.argv) < 2: # noqa: PLR2004 Magic value used in comparison
script_path = Path(sys.argv[0])
print(f"Usage: python {script_path} <directory1> [<directory2> ...]")
sys.exit(1)
for dir_arg in sys.argv[1:]:
root = Path(dir_arg)
if not root.exists():
LOG.warning("Directory [%s] does not exist, skipping...", root)
continue
for py_file in root.rglob("*.py"):
format_file(py_file)