feat: unify pdm test defaults and verbosity controls (#836)

This commit is contained in:
Jens
2026-02-23 16:44:13 +01:00
committed by GitHub
parent 6aab9761f1
commit 930b3f6028
7 changed files with 236 additions and 187 deletions

View File

@@ -168,8 +168,12 @@ jobs:
         run: pdm run basedpyright
+      - name: Prepare split coverage artifacts
+        run: pdm run ci:coverage:prepare
       - name: Run unit tests
-        run: pdm run utest:cov -vv
+        run: pdm run ci:test:unit -vv
       - name: Run integration tests
@@ -180,15 +184,15 @@ jobs:
             ubuntu-*)
               sudo apt-get install --no-install-recommends -y xvfb
               # Run tests INSIDE xvfb context
-              xvfb-run bash -c 'pdm run itest:cov -vv'
+              xvfb-run bash -c 'pdm run ci:test:integration -vv'
               ;;
-            *) pdm run itest:cov -vv
+            *) pdm run ci:test:integration -vv
               ;;
           esac
       - name: Run smoke tests
-        run: pdm run smoke:cov -vv
+        run: pdm run ci:test:smoke -vv
       - name: Run app from source
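The workflow's `case` dispatch above can be sketched as a standalone shell function (runner labels are examples, and `echo` stands in for actually executing the command):

```shell
#!/usr/bin/env sh
# Sketch of the platform switch used by the integration-test step.
run_integration() {
  case "$1" in
    ubuntu-*)
      # Headless Linux browsers need a virtual display, so wrap in xvfb-run.
      echo "xvfb-run bash -c 'pdm run ci:test:integration -vv'"
      ;;
    *)
      echo "pdm run ci:test:integration -vv"
      ;;
  esac
}

run_integration "ubuntu-24.04"
run_integration "macos-14"
```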

View File

@@ -33,7 +33,7 @@ Please read through this document before submitting any contributions to ensure
 1. Fork and clone the repository
 1. Install dependencies: `pdm install`
-1. Run tests to verify setup: `pdm run test:cov`
+1. Run tests to verify setup: `pdm run test`
 
 ## Development Notes
@@ -118,18 +118,15 @@ This project uses a comprehensive testing strategy with three test types:
 ### Running Tests
 
 ```bash
-# Run all tests in order (unit → integration → smoke)
-pdm run test:cov
+# Canonical unified run (quiet by default, coverage enabled)
+pdm run test
+pdm run test -v
+pdm run test -vv
 
 # Run specific test types
 pdm run utest  # Unit tests only
 pdm run itest  # Integration tests only
 pdm run smoke  # Smoke tests only
-
-# Run with coverage
-pdm run utest:cov  # Unit tests with coverage
-pdm run itest:cov  # Integration tests with coverage
-pdm run smoke:cov  # Smoke tests with coverage
 ```
 
 ### Adding New Tests

View File

@@ -49,30 +49,38 @@ async def test_bot_starts(smoke_bot):
     ...
 ```
 
-### Running Smoke, Unit, and Integration Tests
+### Running Tests
 
-- **Unit tests:**
-  - Run with: `pdm run utest` (excludes smoke and integration tests)
-  - Coverage: `pdm run utest:cov`
-- **Integration tests:**
-  - Run with: `pdm run itest` (excludes smoke tests)
-  - Coverage: `pdm run itest:cov`
-- **Smoke tests:**
-  - Run with: `pdm run smoke`
-  - Coverage: `pdm run smoke:cov`
-- **All tests in order:**
-  - Run with: `pdm run test` (runs unit, then integration, then smoke)
+- **Canonical unified command:**
+  - `pdm run test` runs all tests in one invocation.
+  - Output is quiet by default.
+  - Coverage is enabled by default with `--cov-report=term-missing`.
+- **Verbosity controls:**
+  - `pdm run test -v` enables verbose pytest output and durations.
+  - `pdm run test -vv` keeps pytest's second verbosity level and durations.
+- **Split runs (targeted/stable):**
+  - `pdm run utest` runs only unit tests.
+  - `pdm run itest` runs only integration tests and stays serial (`-n 0`) for browser stability.
+  - `pdm run smoke` runs only smoke tests.
+  - Split runs also include coverage by default.
+
+### Coverage
+
+- Local and CI-facing public commands (`test`, `utest`, `itest`, `smoke`) always enable coverage.
+- The default local report output remains `term-missing`.
+- CI still uploads split XML coverage files (unit/integration/smoke) to Codecov using internal `ci:*` runner commands.
 
 ### Parallel Execution and Slow-Test Tracking
 
-- `pytest-xdist` runs every invocation with `-n auto`, so the suite is split across CPU cores automatically.
-- Pytest now reports the slowest 25 tests (`--durations=25 --durations-min=0.5`), making regressions easy to spot in CI logs.
+- `test`, `utest`, and `smoke` run with `-n auto`.
+- `itest` runs with `-n 0` by design to avoid flaky browser parallelism.
+- Verbose runs (`-v`, `-vv`, `-vvv`) report the slowest 25 tests (`--durations=25 --durations-min=0.5`), while quiet/default runs omit durations.
 - Long-running scenarios are tagged with `@pytest.mark.slow` (smoke CLI checks and browser integrations). Keep them in CI, but skip locally via `pytest -m "not slow"` when you only need a quick signal.
-- Coverage commands (`pdm run test:cov`, etc.) remain compatible: `pytest-cov` merges the per-worker data transparently.
 
 ### CI Test Order
 
-- CI runs unit tests first, then integration tests, then smoke tests.
+- Split suites run in this order: unit, integration, smoke.
+- Internal commands (`ci:coverage:prepare`, `ci:test:unit`, `ci:test:integration`, `ci:test:smoke`) are backed by `scripts/run_tests.py`.
 - Coverage for each group is uploaded separately to Codecov (with flags: `unit-tests`, `integration-tests`, `smoke-tests`).
 - This ensures that foundational failures are caught early and that test types are clearly separated.
@@ -89,22 +97,23 @@ async def test_bot_starts(smoke_bot):
 - **Coverage clarity:** You can see which code paths are covered by each test type in Codecov.
 
 See also: `pyproject.toml` for test script definitions and `.github/workflows/build.yml` for CI setup.
+For contributor workflow, setup, and submission expectations, see `CONTRIBUTING.md`.
 
-## Why Use Composite Test Groups?
+## Why Offer Both Unified and Split Runs?
 
-### Failing Fast and Early Feedback
+### Unified Runs (Default)
 
-- **Failing fast:** By running unit tests first, then integration, then smoke tests, CI and contributors get immediate feedback if a foundational component is broken.
-- **Critical errors surface early:** If a unit test fails, the job stops before running slower or less critical tests, saving time and resources.
-- **CI efficiency:** This approach prevents running hundreds of integration/smoke tests if the application is fundamentally broken (e.g., cannot start, cannot load config).
-- **Clear separation:** Each test group (unit, integration, smoke) is reported and covered separately, making it easy to see which layer is failing.
+- **Single summary:** See all failing tests in one run while developing locally.
+- **Coverage included:** The default `pdm run test` command reports coverage without needing a second command.
+- **Lower command overhead:** One pytest startup for the whole suite.
 
-### Tradeoff: Unified Reporting vs. Fast Failure
+### Split Runs (CI and Targeted Debugging)
 
-- **Unified reporting:** Running all tests in a single pytest invocation gives a single summary of all failures, but does not fail fast on critical errors.
-- **Composite groups:** Running groups separately means you may only see the first group's failures, but you catch the most important issues as soon as possible.
+- **Fail-fast flow in CI:** Unit, integration, and smoke runs are executed in sequence for faster failure feedback.
+- **Stable browser integrations:** `pdm run itest` keeps serial execution with `-n 0`.
+- **Separate coverage uploads:** CI still uses per-group coverage files/flags for Codecov.
 
-### When to Use Which
+### Trade-off
 
-- **CI:** Composite groups are preferred for CI to catch critical failures early and avoid wasting resources.
-- **Local development:** You may prefer a unified run (`pdm run test`) to see all failures at once. Both options can be provided in `pyproject.toml` for flexibility.
+- The unified default uses `-n auto`; this can increase integration-test flakiness compared to serial integration runs.
+- When integration-test stability is a concern, run `pdm run itest` directly.

View File

@@ -110,35 +110,26 @@ lint = { composite = ["lint:ruff", "lint:mypy", "lint:pyright"] }
 "lint:fix" = {shell = "ruff check --preview --fix" }
 
 # tests
-# Run unit tests only (exclude smoke and itest)
-utest = "python -m pytest --capture=tee-sys -m \"not itest and not smoke\""
-# Run integration tests only (exclude smoke)
-# Uses -n 0 to disable xdist parallelization - browser tests are flaky with parallel workers
-itest = "python -m pytest --capture=tee-sys -m \"itest and not smoke\" -n 0"
-# Run smoke tests only
-smoke = "python -m pytest --capture=tee-sys -m smoke"
-# Run all tests in order: unit, integration, smoke
-# (for CI: run these three scripts in sequence)
-test = { composite = ["utest", "itest", "smoke"] }
-# Run all tests in a single invocation for unified summary (unit tests run first)
-"test:unified" = "python -m pytest --capture=tee-sys"
-#
-# Coverage scripts:
-# - Each group writes its own data file to .temp/.coverage.<group>.xml
-#
-"coverage:prepare" = { shell = "python scripts/coverage_helper.py prepare" }
-"test:cov" = { composite = ["coverage:prepare", "utest:cov", "itest:cov", "smoke:cov", "coverage:combine"] }
-"utest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-unit.sqlite .temp/coverage-unit.xml \"not itest and not smoke\"" }
-"itest:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-itest.sqlite .temp/coverage-integration.xml \"itest and not smoke\" -n 0" }
-"smoke:cov" = { shell = "python scripts/coverage_helper.py run .temp/.coverage-smoke.sqlite .temp/coverage-smoke.xml smoke" }
-"coverage:combine" = { shell = "python scripts/coverage_helper.py combine .temp/.coverage-unit.sqlite .temp/.coverage-itest.sqlite .temp/.coverage-smoke.sqlite" }
-# Run all tests with coverage in a single invocation
-"test:cov:unified" = "python -m pytest --capture=tee-sys --cov=src/kleinanzeigen_bot --cov-report=term-missing"
+# Public test commands
+# - Coverage is enabled by default for all public profiles.
+# - Quiet output is default; pass -v/-vv for more details and durations.
+test = "python scripts/run_tests.py run test"
+utest = "python scripts/run_tests.py run utest"
+itest = "python scripts/run_tests.py run itest"
+smoke = "python scripts/run_tests.py run smoke"
+
+# CI/internal split coverage commands (for Codecov artifact uploads)
+"ci:coverage:prepare" = "python scripts/run_tests.py ci-prepare"
+"ci:test:unit" = "python scripts/run_tests.py ci-run --marker \"not itest and not smoke\" --coverage-file .temp/.coverage-unit.sqlite --xml-file .temp/coverage-unit.xml"
+"ci:test:integration" = "python scripts/run_tests.py ci-run --marker \"itest and not smoke\" --coverage-file .temp/.coverage-itest.sqlite --xml-file .temp/coverage-integration.xml --workers 0"
+"ci:test:smoke" = "python scripts/run_tests.py ci-run --marker smoke --coverage-file .temp/.coverage-smoke.sqlite --xml-file .temp/coverage-smoke.xml"
 
 # Test script structure:
-# - Composite test groups (unit, integration, smoke) are run in order to fail fast and surface critical errors early.
-# - This prevents running all tests if a foundational component is broken, saving time.
-# - Each group is covered and reported separately.
+# - `scripts/run_tests.py` is the single implementation for public and CI test execution.
+# - `test` is the canonical unified command.
+# - Split groups (`utest`, `itest`, `smoke`) remain for targeted runs.
+# - `itest` remains serial (-n 0) for browser stability.
+# - CI uses `ci:*` commands for per-suite XML outputs consumed by Codecov.
 #
 # See docs/TESTING.md for more details.
@@ -347,10 +338,8 @@ testpaths = [
 addopts = """
 --strict-markers
 --doctest-modules
+--cov=src/kleinanzeigen_bot
 --cov-report=term-missing
--n auto
---durations=25
---durations-min=0.5
 """
 markers = [
   "slow: marks a test as long running",
@@ -371,6 +360,7 @@ filterwarnings = [
 data_file = ".temp/coverage.sqlite"
 branch = true  # track branch coverage
 relative_files = true
+disable_warnings = ["no-data-collected"]
 
 [tool.coverage.report]
 precision = 2
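The `ci:test:*` commands route per-suite coverage data through the `COVERAGE_FILE` environment variable, restoring the previous value afterwards. That save/restore pattern can be sketched as a small context manager (`coverage_file` is a hypothetical helper name):

```python
import os
from collections.abc import Iterator
from contextlib import contextmanager


@contextmanager
def coverage_file(path: str) -> Iterator[None]:
    """Temporarily point coverage.py at a per-suite data file."""
    previous = os.environ.get("COVERAGE_FILE")
    os.environ["COVERAGE_FILE"] = path
    try:
        yield
    finally:
        # Restore the caller's environment, deleting the key if it was unset.
        if previous is None:
            os.environ.pop("COVERAGE_FILE", None)
        else:
            os.environ["COVERAGE_FILE"] = previous


with coverage_file(".temp/.coverage-unit.sqlite"):
    assert os.environ["COVERAGE_FILE"] == ".temp/.coverage-unit.sqlite"
```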

View File

@@ -1,116 +0,0 @@
"""Utility helpers for the coverage pipeline used by the pdm test scripts."""
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
from __future__ import annotations

import argparse
import logging
import os
import subprocess  # noqa: S404 subprocess usage is limited to known internal binaries
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent
TEMP = ROOT / ".temp"

logging.basicConfig(level = logging.INFO, format = "%(asctime)s %(levelname)s %(name)s %(message)s")
logger = logging.getLogger(__name__)


def prepare() -> None:
    logger.info("Preparing coverage artifacts in %s", TEMP)
    try:
        TEMP.mkdir(parents = True, exist_ok = True)
        removed_patterns = 0
        for pattern in ("coverage-*.xml", ".coverage-*.sqlite"):
            for coverage_file in TEMP.glob(pattern):
                coverage_file.unlink()
                removed_patterns += 1
        removed_paths = 0
        for path in (TEMP / "coverage.sqlite", ROOT / ".coverage"):
            if path.exists():
                path.unlink()
                removed_paths += 1
    except Exception as exc:  # noqa: S110 suppress to log
        logger.exception("Failed to clean coverage artifacts: %s", exc)
        raise
    logger.info(
        "Removed %d pattern-matching files and %d fixed paths during prepare",
        removed_patterns,
        removed_paths,
    )


def run_suite(data_file:Path, xml_file:Path, marker:str, extra_args:list[str]) -> None:
    os.environ["COVERAGE_FILE"] = str(ROOT / data_file)
    cmd = [
        sys.executable,
        "-m",
        "pytest",
        "--capture=tee-sys",
        "-m",
        marker,
        "--cov=src/kleinanzeigen_bot",
        f"--cov-report=xml:{ROOT / xml_file}",
    ]
    if extra_args:
        cmd.extend(extra_args)
    logger.info("Running pytest marker=%s coverage_data=%s xml=%s", marker, data_file, xml_file)
    subprocess.run(cmd, cwd = ROOT, check = True)  # noqa: S603 arguments are constant and controlled
    logger.info("Pytest marker=%s finished", marker)


def combine(data_files:list[Path]) -> None:
    combined = TEMP / "coverage.sqlite"
    os.environ["COVERAGE_FILE"] = str(combined)
    resolved = []
    missing = []
    for data in data_files:
        candidate = ROOT / data
        if not candidate.exists():
            missing.append(str(candidate))
        else:
            resolved.append(candidate)
    if missing:
        message = f"Coverage data files missing: {', '.join(missing)}"
        logger.error(message)
        raise FileNotFoundError(message)
    cmd = [sys.executable, "-m", "coverage", "combine"] + [str(path) for path in resolved]
    logger.info("Combining coverage data files: %s", ", ".join(str(path) for path in resolved))
    subprocess.run(cmd, cwd = ROOT, check = True)  # noqa: S603 arguments controlled by this script
    logger.info("Coverage combine completed, generating report")
    subprocess.run([sys.executable, "-m", "coverage", "report", "-m"], cwd = ROOT, check = True)  # noqa: S603


def main() -> None:
    parser = argparse.ArgumentParser(description = "Coverage helper commands")
    subparsers = parser.add_subparsers(dest = "command", required = True)
    subparsers.add_parser("prepare", help = "Clean coverage artifacts")
    run_parser = subparsers.add_parser("run", help = "Run pytest with a custom coverage file")
    run_parser.add_argument("data_file", type = Path, help = "Coverage data file to write")
    run_parser.add_argument("xml_file", type = Path, help = "XML report path")
    run_parser.add_argument("marker", help = "pytest marker expression")
    combine_parser = subparsers.add_parser("combine", help = "Combine coverage data files")
    combine_parser.add_argument(
        "data_files",
        nargs = "+",
        type = Path,
        help = "List of coverage data files to combine",
    )
    args, extra_args = parser.parse_known_args()
    if args.command == "prepare":
        prepare()
    elif args.command == "run":
        run_suite(args.data_file, args.xml_file, args.marker, extra_args)
    else:
        combine(args.data_files)


if __name__ == "__main__":
    main()

scripts/run_tests.py Normal file
View File

@@ -0,0 +1,165 @@
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""Unified pytest runner for public and CI test execution.

This module invokes pytest via ``pytest.main()``. Programmatic callers should
avoid repeated in-process invocations because Python's import cache can retain
test module state between runs. CLI usage via ``pdm run`` is unaffected because
each invocation runs in a fresh process.
"""
from __future__ import annotations

import argparse
import os
import sys
from pathlib import Path
from typing import Final

import pytest

ROOT:Final = Path(__file__).resolve().parent.parent
TEMP:Final = ROOT / ".temp"

# Most tests are currently unmarked, so utest intentionally uses negative markers
# to select the default "unit-like" population while excluding integration/smoke.
PROFILE_CONFIGS:Final[dict[str, tuple[str | None, str]]] = {
    "test": (None, "auto"),
    "utest": ("not itest and not smoke", "auto"),
    "itest": ("itest and not smoke", "0"),
    "smoke": ("smoke", "auto"),
}


def _append_verbosity(pytest_args:list[str], verbosity:int) -> None:
    if verbosity == 0:
        pytest_args.append("-q")
    else:
        pytest_args.append("-" + ("v" * verbosity))
        pytest_args.extend([
            "--durations=25",
            "--durations-min=0.5",
        ])


def _pytest_base_args(*, workers:str, verbosity:int) -> list[str]:
    # Stable pytest defaults (strict markers, doctest, coverage) live in pyproject addopts.
    # This runner only adds dynamic execution policy (workers and verbosity).
    pytest_args = [
        "-n",
        workers,
    ]
    _append_verbosity(pytest_args, verbosity)
    return pytest_args


def _resolve_path(path:Path) -> Path:
    if path.is_absolute():
        return path
    return ROOT / path


def _display_path(path:Path) -> str:
    try:
        return str(path.relative_to(ROOT))
    except ValueError:
        return str(path)


def _cleanup_coverage_artifacts() -> None:
    TEMP.mkdir(parents = True, exist_ok = True)
    for pattern in ("coverage-*.xml", ".coverage-*.sqlite"):
        for stale_file in TEMP.glob(pattern):
            stale_file.unlink(missing_ok = True)
    for stale_path in (TEMP / "coverage.sqlite", ROOT / ".coverage"):
        stale_path.unlink(missing_ok = True)


def _run_profile(*, profile:str, verbosity:int, passthrough:list[str]) -> int:
    marker, workers = PROFILE_CONFIGS[profile]
    pytest_args = _pytest_base_args(workers = workers, verbosity = verbosity)
    if marker is not None:
        pytest_args.extend(["-m", marker])
    pytest_args.extend(passthrough)
    return pytest.main(pytest_args)


def _run_ci(*, marker:str, coverage_file:Path, xml_file:Path, workers:str, verbosity:int, passthrough:list[str]) -> int:
    resolved_coverage_file = _resolve_path(coverage_file)
    resolved_xml_file = _resolve_path(xml_file)
    resolved_coverage_file.parent.mkdir(parents = True, exist_ok = True)
    resolved_xml_file.parent.mkdir(parents = True, exist_ok = True)
    previous_coverage_file = os.environ.get("COVERAGE_FILE")
    os.environ["COVERAGE_FILE"] = str(resolved_coverage_file)
    pytest_args = _pytest_base_args(workers = workers, verbosity = verbosity)
    pytest_args.extend([
        "-m",
        marker,
        f"--cov-report=xml:{_display_path(resolved_xml_file)}",
    ])
    pytest_args.extend(passthrough)
    try:
        return pytest.main(pytest_args)
    finally:
        if previous_coverage_file is None:
            os.environ.pop("COVERAGE_FILE", None)
        else:
            os.environ["COVERAGE_FILE"] = previous_coverage_file


def _build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(description = "Run project tests")
    subparsers = parser.add_subparsers(dest = "command", required = True)
    run_parser = subparsers.add_parser("run", help = "Run tests for a predefined profile")
    run_parser.add_argument("profile", choices = sorted(PROFILE_CONFIGS))
    run_parser.add_argument("-v", "--verbose", action = "count", default = 0)
    subparsers.add_parser("ci-prepare", help = "Clean stale coverage artifacts")
    ci_run_parser = subparsers.add_parser("ci-run", help = "Run tests with explicit coverage outputs")
    ci_run_parser.add_argument("--marker", required = True)
    ci_run_parser.add_argument("--coverage-file", type = Path, required = True)
    ci_run_parser.add_argument("--xml-file", type = Path, required = True)
    ci_run_parser.add_argument("-n", "--workers", default = "auto")
    ci_run_parser.add_argument("-v", "--verbose", action = "count", default = 0)
    return parser


def main(argv:list[str] | None = None) -> int:
    os.chdir(ROOT)
    effective_argv = sys.argv[1:] if argv is None else argv
    parser = _build_parser()
    args, passthrough = parser.parse_known_args(effective_argv)
    # This entrypoint is intended for one-shot CLI usage, not same-process
    # repeated invocations that can reuse imports loaded by pytest.main().
    if args.command == "run":
        return _run_profile(profile = args.profile, verbosity = args.verbose, passthrough = passthrough)
    if args.command == "ci-prepare":
        _cleanup_coverage_artifacts()
        return 0
    if args.command == "ci-run":
        return _run_ci(
            marker = args.marker,
            coverage_file = args.coverage_file,
            xml_file = args.xml_file,
            workers = args.workers,
            verbosity = args.verbose,
            passthrough = passthrough,
        )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
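The runner's passthrough mechanism rests on `argparse.parse_known_args`: options the runner does not declare are returned separately and forwarded to pytest. A minimal standalone sketch of that behavior (the `-k login` filter here is only an example):

```python
import argparse

# Mirror the runner's "run" subcommand: a profile positional plus a -v counter.
parser = argparse.ArgumentParser(description="Run project tests (sketch)")
subparsers = parser.add_subparsers(dest="command", required=True)
run_parser = subparsers.add_parser("run")
run_parser.add_argument("profile", choices=["test", "utest", "itest", "smoke"])
run_parser.add_argument("-v", "--verbose", action="count", default=0)

# Known flags are parsed; everything unrecognized is collected for pytest.
args, passthrough = parser.parse_known_args(["run", "utest", "-vv", "-k", "login"])
print(args.profile, args.verbose, passthrough)
```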