From 398286bcbc001036b04f958b2724d7262ad3f51e Mon Sep 17 00:00:00 2001 From: Jens <1742418+1cu@users.noreply.github.com> Date: Mon, 16 Feb 2026 16:56:31 +0100 Subject: [PATCH] ci: check generated schema and default config artifacts (#825) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## ℹ️ Description - Link to the related issue(s): N/A - Add a CI guard that fails when generated artifacts are out of sync, motivated by preventing missing schema updates and keeping generated reference files current. - Add a committed `docs/config.default.yaml` as a user-facing default configuration reference. ## 📋 Changes Summary - Add `scripts/check_generated_artifacts.py` to regenerate schema artifacts and compare tracked outputs (`schemas/*.json` and `docs/config.default.yaml`) against generated content. - Run the new artifact consistency check in CI via `.github/workflows/build.yml`. - Add `pdm run generate-config` and `pdm run generate-artifacts` tasks, with a cross-platform-safe delete in `generate-config`. - Add generated `docs/config.default.yaml` and document it in `docs/CONFIGURATION.md`. - Update `schemas/config.schema.json` with the `diagnostics.timing_collection` property generated from the model. ### ⚙️ Type of Change Select the type(s) of change(s) included in this pull request: - [ ] 🐞 Bug fix (non-breaking change which fixes an issue) - [x] ✨ New feature (adds new functionality without breaking existing usage) - [ ] 💥 Breaking change (changes that might break existing user setups, scripts, or configurations) ## ✅ Checklist Before requesting a review, confirm the following: - [x] I have reviewed my changes to ensure they meet the project's standards. - [x] I have tested my changes and ensured that all tests pass (`pdm run test`). - [x] I have formatted the code (`pdm run format`). - [x] I have verified that linting passes (`pdm run lint`). - [x] I have updated documentation where necessary. 
By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice. ## Summary by CodeRabbit * **Documentation** * Added a reference link to the default configuration snapshot for easier access to baseline settings. * **Chores** * Added a CI build-time check that validates generated schemas and the default config and alerts when regeneration is needed. * Added scripts to generate the default config and to sequence artifact generation. * Added a utility to produce standardized schema content and compare generated artifacts. * Minor tweak to schema generation success messaging. --- .github/workflows/build.yml | 5 + docs/CONFIGURATION.md | 2 + docs/config.default.yaml | 312 +++++++++++++++++++++++++++ pyproject.toml | 2 + schemas/config.schema.json | 6 + scripts/check_generated_artifacts.py | 143 ++++++++++++ scripts/generate_schemas.py | 17 +- scripts/schema_utils.py | 21 ++ 8 files changed, 497 insertions(+), 11 deletions(-) create mode 100644 docs/config.default.yaml create mode 100644 scripts/check_generated_artifacts.py create mode 100644 scripts/schema_utils.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6f143b6..55bdc03 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -145,6 +145,11 @@ jobs: run: pdm show + - name: Check generated schemas and default docs config + if: matrix.os == 'ubuntu-latest' && matrix.PYTHON_VERSION == '3.14' + run: pdm run python scripts/check_generated_artifacts.py + + - name: Check with pip-audit # until https://github.com/astral-sh/ruff/issues/8277 run: diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 208f8a0..67591b8 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -14,6 +14,8 @@ For full JSON schema with IDE autocompletion support, see: - 
[schemas/config.schema.json](https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json) +A reference snapshot of default values is available at [docs/config.default.yaml](https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/docs/config.default.yaml). + To enable IDE autocompletion in `config.yaml`, add this at the top of the file: ```yaml diff --git a/docs/config.default.yaml b/docs/config.default.yaml new file mode 100644 index 0000000..7bc4d81 --- /dev/null +++ b/docs/config.default.yaml @@ -0,0 +1,312 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/Second-Hand-Friends/kleinanzeigen-bot/main/schemas/config.schema.json + +# glob (wildcard) patterns to select ad configuration files +# if relative paths are specified, then they are relative to this configuration file +ad_files: + - ./**/ad_*.{json,yml,yaml} + +# ################################################################################ +# Default values for ads, can be overwritten in each ad configuration file +ad_defaults: + + # whether the ad should be published (false = skip this ad) + active: true + + # type of the ad listing + # Examples (choose one): + # • OFFER + # • WANTED + type: OFFER + + # text to prepend to each ad (optional) + description_prefix: '' + + # text to append to each ad (optional) + description_suffix: '' + + # pricing strategy for the listing + # Examples (choose one): + # • FIXED + # • NEGOTIABLE + # • GIVE_AWAY + # • NOT_APPLICABLE + price_type: NEGOTIABLE + + # automatic price reduction configuration for reposted ads + auto_price_reduction: + + # automatically lower the price of reposted ads + enabled: false + + # reduction strategy (required when enabled: true). PERCENTAGE = % of price, FIXED = absolute amount + # Examples (choose one): + # • PERCENTAGE + # • FIXED + strategy: + + # reduction amount (required when enabled: true). For PERCENTAGE: use percent value (e.g., 10 = 10%%). 
For FIXED: use currency amount + # Examples (choose one): + # • 10.0 + # • 5.0 + # • 20.0 + amount: + + # minimum price floor (required when enabled: true). Use 0 for no minimum + # Examples (choose one): + # • 1.0 + # • 5.0 + # • 10.0 + min_price: + + # number of reposts to wait before applying the first automatic price reduction + delay_reposts: 0 + + # number of days to wait after publication before applying automatic price reductions + delay_days: 0 + + # shipping method for the item + # Examples (choose one): + # • PICKUP + # • SHIPPING + # • NOT_APPLICABLE + shipping_type: SHIPPING + + # enable direct purchase option (only works when shipping_type is SHIPPING) + sell_directly: false + + # default image glob patterns (optional). Leave empty for no default images + # Example usage: + # images: + # - "images/*.jpg" + # - "photos/*.{png,jpg}" + images: [] + + # default contact information for ads + contact: + + # contact name displayed on the ad + name: '' + + # street address for the listing + street: '' + + # postal/ZIP code for the listing location + zipcode: '' + + # city or locality of the listing (can include multiple districts) + # Example: Sample Town - District One + location: '' + + # phone number for contact - only available for commercial accounts, personal accounts no longer support this + # Example: "01234 567890" + phone: '' + + # number of days between automatic republication of ads + republication_interval: 7 + +# ################################################################################ +# additional name to category ID mappings (optional). Leave as {} if not needed. 
See full list at: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/src/kleinanzeigen_bot/resources/categories.yaml To add: use format 'Category > Subcategory': 'ID' +# Examples (choose one): +# • "Elektronik > Notebooks": "161/278" +# • "Jobs > Praktika": "102/125" +categories: {} + +# ################################################################################ +download: + + # if true, all shipping options matching the package size will be included + include_all_matching_shipping_options: false + + # shipping options to exclude (optional). Leave as [] to include all. Add items like 'DHL_2' to exclude specific carriers + # Example usage: + # excluded_shipping_options: + # - "DHL_2" + # - "DHL_5" + # - "Hermes" + excluded_shipping_options: [] + + # maximum length for folder names when downloading ads (default: 100) + folder_name_max_length: 100 + + # if true, rename existing folders without titles to include titles (default: false) + rename_existing_folders: false + +# ################################################################################ +publishing: + + # when to delete old versions of republished ads + # Examples (choose one): + # • BEFORE_PUBLISH + # • AFTER_PUBLISH + # • NEVER + delete_old_ads: AFTER_PUBLISH + + # match old ads by title when deleting (only works with BEFORE_PUBLISH) + delete_old_ads_by_title: true + +# ################################################################################ +# Browser configuration +browser: + + # additional Chromium command line switches (optional). Leave as [] for default behavior. See https://peter.sh/experiments/chromium-command-line-switches/ Common: --headless (no GUI), --disable-dev-shm-usage (Docker fix), --user-data-dir=/path + # Example usage: + # arguments: + # - "--headless" + # - "--disable-dev-shm-usage" + # - "--user-data-dir=/path/to/profile" + arguments: [] + + # path to custom browser executable (optional). 
Leave empty to use system default + binary_location: '' + + # Chrome extensions to load (optional). Leave as [] for no extensions. Add .crx file paths relative to config file + # Example usage: + # extensions: + # - "extensions/adblock.crx" + # - "/absolute/path/to/extension.crx" + extensions: [] + + # open browser in private/incognito mode (recommended to avoid cookie conflicts) + use_private_window: true + + # custom browser profile directory (optional). Leave empty for auto-configured default + user_data_dir: '' + + # browser profile name (optional). Leave empty for default profile + # Example: "Profile 1" + profile_name: '' + +# ################################################################################ +# Login credentials +login: + + # kleinanzeigen.de login email or username + username: changeme + + # kleinanzeigen.de login password + password: changeme + +# ################################################################################ +captcha: + + # if true, abort when captcha is detected and auto-retry after restart_delay (if false, wait for manual solving) + auto_restart: false + + # duration to wait before retrying after captcha detection (e.g., 1h30m, 6h, 30m) + # Examples (choose one): + # • 6h + # • 1h30m + # • 30m + restart_delay: 6h + +# ################################################################################ +# Update check configuration +update_check: + + # whether to check for updates on startup + enabled: true + + # which release channel to check (latest = stable, preview = prereleases) + # Examples (choose one): + # • latest + # • preview + channel: latest + + # how often to check for updates (e.g., 7d, 1d). If invalid, too short (<1d), or too long (>30d), uses defaults: 1d for 'preview' channel, 7d for 'latest' channel + # Examples (choose one): + # • 7d + # • 1d + # • 14d + interval: 7d + +# ################################################################################ +# Centralized timeout configuration. 
+timeouts: + + # Global multiplier applied to all timeout values. + multiplier: 1.0 + + # Baseline timeout for DOM interactions. + default: 5.0 + + # Page load timeout for web_open. + page_load: 15.0 + + # Timeout for captcha iframe detection. + captcha_detection: 2.0 + + # Timeout for SMS verification prompts. + sms_verification: 4.0 + + # Timeout for email verification prompts. + email_verification: 4.0 + + # Timeout for GDPR/consent dialogs. + gdpr_prompt: 10.0 + + # Timeout for detecting existing login session via DOM elements. + login_detection: 10.0 + + # Timeout for publishing result checks. + publishing_result: 300.0 + + # Timeout for publish confirmation redirect. + publishing_confirmation: 20.0 + + # Timeout for image upload and server-side processing. + image_upload: 30.0 + + # Timeout for initial pagination lookup. + pagination_initial: 10.0 + + # Timeout for subsequent pagination navigation. + pagination_follow_up: 5.0 + + # Generic short timeout for transient UI. + quick_dom: 2.0 + + # Timeout for GitHub update checks. + update_check: 10.0 + + # Timeout for local remote-debugging probes. + chrome_remote_probe: 2.0 + + # Timeout for remote debugging API calls. + chrome_remote_debugging: 5.0 + + # Timeout for chrome --version subprocesses. + chrome_binary_detection: 10.0 + + # Enable built-in retry/backoff for DOM operations. + retry_enabled: true + + # Max retry attempts when retry is enabled. + retry_max_attempts: 2 + + # Exponential factor applied per retry attempt. + retry_backoff_factor: 1.5 + +# ################################################################################ +# diagnostics capture configuration for troubleshooting +diagnostics: + + # Enable diagnostics capture for specific operations. 
+ capture_on: + + # Capture screenshot and HTML when login state detection fails + login_detection: false + + # Capture screenshot, HTML, and JSON on publish failures + publish: false + + # If true, copy the entire bot log file when diagnostics are captured (may duplicate log content). + capture_log_copy: false + + # If true, pause (interactive runs only) after capturing login detection diagnostics so that user can inspect the browser. Requires capture_on.login_detection to be enabled. + pause_on_login_detection_failure: false + + # Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode. + output_dir: + + # If true, collect local timeout timing data and write it to diagnostics JSON for troubleshooting and tuning. + timing_collection: true diff --git a/pyproject.toml b/pyproject.toml index b2e3cf1..863470c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,8 @@ debug = "python -m pdb -m kleinanzeigen_bot" # build & packaging generate-schemas = "python scripts/generate_schemas.py" +generate-config = { shell = "python -c \"from pathlib import Path; Path('docs/config.default.yaml').unlink(missing_ok=True)\" && python -m kleinanzeigen_bot --config docs/config.default.yaml create-config" } +generate-artifacts = { composite = ["generate-schemas", "generate-config"] } compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean --workpath .temp" compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build diff --git a/schemas/config.schema.json b/schemas/config.schema.json index c9b77a0..3fc690b 100644 --- a/schemas/config.schema.json +++ b/schemas/config.schema.json @@ -455,6 +455,12 @@ "default": null, "description": "Optional output directory for diagnostics artifacts. 
def _render_unified_diff(expected:str, generated:str, *, fromfile:str, tofile:str) -> str:
    """
    Return the unified diff that turns `expected` into `generated`, or "" when both texts are identical.

    `fromfile` and `tofile` are the labels shown in the `---` / `+++` header lines of the diff.
    """
    if expected == generated:
        return ""

    return "".join(
        difflib.unified_diff(
            expected.splitlines(keepends = True),
            generated.splitlines(keepends = True),
            fromfile = fromfile,
            tofile = tofile,
        )
    )


def generate_default_config_via_cli(path:Path, repo_root:Path, *, timeout:float = 60) -> None:
    """
    Run `python -m kleinanzeigen_bot --config <path> create-config` to generate a default config snapshot.

    The command is executed with the current interpreter and `repo_root` as working directory;
    its output is captured so it can be included in the error report.

    Raises RuntimeError if the CLI exits with a non-zero status or does not finish within
    `timeout` seconds.
    """
    command = [sys.executable, "-m", "kleinanzeigen_bot", "--config", str(path), "create-config"]
    try:
        subprocess.run(  # noqa: S603 trusted, static command arguments
            command,
            cwd = repo_root,
            check = True,
            timeout = timeout,
            capture_output = True,
            text = True,
        )
    except subprocess.TimeoutExpired as error:
        # report a hanging CLI the same way as a failing one instead of leaking a raw traceback
        raise RuntimeError(
            f"Timed out after {error.timeout}s while generating default config via CLI."
        ) from error
    except subprocess.CalledProcessError as error:
        stderr = error.stderr.strip() if error.stderr else ""
        stdout = error.stdout.strip() if error.stdout else ""
        raise RuntimeError(
            "Failed to generate default config via CLI.\n"
            f"Return code: {error.returncode}\n"
            f"stderr:\n{stderr}\n"
            f"stdout:\n{stdout}"
        ) from error


def get_schema_diffs(repo_root:Path) -> dict[str, str]:
    """
    Compare committed schema files with freshly generated schema content and return unified diffs per path.

    Only out-of-date schemas appear in the result; an empty dict means every schema is current.
    """
    diffs:dict[str, str] = {}
    for schema_path, model, schema_name in SCHEMA_DEFINITIONS:
        committed_schema_path = repo_root / schema_path
        # a missing schema file is compared as empty text, so the diff shows the whole expected file
        expected = committed_schema_path.read_text(encoding = "utf-8") if committed_schema_path.is_file() else ""
        generated = generate_schema_content(model, schema_name)

        diff = _render_unified_diff(
            expected,
            generated,
            fromfile = schema_path,
            tofile = f"<generated {schema_name} schema>",
        )
        if diff:
            diffs[schema_path] = diff

    return diffs


def get_default_config_diff(repo_root:Path) -> str:
    """
    Compare docs/config.default.yaml with a freshly generated config artifact and return a unified diff string.

    Returns "" when the committed file is current.
    Raises FileNotFoundError if the committed default config file does not exist.
    """
    expected_config_path = repo_root / DEFAULT_CONFIG_PATH
    if not expected_config_path.is_file():
        raise FileNotFoundError(f"Missing required default config file: {DEFAULT_CONFIG_PATH}")

    with tempfile.TemporaryDirectory() as tmpdir:
        generated_config_path = Path(tmpdir) / "config.default.yaml"
        generate_default_config_via_cli(generated_config_path, repo_root)

        expected = expected_config_path.read_text(encoding = "utf-8")
        # must be read before the temporary directory is removed
        generated = generated_config_path.read_text(encoding = "utf-8")

    return _render_unified_diff(
        expected,
        generated,
        fromfile = str(DEFAULT_CONFIG_PATH),
        tofile = "<generated default config>",
    )


def main() -> None:
    """
    Check all generated artifacts and abort with a report if any of them is out of date.

    Raises SystemExit carrying the report text (diffs plus regeneration hints) when a schema
    or the default config differs from its generated counterpart.
    """
    repo_root = Path(__file__).resolve().parent.parent

    schema_diffs = get_schema_diffs(repo_root)
    default_config_diff = get_default_config_diff(repo_root)

    if not schema_diffs and not default_config_diff:
        print("Generated schemas and docs/config.default.yaml are up-to-date.")
        return

    messages:list[str] = ["Generated artifacts are not up-to-date."]

    if schema_diffs:
        messages.append("Outdated schema files detected:")
        for path, schema_diff in schema_diffs.items():
            messages.append(f"- {path}")
            messages.append(schema_diff)

    if default_config_diff:
        messages.append("Outdated docs/config.default.yaml detected.")
        messages.append(default_config_diff)

    messages.append("Regenerate with one of the following:")
    messages.append("- Schema files: pdm run generate-schemas")
    messages.append("- Default config snapshot: pdm run generate-config")
    messages.append("- Both: pdm run generate-artifacts")
    raise SystemExit("\n".join(messages))


if __name__ == "__main__":
    main()
def generate_schema(model:type[BaseModel], name:str, out_dir:Path) -> None:
    """
    Render the JSON schema of `model` and store it as `<name in lower case>.schema.json` inside `out_dir`.

    Progress is reported on stdout before and after the file is written.
    """
    print(f"[+] Generating schema for model [{name}]...")

    target_file = out_dir / f"{name.lower()}.schema.json"
    target_file.write_text(generate_schema_content(model, name), encoding = "utf-8")
    print(f"[OK] {target_file}")


def generate_schema_content(model:type[BaseModel], name:str) -> str:
    """
    Serialize the validation-mode JSON schema of `model` as 2-space indented JSON with a trailing newline.

    "title" and "description" are derived from `name`, but only when the schema does not already define them.
    """
    fallbacks = {
        "title": f"{name} Schema",
        "description": f"Auto-generated JSON Schema for {name}",
    }

    schema = model.model_json_schema(mode = "validation")
    for key, value in fallbacks.items():
        schema.setdefault(key, value)

    rendered = json.dumps(schema, indent = 2)
    return f"{rendered}\n"