mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
feat: collect timeout timing sessions for diagnostics (#814)
This commit is contained in:
@@ -24,6 +24,7 @@ from .utils.exceptions import CaptchaEncountered
|
||||
from .utils.files import abspath
|
||||
from .utils.i18n import Locale, get_current_locale, pluralize, set_current_locale
|
||||
from .utils.misc import ainput, ensure, is_frozen
|
||||
from .utils.timing_collector import TimingCollector
|
||||
from .utils.web_scraping_mixin import By, Element, Is, WebScrapingMixin
|
||||
|
||||
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
|
||||
@@ -179,13 +180,14 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
self._log_basename = os.path.splitext(os.path.basename(sys.executable))[0] if is_frozen() else self.__module__
|
||||
self.log_file_path:str | None = abspath(f"{self._log_basename}.log")
|
||||
self._logfile_arg:str | None = None
|
||||
self._logfile_explicitly_provided = False
|
||||
self._logfile_explicitly_provided:bool = False
|
||||
|
||||
self.command = "help"
|
||||
self.ads_selector = "due"
|
||||
self.keep_old_ads = False
|
||||
|
||||
self._login_detection_diagnostics_captured:bool = False
|
||||
self._timing_collector:TimingCollector | None = None
|
||||
|
||||
def __del__(self) -> None:
|
||||
if self.file_log:
|
||||
@@ -393,6 +395,12 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
sys.exit(2)
|
||||
finally:
|
||||
self.close_browser_session()
|
||||
if self._timing_collector is not None:
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(None, self._timing_collector.flush)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
LOG.warning("Timing collector flush failed: %s", exc)
|
||||
|
||||
def show_help(self) -> None:
|
||||
if is_frozen():
|
||||
@@ -613,6 +621,13 @@ class KleinanzeigenBot(WebScrapingMixin): # noqa: PLR0904
|
||||
config_yaml = dicts.load_dict_if_exists(self.config_file_path, _("config"))
|
||||
self.config = Config.model_validate(config_yaml, strict = True, context = self.config_file_path)
|
||||
|
||||
timing_enabled = self.config.diagnostics.timing_collection
|
||||
if timing_enabled and self.workspace:
|
||||
timing_dir = self.workspace.diagnostics_dir.parent / "timing"
|
||||
self._timing_collector = TimingCollector(timing_dir, self.command)
|
||||
else:
|
||||
self._timing_collector = None
|
||||
|
||||
# load built-in category mappings
|
||||
self.categories = dicts.load_dict_from_module(resources, "categories.yaml", "")
|
||||
LOG.debug("Loaded %s categories from categories.yaml", len(self.categories))
|
||||
|
||||
@@ -43,7 +43,7 @@ class AdExtractor(WebScrapingMixin):
|
||||
super().__init__()
|
||||
self.browser = browser
|
||||
self.config:Config = config
|
||||
self.download_dir = download_dir
|
||||
self.download_dir:Path = download_dir
|
||||
self.published_ads_by_id:dict[int, dict[str, Any]] = published_ads_by_id or {}
|
||||
|
||||
async def download_ad(self, ad_id:int) -> None:
|
||||
|
||||
@@ -265,6 +265,10 @@ class DiagnosticsConfig(ContextualModel):
|
||||
default = None,
|
||||
description = "Optional output directory for diagnostics artifacts. If omitted, a safe default is used based on installation mode.",
|
||||
)
|
||||
timing_collection:bool = Field(
|
||||
default = True,
|
||||
description = "If true, collect local timeout timing data and write it to diagnostics JSON for troubleshooting and tuning.",
|
||||
)
|
||||
|
||||
@model_validator(mode = "before")
|
||||
@classmethod
|
||||
|
||||
@@ -261,6 +261,7 @@ kleinanzeigen_bot/__init__.py:
|
||||
"You provided no ads selector. Defaulting to \"new\".": "Es wurde kein Anzeigen-Selektor angegeben. Es wird \"new\" verwendet."
|
||||
"You provided no ads selector. Defaulting to \"changed\".": "Es wurde kein Anzeigen-Selektor angegeben. Es wird \"changed\" verwendet."
|
||||
"Unknown command: %s": "Unbekannter Befehl: %s"
|
||||
"Timing collector flush failed: %s": "Zeitmessdaten konnten nicht gespeichert werden: %s"
|
||||
|
||||
fill_login_data_and_send:
|
||||
"Logging in as [%s]...": "Anmeldung als [%s]..."
|
||||
@@ -527,6 +528,9 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
|
||||
"Last page reached (no enabled 'Naechste' button found).": "Letzte Seite erreicht (kein aktivierter 'Naechste'-Button gefunden)."
|
||||
"No pagination controls found. Assuming last page.": "Keine Paginierungssteuerung gefunden. Es wird von der letzten Seite ausgegangen."
|
||||
|
||||
_record_timing:
|
||||
"Timing collector failed for key=%s operation=%s: %s": "Zeitmessung fehlgeschlagen für key=%s operation=%s: %s"
|
||||
|
||||
close_browser_session:
|
||||
"Closing Browser session...": "Schließe Browser-Sitzung..."
|
||||
|
||||
@@ -685,6 +689,15 @@ kleinanzeigen_bot/utils/diagnostics.py:
|
||||
"Diagnostics capture attempted but no artifacts were saved (all captures failed)": "Diagnoseerfassung versucht, aber keine Artefakte gespeichert (alle Erfassungen fehlgeschlagen)"
|
||||
"Diagnostics capture failed: %s": "Diagnoseerfassung fehlgeschlagen: %s"
|
||||
|
||||
#################################################
|
||||
kleinanzeigen_bot/utils/timing_collector.py:
|
||||
#################################################
|
||||
_load_existing_sessions:
|
||||
"Unable to load timing collection data from %s: %s": "Zeitmessdaten aus %s konnten nicht geladen werden: %s"
|
||||
|
||||
flush:
|
||||
"Failed to flush timing collection data: %s": "Zeitmessdaten konnten nicht gespeichert werden: %s"
|
||||
|
||||
#################################################
|
||||
kleinanzeigen_bot/utils/xdg_paths.py:
|
||||
#################################################
|
||||
|
||||
168
src/kleinanzeigen_bot/utils/timing_collector.py
Normal file
168
src/kleinanzeigen_bot/utils/timing_collector.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# SPDX-FileCopyrightText: © Jens Bergmann and contributors
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||
|
||||
"""Collect per-operation timeout timings and persist per-run JSON sessions.
|
||||
|
||||
`TimingCollector` records operation durations in seconds, grouped by a single bot run
|
||||
(`session_id`). Call `record(...)` during runtime and `flush()` once at command end to
|
||||
append the current session to `timing_data.json` with automatic 30-day retention.
|
||||
The collector is best-effort and designed for troubleshooting, not strict telemetry.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json, uuid # isort: skip
|
||||
import os
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import timedelta
|
||||
from typing import TYPE_CHECKING, Any, Final
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from kleinanzeigen_bot.utils import loggers, misc
|
||||
|
||||
LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
|
||||
|
||||
RETENTION_DAYS:Final[int] = 30
|
||||
TIMING_FILE:Final[str] = "timing_data.json"
|
||||
|
||||
|
||||
@dataclass
|
||||
class TimingRecord:
|
||||
timestamp:str
|
||||
operation_key:str
|
||||
operation_type:str
|
||||
description:str
|
||||
configured_timeout_sec:float
|
||||
effective_timeout_sec:float
|
||||
actual_duration_sec:float
|
||||
attempt_index:int
|
||||
success:bool
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
class TimingCollector:
|
||||
def __init__(self, output_dir:Path, command:str) -> None:
|
||||
self.output_dir = output_dir.resolve()
|
||||
self.command = command
|
||||
self.session_id = uuid.uuid4().hex[:8]
|
||||
self.started_at = misc.now().isoformat()
|
||||
self.records:list[TimingRecord] = []
|
||||
self._flushed = False
|
||||
|
||||
LOG.debug("Timing collection initialized (session=%s, output_dir=%s, command=%s)", self.session_id, self.output_dir, command)
|
||||
|
||||
def record(
|
||||
self,
|
||||
*,
|
||||
key:str,
|
||||
operation_type:str,
|
||||
description:str,
|
||||
configured_timeout:float,
|
||||
effective_timeout:float,
|
||||
actual_duration:float,
|
||||
attempt_index:int,
|
||||
success:bool,
|
||||
) -> None:
|
||||
self.records.append(
|
||||
TimingRecord(
|
||||
timestamp = misc.now().isoformat(),
|
||||
operation_key = key,
|
||||
operation_type = operation_type,
|
||||
description = description,
|
||||
configured_timeout_sec = configured_timeout,
|
||||
effective_timeout_sec = effective_timeout,
|
||||
actual_duration_sec = actual_duration,
|
||||
attempt_index = attempt_index,
|
||||
success = success,
|
||||
)
|
||||
)
|
||||
LOG.debug(
|
||||
"Timing captured: %s [%s] duration=%.3fs timeout=%.3fs success=%s",
|
||||
operation_type,
|
||||
key,
|
||||
actual_duration,
|
||||
effective_timeout,
|
||||
success,
|
||||
)
|
||||
|
||||
def flush(self) -> Path | None:
|
||||
if self._flushed:
|
||||
LOG.debug("Timing collection already flushed for this run")
|
||||
return None
|
||||
if not self.records:
|
||||
LOG.debug("Timing collection enabled but no records captured in this run")
|
||||
return None
|
||||
|
||||
try:
|
||||
self.output_dir.mkdir(parents = True, exist_ok = True)
|
||||
data = self._load_existing_sessions()
|
||||
data.append(
|
||||
{
|
||||
"session_id": self.session_id,
|
||||
"command": self.command,
|
||||
"started_at": self.started_at,
|
||||
"ended_at": misc.now().isoformat(),
|
||||
"records": [record.to_dict() for record in self.records],
|
||||
}
|
||||
)
|
||||
|
||||
cutoff = misc.now() - timedelta(days = RETENTION_DAYS)
|
||||
retained:list[dict[str, Any]] = []
|
||||
dropped = 0
|
||||
for session in data:
|
||||
try:
|
||||
parsed = misc.parse_datetime(session.get("started_at"), add_timezone_if_missing = True)
|
||||
except ValueError:
|
||||
parsed = None
|
||||
if parsed is None:
|
||||
dropped += 1
|
||||
continue
|
||||
if parsed >= cutoff:
|
||||
retained.append(session)
|
||||
else:
|
||||
dropped += 1
|
||||
|
||||
if dropped > 0:
|
||||
LOG.debug("Timing collection pruned %d old or malformed sessions", dropped)
|
||||
|
||||
output_file = self.output_dir / TIMING_FILE
|
||||
temp_file = self.output_dir / f".{TIMING_FILE}.{self.session_id}.tmp"
|
||||
with temp_file.open("w", encoding = "utf-8") as fd:
|
||||
json.dump(retained, fd, indent = 2)
|
||||
fd.write("\n")
|
||||
fd.flush()
|
||||
os.fsync(fd.fileno())
|
||||
temp_file.replace(output_file)
|
||||
|
||||
LOG.debug(
|
||||
"Timing collection flushed to %s (%d sessions, %d current records, retention=%d days)",
|
||||
output_file,
|
||||
len(retained),
|
||||
len(self.records),
|
||||
RETENTION_DAYS,
|
||||
)
|
||||
self.records = []
|
||||
self._flushed = True
|
||||
return output_file
|
||||
except Exception as exc: # noqa: BLE001
|
||||
LOG.warning("Failed to flush timing collection data: %s", exc)
|
||||
return None
|
||||
|
||||
def _load_existing_sessions(self) -> list[dict[str, Any]]:
|
||||
file_path = self.output_dir / TIMING_FILE
|
||||
if not file_path.exists():
|
||||
return []
|
||||
|
||||
try:
|
||||
with file_path.open(encoding = "utf-8") as fd:
|
||||
payload = json.load(fd)
|
||||
if isinstance(payload, list):
|
||||
return [item for item in payload if isinstance(item, dict)]
|
||||
except Exception as exc: # noqa: BLE001
|
||||
LOG.warning("Unable to load timing collection data from %s: %s", file_path, exc)
|
||||
return []
|
||||
@@ -183,6 +183,36 @@ class WebScrapingMixin:
|
||||
# Always perform the initial attempt plus the configured number of retries.
|
||||
return 1 + cfg.retry_max_attempts
|
||||
|
||||
def _record_timing(
|
||||
self,
|
||||
*,
|
||||
key:str,
|
||||
description:str,
|
||||
configured_timeout:float,
|
||||
effective_timeout:float,
|
||||
actual_duration:float,
|
||||
attempt_index:int,
|
||||
success:bool,
|
||||
) -> None:
|
||||
collector = getattr(self, "_timing_collector", None)
|
||||
if collector is None:
|
||||
return
|
||||
|
||||
operation_type = description.split("(", 1)[0] if "(" in description else description
|
||||
try:
|
||||
collector.record(
|
||||
key = key,
|
||||
operation_type = operation_type,
|
||||
description = description,
|
||||
configured_timeout = configured_timeout,
|
||||
effective_timeout = effective_timeout,
|
||||
actual_duration = actual_duration,
|
||||
attempt_index = attempt_index,
|
||||
success = success,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
LOG.warning("Timing collector failed for key=%s operation=%s: %s", key, operation_type, exc)
|
||||
|
||||
async def _run_with_timeout_retries(
|
||||
self, operation:Callable[[float], Awaitable[T]], *, description:str, key:str = "default", override:float | None = None
|
||||
) -> T:
|
||||
@@ -190,12 +220,34 @@ class WebScrapingMixin:
|
||||
Execute an async callable with retry/backoff handling for TimeoutError.
|
||||
"""
|
||||
attempts = self._timeout_attempts()
|
||||
configured_timeout = self._timeout(key, override)
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
for attempt in range(attempts):
|
||||
effective_timeout = self._effective_timeout(key, override, attempt = attempt)
|
||||
attempt_started = loop.time()
|
||||
try:
|
||||
return await operation(effective_timeout)
|
||||
result = await operation(effective_timeout)
|
||||
self._record_timing(
|
||||
key = key,
|
||||
description = description,
|
||||
configured_timeout = configured_timeout,
|
||||
effective_timeout = effective_timeout,
|
||||
actual_duration = loop.time() - attempt_started,
|
||||
attempt_index = attempt,
|
||||
success = True,
|
||||
)
|
||||
return result
|
||||
except TimeoutError:
|
||||
self._record_timing(
|
||||
key = key,
|
||||
description = description,
|
||||
configured_timeout = configured_timeout,
|
||||
effective_timeout = effective_timeout,
|
||||
actual_duration = loop.time() - attempt_started,
|
||||
attempt_index = attempt,
|
||||
success = False,
|
||||
)
|
||||
if attempt >= attempts - 1:
|
||||
raise
|
||||
LOG.debug("Retrying %s after TimeoutError (attempt %d/%d, timeout %.1fs)", description, attempt + 1, attempts, effective_timeout)
|
||||
|
||||
Reference in New Issue
Block a user