mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
fix: improve Chrome version detection to reuse existing browsers (#615)
This commit is contained in:
@@ -426,31 +426,27 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
|
||||
"(fail) Browser binary is not executable": "(Fehler) Browser-Binärdatei ist nicht ausführbar"
|
||||
"(fail) No compatible browser found": "(Fehler) Kein kompatibler Browser gefunden"
|
||||
"(fail) User data directory permissions issue": "(Fehler) Benutzerdatenverzeichnis-Berechtigungsproblem"
|
||||
"(fail) Remote debugging port is not open": "(Fehler) Remote-Debugging-Port ist nicht offen"
|
||||
"(fail) Running as root - this can cause browser connection issues": "(Fehler) Läuft als Root - dies kann Browser-Verbindungsprobleme verursachen"
|
||||
"(info) User data directory does not exist (will be created): %s": "(Info) Benutzerdatenverzeichnis existiert nicht (wird erstellt): %s"
|
||||
"(info) Remote debugging port configured: %d": "(Info) Remote-Debugging-Port konfiguriert: %d"
|
||||
"(info) Remote debugging port is not open": "(Info) Remote-Debugging-Port ist nicht offen"
|
||||
|
||||
"(info) No browser processes currently running": "(Info) Derzeit keine Browser-Prozesse aktiv"
|
||||
"(fail) Running as root - this can cause browser issues": "(Fehler) Läuft als Root - dies kann Browser-Probleme verursachen"
|
||||
|
||||
"(info) Found %d browser processes running": "(Info) %d Browser-Prozesse aktiv gefunden"
|
||||
"(info) Windows detected - check Windows Defender and antivirus software": "(Info) Windows erkannt - überprüfen Sie Windows Defender und Antivirensoftware"
|
||||
"(info) macOS detected - check Gatekeeper and security settings": "(Info) macOS erkannt - überprüfen Sie Gatekeeper und Sicherheitseinstellungen"
|
||||
"(info) Linux detected - check if running as root (not recommended)": "(Info) Linux erkannt - überprüfen Sie, ob als Root ausgeführt wird (nicht empfohlen)"
|
||||
" - PID %d: %s": " - PID %d: %s"
|
||||
" Make sure browser is started with: --remote-debugging-port=%d": " Stellen Sie sicher, dass der Browser gestartet wird mit: --remote-debugging-port=%d"
|
||||
" - PID %d: %s (remote debugging enabled)": " - PID %d: %s (Remote-Debugging aktiviert)"
|
||||
" - PID %d: %s (remote debugging NOT enabled)": " - PID %d: %s (Remote-Debugging NICHT aktiviert)"
|
||||
"(ok) Remote debugging API accessible - Browser: %s": "(ok) Remote-Debugging-API zugänglich - Browser: %s"
|
||||
"(fail) Remote debugging port is open but API not accessible: %s": "(Fehler) Remote-Debugging-Port ist offen, aber API nicht zugänglich: %s"
|
||||
" This might indicate a browser update issue or configuration problem": " Dies könnte auf ein Browser-Update-Problem oder Konfigurationsproblem hinweisen"
|
||||
|
||||
|
||||
|
||||
_validate_chrome_136_configuration:
|
||||
" -> %s 136+ configuration validation failed: %s": " -> %s 136+ Konfigurationsvalidierung fehlgeschlagen: %s"
|
||||
" -> %s 136+ configuration validation passed": " -> %s 136+ Konfigurationsvalidierung bestanden"
|
||||
|
||||
_validate_chrome_version_configuration:
|
||||
" -> %s 136+ detected: %s": " -> %s 136+ erkannt: %s"
|
||||
" -> %s 136+ configuration validation passed": " -> %s 136+ Konfigurationsvalidierung bestanden"
|
||||
" -> %s 136+ configuration validation failed: %s": " -> %s 136+ Konfigurationsvalidierung fehlgeschlagen: %s"
|
||||
" -> %s version detected: %s (pre-136, no special validation required)": " -> %s-Version erkannt: %s (vor 136, keine besondere Validierung erforderlich)"
|
||||
" -> Please update your configuration to include --user-data-dir for remote debugging": " -> Bitte aktualisieren Sie Ihre Konfiguration, um --user-data-dir für Remote-Debugging einzuschließen"
|
||||
" -> Skipping browser version validation in test environment": " -> Browser-Versionsvalidierung in Testumgebung wird übersprungen"
|
||||
" -> Browser version detection failed, skipping validation: %s": " -> Browser-Versionserkennung fehlgeschlagen, Validierung wird übersprungen: %s"
|
||||
" -> Unexpected error during browser version validation, skipping: %s": " -> Unerwarteter Fehler bei Browser-Versionsvalidierung, wird übersprungen: %s"
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
import json
|
||||
import re
|
||||
import subprocess # noqa: S404
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from typing import Any, Final
|
||||
|
||||
@@ -59,6 +60,24 @@ def parse_version_string(version_string:str) -> int:
|
||||
return int(match.group(1))
|
||||
|
||||
|
||||
def _normalize_browser_name(browser_name:str) -> str:
|
||||
"""
|
||||
Normalize browser name for consistent detection.
|
||||
|
||||
Args:
|
||||
browser_name: Raw browser name from detection
|
||||
|
||||
Returns:
|
||||
Normalized browser name
|
||||
"""
|
||||
browser_name_lower = browser_name.lower()
|
||||
if "edge" in browser_name_lower or "edg" in browser_name_lower:
|
||||
return "Edge"
|
||||
if "chromium" in browser_name_lower:
|
||||
return "Chromium"
|
||||
return "Chrome"
|
||||
|
||||
|
||||
def detect_chrome_version_from_binary(binary_path:str) -> ChromeVersionInfo | None:
|
||||
"""
|
||||
Detect Chrome version by running the browser binary.
|
||||
@@ -90,11 +109,7 @@ def detect_chrome_version_from_binary(binary_path:str) -> ChromeVersionInfo | No
|
||||
version_string = version_match.group(1) if version_match else output
|
||||
|
||||
# Determine browser name from binary path
|
||||
browser_name = "Chrome"
|
||||
if "edge" in binary_path.lower():
|
||||
browser_name = "Edge"
|
||||
elif "chromium" in binary_path.lower():
|
||||
browser_name = "Chromium"
|
||||
browser_name = _normalize_browser_name(binary_path)
|
||||
|
||||
return ChromeVersionInfo(version_string, major_version, browser_name)
|
||||
|
||||
@@ -125,7 +140,7 @@ def detect_chrome_version_from_remote_debugging(host:str = "127.0.0.1", port:int
|
||||
|
||||
# Extract version information
|
||||
user_agent = version_data.get("User-Agent", "")
|
||||
browser_name = version_data.get("Browser", "Unknown")
|
||||
browser_name = _normalize_browser_name(version_data.get("Browser", "Unknown"))
|
||||
|
||||
# Parse version from User-Agent string
|
||||
# Example: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.6778.0 Safari/537.36"
|
||||
@@ -139,6 +154,12 @@ def detect_chrome_version_from_remote_debugging(host:str = "127.0.0.1", port:int
|
||||
|
||||
return ChromeVersionInfo(version_string, major_version, browser_name)
|
||||
|
||||
except urllib.error.URLError as e:
|
||||
LOG.debug("Remote debugging API not accessible: %s", e)
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
LOG.debug("Invalid JSON response from remote debugging API: %s", e)
|
||||
return None
|
||||
except Exception as e:
|
||||
LOG.debug("Failed to detect browser version from remote debugging: %s", str(e))
|
||||
return None
|
||||
@@ -148,7 +169,7 @@ def validate_chrome_136_configuration(browser_arguments:list[str], user_data_dir
|
||||
"""
|
||||
Validate configuration for Chrome/Edge 136+ security requirements.
|
||||
|
||||
Chrome/Edge 136+ requires --user-data-dir to be specified when using --remote-debugging-port.
|
||||
Chrome/Edge 136+ requires --user-data-dir to be specified for security reasons.
|
||||
|
||||
Args:
|
||||
browser_arguments: List of browser arguments
|
||||
@@ -157,15 +178,6 @@ def validate_chrome_136_configuration(browser_arguments:list[str], user_data_dir
|
||||
Returns:
|
||||
Tuple of (is_valid, error_message)
|
||||
"""
|
||||
# Check if remote debugging is enabled
|
||||
has_remote_debugging = any(
|
||||
arg.startswith("--remote-debugging-port=")
|
||||
for arg in browser_arguments
|
||||
)
|
||||
|
||||
if not has_remote_debugging:
|
||||
return True, "" # No remote debugging, no validation needed
|
||||
|
||||
# Check if user-data-dir is specified in arguments
|
||||
has_user_data_dir_arg = any(
|
||||
arg.startswith("--user-data-dir=")
|
||||
@@ -177,7 +189,7 @@ def validate_chrome_136_configuration(browser_arguments:list[str], user_data_dir
|
||||
|
||||
if not has_user_data_dir_arg and not has_user_data_dir_config:
|
||||
return False, (
|
||||
"Chrome/Edge 136+ requires --user-data-dir to be specified when using --remote-debugging-port. "
|
||||
"Chrome/Edge 136+ requires --user-data-dir to be specified. "
|
||||
"Add --user-data-dir=/path/to/directory to your browser arguments and "
|
||||
'user_data_dir: "/path/to/directory" to your configuration.'
|
||||
)
|
||||
|
||||
@@ -19,7 +19,9 @@ from nodriver.core.tab import Tab as Page
|
||||
|
||||
from . import loggers, net
|
||||
from .chrome_version_detector import (
|
||||
ChromeVersionInfo,
|
||||
detect_chrome_version_from_binary,
|
||||
detect_chrome_version_from_remote_debugging,
|
||||
get_chrome_version_diagnostic_info,
|
||||
validate_chrome_136_configuration,
|
||||
)
|
||||
@@ -343,14 +345,43 @@ class WebScrapingMixin:
|
||||
LOG.warning("(fail) Remote debugging port is open but API not accessible: %s", str(e))
|
||||
LOG.info(" This might indicate a browser update issue or configuration problem")
|
||||
else:
|
||||
LOG.error("(fail) Remote debugging port is not open")
|
||||
LOG.info(" Make sure browser is started with: --remote-debugging-port=%d", remote_port)
|
||||
LOG.info("(info) Remote debugging port is not open")
|
||||
|
||||
# Check for running browser processes
|
||||
browser_processes = []
|
||||
target_browser_name = ""
|
||||
|
||||
# Get the target browser name for comparison
|
||||
if self.browser_config.binary_location:
|
||||
target_browser_name = os.path.basename(self.browser_config.binary_location).lower()
|
||||
else:
|
||||
try:
|
||||
target_browser_path = self.get_compatible_browser()
|
||||
target_browser_name = os.path.basename(target_browser_path).lower()
|
||||
except (AssertionError, TypeError):
|
||||
target_browser_name = ""
|
||||
|
||||
for proc in psutil.process_iter(["pid", "name", "cmdline"]):
|
||||
try:
|
||||
if proc.info["name"] and any(browser in proc.info["name"].lower() for browser in ["chrome", "chromium", "edge"]):
|
||||
proc_name = proc.info["name"] or ""
|
||||
cmdline = proc.info["cmdline"] or []
|
||||
|
||||
# Check if this is a browser process relevant to our diagnostics
|
||||
is_relevant_browser = False
|
||||
|
||||
# Is this the target browser?
|
||||
is_target_browser = target_browser_name and target_browser_name in proc_name.lower()
|
||||
|
||||
# Does it have remote debugging?
|
||||
has_remote_debugging = cmdline and any(arg.startswith("--remote-debugging-port=") for arg in cmdline)
|
||||
|
||||
# Detect target browser processes for diagnostics
|
||||
if is_target_browser:
|
||||
is_relevant_browser = True
|
||||
# Add debugging status to the process info for better diagnostics
|
||||
proc.info["has_remote_debugging"] = has_remote_debugging
|
||||
|
||||
if is_relevant_browser:
|
||||
browser_processes.append(proc.info)
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
pass
|
||||
@@ -358,19 +389,17 @@ class WebScrapingMixin:
|
||||
if browser_processes:
|
||||
LOG.info("(info) Found %d browser processes running", len(browser_processes))
|
||||
for proc in browser_processes[:3]: # Show first 3
|
||||
LOG.info(" - PID %d: %s", proc["pid"], proc["name"])
|
||||
has_debugging = proc.get("has_remote_debugging", False)
|
||||
if has_debugging:
|
||||
LOG.info(" - PID %d: %s (remote debugging enabled)", proc["pid"], proc["name"])
|
||||
else:
|
||||
LOG.warning(" - PID %d: %s (remote debugging NOT enabled)", proc["pid"], proc["name"])
|
||||
else:
|
||||
LOG.info("(info) No browser processes currently running")
|
||||
|
||||
# Platform-specific checks
|
||||
if platform.system() == "Windows":
|
||||
LOG.info("(info) Windows detected - check Windows Defender and antivirus software")
|
||||
elif platform.system() == "Darwin":
|
||||
LOG.info("(info) macOS detected - check Gatekeeper and security settings")
|
||||
elif platform.system() == "Linux":
|
||||
LOG.info("(info) Linux detected - check if running as root (not recommended)")
|
||||
if platform.system() == "Linux":
|
||||
if _is_admin():
|
||||
LOG.error("(fail) Running as root - this can cause browser connection issues")
|
||||
LOG.error("(fail) Running as root - this can cause browser issues")
|
||||
|
||||
# Chrome version detection and validation
|
||||
self._diagnose_chrome_version_issues(remote_port)
|
||||
@@ -760,24 +789,45 @@ class WebScrapingMixin:
|
||||
return
|
||||
|
||||
try:
|
||||
# Detect Chrome version from binary
|
||||
binary_path = self.browser_config.binary_location
|
||||
version_info = detect_chrome_version_from_binary(binary_path) if binary_path else None
|
||||
# Get remote debugging configuration
|
||||
remote_host = "127.0.0.1"
|
||||
remote_port = 0
|
||||
for arg in self.browser_config.arguments:
|
||||
if arg.startswith("--remote-debugging-host="):
|
||||
remote_host = arg.split("=", maxsplit = 1)[1]
|
||||
if arg.startswith("--remote-debugging-port="):
|
||||
remote_port = int(arg.split("=", maxsplit = 1)[1])
|
||||
|
||||
version_info = None
|
||||
|
||||
# First, try to detect version from existing browser with remote debugging
|
||||
if remote_port > 0:
|
||||
LOG.debug(" -> Checking for existing browser with remote debugging at %s:%s", remote_host, remote_port)
|
||||
# Reuse the same port checking logic as in create_browser_session
|
||||
port_available = await self._check_port_with_retry(remote_host, remote_port)
|
||||
if port_available:
|
||||
try:
|
||||
version_info = detect_chrome_version_from_remote_debugging(remote_host, remote_port)
|
||||
if version_info:
|
||||
LOG.debug(" -> Detected version from existing browser: %s", version_info)
|
||||
else:
|
||||
LOG.debug(" -> Port is open but remote debugging API not accessible")
|
||||
except Exception as e:
|
||||
LOG.debug(" -> Failed to detect version from existing browser: %s", e)
|
||||
else:
|
||||
LOG.debug(" -> No existing browser found at %s:%s", remote_host, remote_port)
|
||||
|
||||
# Only fall back to binary detection if no remote browser is running
|
||||
if not version_info:
|
||||
binary_path = self.browser_config.binary_location
|
||||
if binary_path:
|
||||
LOG.debug(" -> No remote browser detected, trying binary detection")
|
||||
version_info = detect_chrome_version_from_binary(binary_path)
|
||||
|
||||
# Validate if Chrome 136+ detected
|
||||
if version_info and version_info.is_chrome_136_plus:
|
||||
LOG.info(" -> %s 136+ detected: %s", version_info.browser_name, version_info)
|
||||
|
||||
# Validate configuration for Chrome/Edge 136+
|
||||
is_valid, error_message = validate_chrome_136_configuration(
|
||||
list(self.browser_config.arguments),
|
||||
self.browser_config.user_data_dir
|
||||
)
|
||||
|
||||
if not is_valid:
|
||||
LOG.error(" -> %s 136+ configuration validation failed: %s", version_info.browser_name, error_message)
|
||||
LOG.error(" -> Please update your configuration to include --user-data-dir for remote debugging")
|
||||
raise AssertionError(error_message)
|
||||
LOG.info(" -> %s 136+ configuration validation passed", version_info.browser_name)
|
||||
await self._validate_chrome_136_configuration(version_info)
|
||||
elif version_info:
|
||||
LOG.info(" -> %s version detected: %s (pre-136, no special validation required)", version_info.browser_name, version_info)
|
||||
else:
|
||||
@@ -789,6 +839,39 @@ class WebScrapingMixin:
|
||||
LOG.warning(" -> Unexpected error during browser version validation, skipping: %s", e)
|
||||
# Continue without validation rather than failing
|
||||
|
||||
async def _validate_chrome_136_configuration(self, version_info:ChromeVersionInfo) -> None:
    """
    Check that a user data directory is configured for a Chrome/Edge 136+ browser.

    The directory counts as configured when either a ``--user-data-dir=...``
    entry is present in the browser arguments or the ``user_data_dir``
    setting of the browser configuration is a non-blank string.

    Args:
        version_info: Detected browser version; only its ``browser_name`` is
            used here, for the log and error messages

    Raises:
        AssertionError: If neither source provides a user data directory
    """
    # Look for the switch among the configured browser arguments.
    dir_passed_as_argument = False
    for argument in self.browser_config.arguments:
        if argument.startswith("--user-data-dir="):
            dir_passed_as_argument = True
            break

    # The configuration value is always inspected, even when the argument was
    # found, and a whitespace-only value is treated as "not set".
    configured_dir = self.browser_config.user_data_dir
    dir_set_in_config = configured_dir is not None and bool(configured_dir.strip())

    if dir_passed_as_argument or dir_set_in_config:
        LOG.info(" -> %s 136+ configuration validation passed", version_info.browser_name)
        return

    error_message = (
        f"{version_info.browser_name} 136+ requires --user-data-dir to be specified. "
        "Add --user-data-dir=/path/to/directory to browser arguments and "
        'user_data_dir: "/path/to/directory" to your configuration.'
    )
    LOG.error(" -> %s 136+ configuration validation failed: %s", version_info.browser_name, error_message)
    raise AssertionError(error_message)
|
||||
|
||||
def _diagnose_chrome_version_issues(self, remote_port:int) -> None:
|
||||
"""
|
||||
Diagnose Chrome version issues and provide specific recommendations.
|
||||
|
||||
Reference in New Issue
Block a user