From c9d04da70d618f18c2799e43f3ff7e68d39ea3fe Mon Sep 17 00:00:00 2001
From: Jens Bergmann <1742418+1cu@users.noreply.github.com>
Date: Wed, 13 Aug 2025 09:29:25 +0200
Subject: [PATCH] feat: browser connection improvements (#601)
---
README.md | 42 +-
docs/BROWSER_TROUBLESHOOTING.md | 439 ++++++++++
src/kleinanzeigen_bot/__init__.py | 7 +
.../resources/translations.de.yaml | 51 ++
.../utils/web_scraping_mixin.py | 260 +++++-
tests/smoke/test_smoke_health.py | 8 +-
tests/unit/test_web_scraping_mixin.py | 781 +++++++++++++++++-
7 files changed, 1540 insertions(+), 48 deletions(-)
create mode 100644 docs/BROWSER_TROUBLESHOOTING.md
diff --git a/README.md b/README.md
index c9d599c..5c68876 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@
1. [Main configuration](#main-config)
1. [Ad configuration](#ad-config)
1. [Using an existing browser window](#existing-browser)
+ 1. [Browser Connection Issues](#browser-connection-issues)
1. [Development Notes](#development)
1. [Related Open-Source Projects](#related)
1. [License](#license)
@@ -194,6 +195,7 @@ Commands:
update-content-hash – recalculates each ad's content_hash based on the current ad_defaults;
use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
create-config - creates a new default configuration file if one does not exist
+ diagnose - diagnoses browser connection issues and shows troubleshooting information
--
help - displays this help (default command)
version - displays the application version
@@ -436,11 +438,31 @@ By default a new browser process will be launched. To reuse a manually launched
This runs the browser in debug mode which allows it to be remote controlled by the bot.
-1. In your config.yaml specify the same flag as browser argument, e.g.:
+ **⚠️ IMPORTANT: Chrome 136+ Security Requirement**
+
+ Starting with Chrome 136 (March 2025), Google has implemented security changes that require `--user-data-dir` to be specified when using `--remote-debugging-port`. This prevents attackers from accessing the default Chrome profile and stealing cookies/credentials.
+
+ **You must now use:**
+ ```bash
+ chrome --remote-debugging-port=9222 --user-data-dir=/path/to/custom/directory
+ ```
+
+ **And in your config.yaml:**
+ ```yaml
+ browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/path/to/custom/directory
+ user_data_dir: "/path/to/custom/directory"
+ ```
+
+1. In your config.yaml specify the same flags as browser arguments, e.g.:
```yaml
browser:
arguments:
- --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile # Required for Chrome 136+
+ user_data_dir: "/tmp/chrome-debug-profile" # Must match the argument above
```
1. When now publishing ads the manually launched browser will be re-used.
@@ -448,6 +470,24 @@ By default a new browser process will be launched. To reuse a manually launched
> NOTE: If an existing browser is used all other settings configured under `browser` in your config.yaml file will ignored
because they are only used to programmatically configure/launch a dedicated browser instance.
+> **Security Note:** This change was implemented by Google to protect users from cookie theft attacks. The custom user data directory uses a different encryption key than the default profile, making it more secure for debugging purposes.
+
+### Browser Connection Issues
+
+If you encounter browser connection problems, the bot includes a diagnostic command to help identify issues:
+
+**For binary users:**
+```bash
+kleinanzeigen-bot diagnose
+```
+
+**For source users:**
+```bash
+pdm run app diagnose
+```
+
+This command will check your browser setup and provide troubleshooting information. For detailed solutions to common browser connection issues, see the [Browser Connection Troubleshooting Guide](docs/BROWSER_TROUBLESHOOTING.md).
+
## Development Notes
> Please read [CONTRIBUTING.md](CONTRIBUTING.md) before contributing code. Thank you!
diff --git a/docs/BROWSER_TROUBLESHOOTING.md b/docs/BROWSER_TROUBLESHOOTING.md
new file mode 100644
index 0000000..06e1d5f
--- /dev/null
+++ b/docs/BROWSER_TROUBLESHOOTING.md
@@ -0,0 +1,439 @@
+# Browser Connection Troubleshooting Guide
+
+This guide helps you resolve common browser connection issues with the kleinanzeigen-bot.
+
+## ⚠️ Important: Chrome 136+ Security Changes (March 2025)
+
+**If you're using Chrome 136 or later and remote debugging stopped working, this is likely the cause.**
+
+Google implemented security changes in Chrome 136 that require `--user-data-dir` to be specified when using `--remote-debugging-port`. This prevents attackers from accessing the default Chrome profile and stealing cookies/credentials.
+
+**Quick Fix:**
+```bash
+# Start Chrome with custom user data directory
+chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug-profile
+```
+
+**In your config.yaml:**
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile # Required for Chrome 136+
+ user_data_dir: "/tmp/chrome-debug-profile" # Must match the argument above
+```
+
+For more details, see [Chrome 136+ Security Changes](#5-chrome-136-security-changes-march-2025) below.
+
+## Quick Diagnosis
+
+Run the diagnostic command to automatically check your setup:
+
+**For binary users:**
+```bash
+kleinanzeigen-bot diagnose
+```
+
+**For source users:**
+```bash
+pdm run app diagnose
+```
+
+This will check:
+- Browser binary availability and permissions
+- User data directory permissions
+- Remote debugging port status
+- Running browser processes
+- Platform-specific issues
+
+## Common Issues and Solutions
+
+### Issue 1: "Failed to connect to browser" with "root" error
+
+**Symptoms:**
+- Error message mentions "One of the causes could be when you are running as root"
+- Connection fails when using existing browser profiles
+
+**Causes:**
+1. Running the application as root user
+2. Browser profile is locked or in use by another process
+3. Insufficient permissions to access the browser profile
+4. Browser is not properly started with remote debugging enabled
+
+**Solutions:**
+
+#### 1. Don't run as root
+```bash
+# ❌ Don't do this
+sudo pdm run app publish
+
+# ✅ Do this instead
+pdm run app publish
+```
+
+#### 2. Close all browser instances
+```bash
+# On Linux/macOS
+pkill -f chrome
+pkill -f chromium
+pkill -f msedge
+
+# On Windows
+taskkill /f /im chrome.exe
+taskkill /f /im msedge.exe
+```
+
+#### 3. Remove user_data_dir temporarily
+Edit your `config.yaml` and comment out or remove the `user_data_dir` line:
+```yaml
+browser:
+ # user_data_dir: C:\Users\user\AppData\Local\Microsoft\Edge\User Data # Comment this out
+ profile_name: "Default"
+```
+
+#### 4. Start browser manually with remote debugging
+```bash
+# For Chrome (macOS)
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug-profile
+
+# For Chrome (Linux)
+google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug-profile
+
+# For Chrome (Windows)
+"C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 --user-data-dir=C:\temp\chrome-debug-profile
+
+# For Edge (macOS)
+/Applications/Microsoft\ Edge.app/Contents/MacOS/Microsoft\ Edge --remote-debugging-port=9222 --user-data-dir=/tmp/edge-debug-profile
+
+# For Edge (Linux/Windows)
+msedge --remote-debugging-port=9222 --user-data-dir=/tmp/edge-debug-profile
+
+# For Chromium (Linux)
+chromium --remote-debugging-port=9222 --user-data-dir=/tmp/chromium-debug-profile
+```
+
+Then in your `config.yaml`:
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile # Must match the command line
+ user_data_dir: "/tmp/chrome-debug-profile" # Must match the argument above
+```
+
+**⚠️ IMPORTANT: Chrome 136+ Security Requirement**
+
+Starting with Chrome 136 (March 2025), Google has implemented security changes that require `--user-data-dir` to be specified when using `--remote-debugging-port`. This prevents attackers from accessing the default Chrome profile and stealing cookies/credentials. See [Chrome's security announcement](https://developer.chrome.com/blog/remote-debugging-port?hl=de) for more details.
+
+### Issue 2: "Browser process not reachable at 127.0.0.1:9222"
+
+**Symptoms:**
+- Port check fails when trying to connect to existing browser
+- Browser appears to be running but connection fails
+
+**Causes:**
+1. Browser not started with remote debugging port
+2. Port is blocked by firewall
+3. Browser crashed or closed
+4. Timing issue - browser not fully started
+5. Browser update changed remote debugging behavior
+6. Existing Chrome instance conflicts with new debugging session
+7. **Chrome 136+ security requirement not met** (most common cause since March 2025)
+
+**Solutions:**
+
+#### 1. Verify browser is started with remote debugging
+Make sure your browser is started with the correct flag:
+```bash
+# Check if browser is running with remote debugging
+netstat -an | grep 9222 # Linux/macOS
+netstat -an | findstr 9222 # Windows
+```
+
+#### 2. Start browser manually first
+```bash
+# Start browser with remote debugging
+chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug
+
+# Then run the bot
+kleinanzeigen-bot publish # For binary users
+# or
+pdm run app publish # For source users
+```
+
+#### 3. macOS-specific: Chrome started but connection fails
+If you're on macOS and Chrome is started with remote debugging but the bot still can't connect:
+
+**⚠️ IMPORTANT: This is a Chrome/macOS security issue that requires a dedicated user data directory**
+
+```bash
+# Method 1: Use the full path to Chrome with dedicated user data directory
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
+ --remote-debugging-port=9222 \
+ --user-data-dir=/tmp/chrome-debug-profile \
+ --disable-dev-shm-usage
+
+# Method 2: Use open command with proper arguments
+open -a "Google Chrome" --args \
+ --remote-debugging-port=9222 \
+ --user-data-dir=/tmp/chrome-debug-profile \
+ --disable-dev-shm-usage
+
+# Method 3: Check if Chrome is actually listening on the port
+lsof -i :9222
+curl http://localhost:9222/json/version
+```
+
+**⚠️ CRITICAL: You must also configure the same user data directory in your config.yaml:**
+
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile
+ - --disable-dev-shm-usage
+ user_data_dir: "/tmp/chrome-debug-profile"
+```
+
+**Common macOS issues:**
+- Chrome/macOS security restrictions require a dedicated user data directory
+- The `--user-data-dir` flag is **mandatory** for remote debugging on macOS
+- Use `--disable-dev-shm-usage` to avoid shared memory issues
+- The user data directory must match between manual Chrome startup and config.yaml
+
+#### 4. Browser update issues
+If it worked before but stopped working after a browser update:
+
+```bash
+# Check your browser version
+# macOS
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --version
+
+# Linux
+google-chrome --version
+
+# Windows
+"C:\Program Files\Google\Chrome\Application\chrome.exe" --version
+
+# Close all browser instances first
+pkill -f "Google Chrome" # macOS/Linux
+# or
+taskkill /f /im chrome.exe # Windows
+
+# Start fresh with proper flags (see macOS-specific section above for details)
+```
+
+**After browser updates:**
+- Chrome may have changed how remote debugging works
+- Security restrictions may have been updated
+- Try using a fresh user data directory to avoid conflicts
+- Ensure you're using the latest version of the bot
+
+#### 5. Chrome 136+ Security Changes (March 2025)
+If you're using Chrome 136 or later and remote debugging stopped working:
+
+**The Problem:**
+Google implemented security changes in Chrome 136 that prevent `--remote-debugging-port` from working with the default user data directory. This was done to protect users from cookie theft attacks.
+
+**The Solution:**
+You must now specify a custom `--user-data-dir` when using remote debugging:
+
+```bash
+# ❌ This will NOT work with Chrome 136+
+chrome --remote-debugging-port=9222
+
+# ✅ This WILL work with Chrome 136+
+chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug-profile
+```
+
+**In your config.yaml:**
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile # Required for Chrome 136+
+ user_data_dir: "/tmp/chrome-debug-profile" # Must match the argument above
+```
+
+**Why this change was made:**
+- Prevents attackers from accessing the default Chrome profile
+- Protects cookies and login credentials
+- Uses a different encryption key for the custom profile
+- Makes debugging more secure
+
+**For more information:**
+- [Chrome's security announcement](https://developer.chrome.com/blog/remote-debugging-port?hl=de)
+- [GitHub issue discussion](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/604)
+
+#### 5. Check firewall settings
+- Windows: Check Windows Defender Firewall
+- macOS: Check System Preferences > Security & Privacy > Firewall
+- Linux: Check iptables or ufw settings
+
+#### 6. Use different port
+Try a different port in case 9222 is blocked:
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9223
+```
+
+### Issue 3: Profile directory issues
+
+**Symptoms:**
+- Errors about profile directory not found
+- Permission denied errors
+- Profile locked errors
+
+**Solutions:**
+
+#### 1. Use temporary profile
+```yaml
+browser:
+ user_data_dir: "/tmp/chrome-temp" # Linux/macOS
+ # user_data_dir: "C:\\temp\\chrome-temp" # Windows
+ profile_name: "Default"
+```
+
+#### 2. Check profile permissions
+```bash
+# Linux/macOS
+ls -la ~/.config/google-chrome/
+chmod 755 ~/.config/google-chrome/
+
+# Windows
+# Check folder permissions in Properties > Security
+```
+
+#### 3. Remove profile temporarily
+```yaml
+browser:
+ # user_data_dir: "" # Comment out or remove
+ # profile_name: "" # Comment out or remove
+ use_private_window: true
+```
+
+### Issue 4: Platform-specific issues
+
+#### Windows
+- **Antivirus software**: Add browser executable to exclusions
+- **Windows Defender**: Add folder to exclusions
+- **UAC**: Run as administrator if needed (but not recommended)
+
+#### macOS
+- **Gatekeeper**: Allow browser in System Preferences > Security & Privacy
+- **SIP**: System Integrity Protection might block some operations
+- **Permissions**: Grant full disk access to terminal/IDE
+
+#### Linux
+- **Sandbox**: Add `--no-sandbox` to browser arguments
+- **Root user**: Never run as root, use regular user
+- **Display**: Ensure X11 or Wayland is properly configured
+
+## Configuration Examples
+
+### Basic working configuration
+```yaml
+browser:
+ arguments:
+ - --disable-dev-shm-usage
+ - --no-sandbox
+ use_private_window: true
+```
+
+### Using existing browser
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile # Required for Chrome 136+
+ user_data_dir: "/tmp/chrome-debug-profile" # Must match the argument above
+ binary_location: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
+```
+
+### Using existing browser on macOS (REQUIRED configuration)
+```yaml
+browser:
+ arguments:
+ - --remote-debugging-port=9222
+ - --user-data-dir=/tmp/chrome-debug-profile
+ - --disable-dev-shm-usage
+ user_data_dir: "/tmp/chrome-debug-profile"
+ binary_location: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+```
+
+### Using specific profile
+```yaml
+browser:
+ user_data_dir: "C:\\Users\\username\\AppData\\Local\\Google\\Chrome\\User Data"
+ profile_name: "Profile 1"
+ arguments:
+ - --disable-dev-shm-usage
+```
+
+## Advanced Troubleshooting
+
+### Check browser compatibility
+```bash
+# Test if browser can be started manually
+# macOS
+/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --version
+/Applications/Microsoft\ Edge.app/Contents/MacOS/Microsoft\ Edge --version
+
+# Linux
+google-chrome --version
+msedge --version
+chromium --version
+
+# Windows
+"C:\Program Files\Google\Chrome\Application\chrome.exe" --version
+msedge --version
+```
+
+### Monitor browser processes
+```bash
+# Linux/macOS
+ps aux | grep chrome
+lsof -i :9222
+
+# Windows
+tasklist | findstr chrome
+netstat -an | findstr 9222
+```
+
+### Debug with verbose logging
+```bash
+kleinanzeigen-bot -v publish # For binary users
+# or
+pdm run app -v publish # For source users
+```
+
+### Test browser connection manually
+```bash
+# Test if port is accessible
+curl http://localhost:9222/json/version
+```
+
+## Getting Help
+
+If you're still experiencing issues:
+
+1. Run the diagnostic command: `kleinanzeigen-bot diagnose` (binary) or `pdm run app diagnose` (source)
+2. Check the log file for detailed error messages
+3. Try the solutions above step by step
+4. Create an issue on GitHub with:
+ - Output from the diagnose command
+ - Your `config.yaml` (remove sensitive information)
+ - Error messages from the log file
+ - Operating system and browser version
+
+## Prevention
+
+To avoid browser connection issues:
+
+1. **Don't run as root** - Always use a regular user account
+2. **Close other browser instances** - Ensure no other browser processes are running
+3. **Use temporary profiles** - Avoid conflicts with existing browser sessions
+4. **Keep browser updated** - Use the latest stable version
+5. **Check permissions** - Ensure proper file and folder permissions
+6. **Monitor system resources** - Ensure sufficient memory and disk space
diff --git a/src/kleinanzeigen_bot/__init__.py b/src/kleinanzeigen_bot/__init__.py
index 8bc92d1..474e2fd 100644
--- a/src/kleinanzeigen_bot/__init__.py
+++ b/src/kleinanzeigen_bot/__init__.py
@@ -82,6 +82,11 @@ class KleinanzeigenBot(WebScrapingMixin):
case "create-config":
self.create_default_config()
return
+ case "diagnose":
+ self.configure_file_logging()
+ self.load_config()
+ self.diagnose_browser_issues()
+ return
case "verify":
self.configure_file_logging()
self.load_config()
@@ -207,6 +212,7 @@ class KleinanzeigenBot(WebScrapingMixin):
nach Änderungen an den config.yaml/ad_defaults verhindert es, dass alle Anzeigen als
"geändert" gelten und neu veröffentlicht werden.
create-config - Erstellt eine neue Standard-Konfigurationsdatei, falls noch nicht vorhanden
+ diagnose - Diagnostiziert Browser-Verbindungsprobleme und zeigt Troubleshooting-Informationen
--
help - Zeigt diese Hilfe an (Standardbefehl)
version - Zeigt die Version der Anwendung an
@@ -251,6 +257,7 @@ class KleinanzeigenBot(WebScrapingMixin):
update-content-hash – recalculates each ad's content_hash based on the current ad_defaults;
use this after changing config.yaml/ad_defaults to avoid every ad being marked "changed" and republished
create-config - creates a new default configuration file if one does not exist
+ diagnose - diagnoses browser connection issues and shows troubleshooting information
--
help - displays this help (default command)
version - displays the application version
diff --git a/src/kleinanzeigen_bot/resources/translations.de.yaml b/src/kleinanzeigen_bot/resources/translations.de.yaml
index 4a849c6..5d0b241 100644
--- a/src/kleinanzeigen_bot/resources/translations.de.yaml
+++ b/src/kleinanzeigen_bot/resources/translations.de.yaml
@@ -375,6 +375,21 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
" -> Custom Browser argument: %s": " -> Benutzerdefiniertes Browser-Argument: %s"
" -> Setting chrome prefs [%s]...": " -> Setze Chrome-Einstellungen [%s]..."
" -> Adding Browser extension: [%s]": " -> Füge Browser-Erweiterung hinzu: [%s]"
+ "Failed to connect to browser. This error often occurs when:": "Fehler beim Verbinden mit dem Browser. Dieser Fehler tritt häufig auf, wenn:"
+ "Failed to start browser. This error often occurs when:": "Fehler beim Starten des Browsers. Dieser Fehler tritt häufig auf, wenn:"
+ "1. Running as root user (try running as regular user)": "1. Als Root-Benutzer ausgeführt wird (versuchen Sie es als normaler Benutzer)"
+ "2. Browser profile is locked or in use by another process": "2. Das Browser-Profil gesperrt oder von einem anderen Prozess verwendet wird"
+ "3. Insufficient permissions to access the browser profile": "3. Unzureichende Berechtigungen für den Zugriff auf das Browser-Profil"
+ "4. Browser is not properly started with remote debugging enabled": "4. Der Browser nicht ordnungsgemäß mit aktiviertem Remote-Debugging gestartet wurde"
+ "4. Browser binary is not executable or missing": "4. Die Browser-Binärdatei nicht ausführbar oder fehlend ist"
+ "5. Check if any antivirus or security software is blocking the browser": "5. Überprüfen Sie, ob Antiviren- oder Sicherheitssoftware den Browser blockiert"
+ "Troubleshooting steps:": "Schritte zur Fehlerbehebung:"
+ "1. Close all browser instances and try again": "1. Schließen Sie alle Browser-Instanzen und versuchen Sie es erneut"
+ "2. Remove the user_data_dir configuration temporarily": "2. Entfernen Sie die user_data_dir-Konfiguration vorübergehend"
+ "3. Start browser manually with: %s --remote-debugging-port=%d": "3. Starten Sie den Browser manuell mit: %s --remote-debugging-port=%d"
+ "3. Try running without profile configuration": "3. Versuchen Sie es ohne Profil-Konfiguration"
+ "4. Check browser binary permissions: %s": "4. Überprüfen Sie die Browser-Binärdatei-Berechtigungen: %s"
+ "4. Check if any antivirus or security software is blocking the connection": "4. Überprüfen Sie, ob Antiviren- oder Sicherheitssoftware die Verbindung blockiert"
web_check:
"Unsupported attribute: %s": "Nicht unterstütztes Attribut: %s"
@@ -398,6 +413,42 @@ kleinanzeigen_bot/utils/web_scraping_mixin.py:
web_request:
" -> HTTP %s [%s]...": " -> HTTP %s [%s]..."
+ diagnose_browser_issues:
+ "=== Browser Connection Diagnostics ===": "=== Browser-Verbindungsdiagnose ==="
+ "=== End Diagnostics ===": "=== Ende der Diagnose ==="
+ "(ok) Browser binary exists: %s": "(Ok) Browser-Binärdatei existiert: %s"
+ "(ok) Browser binary is executable": "(Ok) Browser-Binärdatei ist ausführbar"
+ "(ok) Auto-detected browser: %s": "(Ok) Automatisch erkannter Browser: %s"
+ "(ok) User data directory exists: %s": "(Ok) Benutzerdatenverzeichnis existiert: %s"
+ "(ok) User data directory is readable and writable": "(Ok) Benutzerdatenverzeichnis ist lesbar und beschreibbar"
+ "(ok) Remote debugging port is open": "(Ok) Remote-Debugging-Port ist offen"
+ "(fail) Browser binary not found: %s": "(Fehler) Browser-Binärdatei nicht gefunden: %s"
+ "(fail) Browser binary is not executable": "(Fehler) Browser-Binärdatei ist nicht ausführbar"
+ "(fail) No compatible browser found": "(Fehler) Kein kompatibler Browser gefunden"
+ "(fail) User data directory permissions issue": "(Fehler) Benutzerdatenverzeichnis-Berechtigungsproblem"
+ "(fail) Remote debugging port is not open": "(Fehler) Remote-Debugging-Port ist nicht offen"
+ "(fail) Running as root - this can cause browser connection issues": "(Fehler) Läuft als Root - dies kann Browser-Verbindungsprobleme verursachen"
+ "(info) User data directory does not exist (will be created): %s": "(Info) Benutzerdatenverzeichnis existiert nicht (wird erstellt): %s"
+ "(info) Remote debugging port configured: %d": "(Info) Remote-Debugging-Port konfiguriert: %d"
+ "(info) No browser processes currently running": "(Info) Derzeit keine Browser-Prozesse aktiv"
+ "(info) Found %d browser processes running": "(Info) %d Browser-Prozesse aktiv gefunden"
+ "(info) Windows detected - check Windows Defender and antivirus software": "(Info) Windows erkannt - überprüfen Sie Windows Defender und Antivirensoftware"
+ "(info) macOS detected - check Gatekeeper and security settings": "(Info) macOS erkannt - überprüfen Sie Gatekeeper und Sicherheitseinstellungen"
+ "(info) Linux detected - check if running as root (not recommended)": "(Info) Linux erkannt - überprüfen Sie, ob als Root ausgeführt wird (nicht empfohlen)"
+ " - PID %d: %s": " - PID %d: %s"
+ " Make sure browser is started with: --remote-debugging-port=%d": " Stellen Sie sicher, dass der Browser gestartet wird mit: --remote-debugging-port=%d"
+ "(ok) Remote debugging API accessible - Browser: %s": "(ok) Remote-Debugging-API zugänglich - Browser: %s"
+ "(fail) Remote debugging port is open but API not accessible: %s": "(Fehler) Remote-Debugging-Port ist offen, aber API nicht zugänglich: %s"
+ " This might indicate a browser update issue or configuration problem": " Dies könnte auf ein Browser-Update-Problem oder Konfigurationsproblem hinweisen"
+ ? " On macOS, try: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=%d --user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage"
+ : " Unter macOS versuchen Sie: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=%d --user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage"
+ ? " Or: open -a \"Google Chrome\" --args --remote-debugging-port=%d --user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage"
+ : " Oder: open -a \"Google Chrome\" --args --remote-debugging-port=%d --user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage"
+ " IMPORTANT: --user-data-dir is MANDATORY for macOS Chrome remote debugging": " WARNUNG: --user-data-dir ist PFLICHT für macOS Chrome Remote-Debugging"
+ " IMPORTANT: macOS Chrome remote debugging requires --user-data-dir flag": " WARNUNG: macOS Chrome Remote-Debugging erfordert --user-data-dir Flag"
+ " Add to your config.yaml: user_data_dir: \"/tmp/chrome-debug-profile\"": " Fügen Sie zu Ihrer config.yaml hinzu: user_data_dir: \"/tmp/chrome-debug-profile\""
+ " And to browser arguments: --user-data-dir=/tmp/chrome-debug-profile": " Und zu Browser-Argumenten: --user-data-dir=/tmp/chrome-debug-profile"
+
#################################################
kleinanzeigen_bot/update_checker.py:
#################################################
diff --git a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
index 2802760..7d30360 100644
--- a/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
+++ b/src/kleinanzeigen_bot/utils/web_scraping_mixin.py
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
# SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
-import asyncio, enum, inspect, json, os, platform, secrets, shutil # isort: skip
+import asyncio, enum, inspect, json, os, platform, secrets, shutil, urllib.request # isort: skip
from collections.abc import Callable, Coroutine, Iterable
from gettext import gettext as _
from typing import Any, Final, cast
@@ -36,6 +36,17 @@ LOG:Final[loggers.Logger] = loggers.get_logger(__name__)
METACHAR_ESCAPER:Final[dict[int, str]] = str.maketrans({ch: f"\\{ch}" for ch in '!"#$%&\'()*+,./:;<=>?@[\\]^`{|}~'})
+def _is_admin() -> bool:
+ """Check if the current process is running with admin/root privileges."""
+ try:
+ if hasattr(os, "geteuid"):
+ result = os.geteuid() == 0
+ return bool(result)
+ return False
+ except AttributeError:
+ return False
+
+
class By(enum.Enum):
ID = enum.auto()
CLASS_NAME = enum.auto()
@@ -93,17 +104,39 @@ class WebScrapingMixin:
if remote_port > 0:
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
- ensure(net.is_port_open(remote_host, remote_port),
+
+ # Enhanced port checking with retry logic
+ port_available = await self._check_port_with_retry(remote_host, remote_port)
+ ensure(port_available,
f"Browser process not reachable at {remote_host}:{remote_port}. "
- f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
- cfg = Config(
- browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
- )
- cfg.host = remote_host
- cfg.port = remote_port
- self.browser = await nodriver.start(cfg)
- LOG.info("New Browser session is %s", self.browser.websocket_url)
- return
+ f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml. "
+ f"Make sure the browser is running and the port is not blocked by firewall.")
+
+ try:
+ cfg = Config(
+ browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
+ )
+ cfg.host = remote_host
+ cfg.port = remote_port
+ self.browser = await nodriver.start(cfg)
+ LOG.info("New Browser session is %s", self.browser.websocket_url)
+ return
+ except Exception as e:
+ error_msg = str(e)
+ if "root" in error_msg.lower():
+ LOG.error("Failed to connect to browser. This error often occurs when:")
+ LOG.error("1. Running as root user (try running as regular user)")
+ LOG.error("2. Browser profile is locked or in use by another process")
+ LOG.error("3. Insufficient permissions to access the browser profile")
+ LOG.error("4. Browser is not properly started with remote debugging enabled")
+ LOG.error("")
+ LOG.error("Troubleshooting steps:")
+ LOG.error("1. Close all browser instances and try again")
+ LOG.error("2. Remove the user_data_dir configuration temporarily")
+ LOG.error("3. Start browser manually with: %s --remote-debugging-port=%d",
+ self.browser_config.binary_location, remote_port)
+ LOG.error("4. Check if any antivirus or security software is blocking the connection")
+ raise
########################################################
# configure and initialize new browser instance...
@@ -160,39 +193,41 @@ class WebScrapingMixin:
# already logged by nodriver:
# LOG.debug("-> Effective browser arguments: \n\t\t%s", "\n\t\t".join(cfg.browser_args))
- profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
- os.makedirs(profile_dir, exist_ok = True)
- prefs_file = os.path.join(profile_dir, "Preferences")
- if not os.path.exists(prefs_file):
- LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
- with open(prefs_file, "w", encoding = "UTF-8") as fd:
- json.dump({
- "credentials_enable_service": False,
- "enable_do_not_track": True,
- "google": {
- "services": {
- "consented_to_sync": False
- }
- },
- "profile": {
- "default_content_setting_values": {
- "popups": 0,
- "notifications": 2 # 1 = allow, 2 = block browser notifications
+ # Enhanced profile directory handling
+ if cfg.user_data_dir:
+ profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
+ os.makedirs(profile_dir, exist_ok = True)
+ prefs_file = os.path.join(profile_dir, "Preferences")
+ if not os.path.exists(prefs_file):
+ LOG.info(" -> Setting chrome prefs [%s]...", prefs_file)
+ with open(prefs_file, "w", encoding = "UTF-8") as fd:
+ json.dump({
+ "credentials_enable_service": False,
+ "enable_do_not_track": True,
+ "google": {
+ "services": {
+ "consented_to_sync": False
+ }
},
- "password_manager_enabled": False
- },
- "signin": {
- "allowed": False
- },
- "translate_site_blacklist": [
- "www.kleinanzeigen.de"
- ],
- "devtools": {
- "preferences": {
- "currentDockState": '"bottom"'
+ "profile": {
+ "default_content_setting_values": {
+ "popups": 0,
+ "notifications": 2 # 1 = allow, 2 = block browser notifications
+ },
+ "password_manager_enabled": False
+ },
+ "signin": {
+ "allowed": False
+ },
+ "translate_site_blacklist": [
+ "www.kleinanzeigen.de"
+ ],
+ "devtools": {
+ "preferences": {
+ "currentDockState": '"bottom"'
+ }
}
- }
- }, fd)
+ }, fd)
# load extensions
for crx_extension in self.browser_config.extensions:
@@ -200,8 +235,145 @@ class WebScrapingMixin:
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
cfg.add_extension(crx_extension)
- self.browser = await nodriver.start(cfg)
- LOG.info("New Browser session is %s", self.browser.websocket_url)
+ try:
+ self.browser = await nodriver.start(cfg)
+ LOG.info("New Browser session is %s", self.browser.websocket_url)
+ except Exception as e:
+ error_msg = str(e)
+ if "root" in error_msg.lower():
+ LOG.error("Failed to start browser. This error often occurs when:")
+ LOG.error("1. Running as root user (try running as regular user)")
+ LOG.error("2. Browser profile is locked or in use by another process")
+ LOG.error("3. Insufficient permissions to access the browser profile")
+ LOG.error("4. Browser binary is not executable or missing")
+ LOG.error("")
+ LOG.error("Troubleshooting steps:")
+ LOG.error("1. Close all browser instances and try again")
+ LOG.error("2. Remove the user_data_dir configuration temporarily")
+ LOG.error("3. Try running without profile configuration")
+ LOG.error("4. Check browser binary permissions: %s", self.browser_config.binary_location)
+ LOG.error("5. Check if any antivirus or security software is blocking the browser")
+ raise
+
+ async def _check_port_with_retry(self, host:str, port:int, max_retries:int = 3, retry_delay:float = 1.0) -> bool:
+ """
+ Check if a port is open with retry logic.
+
+ Args:
+ host: Host to check
+ port: Port to check
+ max_retries: Maximum number of retry attempts
+ retry_delay: Delay between retries in seconds
+
+ Returns:
+ True if port is open, False otherwise
+ """
+ for attempt in range(max_retries):
+ if net.is_port_open(host, port):
+ return True
+
+ if attempt < max_retries - 1:
+ LOG.debug("Port %s:%s not available, retrying in %.1f seconds (attempt %d/%d)",
+ host, port, retry_delay, attempt + 1, max_retries)
+ await asyncio.sleep(retry_delay)
+
+ return False
+
+ def diagnose_browser_issues(self) -> None:
+ """
+ Diagnose common browser connection issues and provide troubleshooting information.
+ """
+ LOG.info("=== Browser Connection Diagnostics ===")
+
+ # Check browser binary
+ if self.browser_config.binary_location:
+ if os.path.exists(self.browser_config.binary_location):
+ LOG.info("(ok) Browser binary exists: %s", self.browser_config.binary_location)
+ if os.access(self.browser_config.binary_location, os.X_OK):
+ LOG.info("(ok) Browser binary is executable")
+ else:
+ LOG.error("(fail) Browser binary is not executable")
+ else:
+ LOG.error("(fail) Browser binary not found: %s", self.browser_config.binary_location)
+ else:
+ browser_path = self.get_compatible_browser()
+ if browser_path:
+ LOG.info("(ok) Auto-detected browser: %s", browser_path)
+ else:
+ LOG.error("(fail) No compatible browser found")
+
+ # Check user data directory
+ if self.browser_config.user_data_dir:
+ if os.path.exists(self.browser_config.user_data_dir):
+ LOG.info("(ok) User data directory exists: %s", self.browser_config.user_data_dir)
+ if os.access(self.browser_config.user_data_dir, os.R_OK | os.W_OK):
+ LOG.info("(ok) User data directory is readable and writable")
+ else:
+ LOG.error("(fail) User data directory permissions issue")
+ else:
+ LOG.info("(info) User data directory does not exist (will be created): %s", self.browser_config.user_data_dir)
+
+ # Check for remote debugging port
+ remote_port = 0
+ for arg in self.browser_config.arguments:
+ if arg.startswith("--remote-debugging-port="):
+ remote_port = int(arg.split("=", maxsplit = 1)[1])
+ break
+
+ if remote_port > 0:
+ LOG.info("(info) Remote debugging port configured: %d", remote_port)
+ if net.is_port_open("127.0.0.1", remote_port):
+ LOG.info("(ok) Remote debugging port is open")
+ # Try to get more information about the debugging endpoint
+ try:
+ response = urllib.request.urlopen(f"http://127.0.0.1:{remote_port}/json/version", timeout = 2)
+ version_info = json.loads(response.read().decode())
+ LOG.info("(ok) Remote debugging API accessible - Browser: %s", version_info.get("Browser", "Unknown"))
+ except Exception as e:
+ LOG.warning("(fail) Remote debugging port is open but API not accessible: %s", str(e))
+ LOG.info(" This might indicate a browser update issue or configuration problem")
+ else:
+ LOG.error("(fail) Remote debugging port is not open")
+ LOG.info(" Make sure browser is started with: --remote-debugging-port=%d", remote_port)
+ if platform.system() == "Darwin":
+ LOG.info(" On macOS, try: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome "
+ "--remote-debugging-port=%d --user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage", remote_port)
+ LOG.info(' Or: open -a "Google Chrome" --args --remote-debugging-port=%d '
+ '--user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage', remote_port)
+ LOG.info(" IMPORTANT: --user-data-dir is MANDATORY for macOS Chrome remote debugging")
+
+ # Check for running browser processes
+ browser_processes = []
+ for proc in psutil.process_iter(["pid", "name", "cmdline"]):
+ try:
+ if proc.info["name"] and any(browser in proc.info["name"].lower() for browser in ["chrome", "chromium", "edge"]):
+ browser_processes.append(proc.info)
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
+ pass
+
+ if browser_processes:
+ LOG.info("(info) Found %d browser processes running", len(browser_processes))
+ for proc in browser_processes[:3]: # Show first 3
+ LOG.info(" - PID %d: %s", proc["pid"], proc["name"])
+ else:
+ LOG.info("(info) No browser processes currently running")
+
+ # Platform-specific checks
+ if platform.system() == "Windows":
+ LOG.info("(info) Windows detected - check Windows Defender and antivirus software")
+ elif platform.system() == "Darwin":
+ LOG.info("(info) macOS detected - check Gatekeeper and security settings")
+ # Check for macOS-specific Chrome remote debugging requirements
+ if remote_port > 0 and not self.browser_config.user_data_dir:
+ LOG.warning(" IMPORTANT: macOS Chrome remote debugging requires --user-data-dir flag")
+ LOG.info(' Add to your config.yaml: user_data_dir: "/tmp/chrome-debug-profile"')
+ LOG.info(" And to browser arguments: --user-data-dir=/tmp/chrome-debug-profile")
+ elif platform.system() == "Linux":
+ LOG.info("(info) Linux detected - check if running as root (not recommended)")
+ if _is_admin():
+ LOG.error("(fail) Running as root - this can cause browser connection issues")
+
+ LOG.info("=== End Diagnostics ===")
def close_browser_session(self) -> None:
if self.browser:
diff --git a/tests/smoke/test_smoke_health.py b/tests/smoke/test_smoke_health.py
index b1f479e..7c26d59 100644
--- a/tests/smoke/test_smoke_health.py
+++ b/tests/smoke/test_smoke_health.py
@@ -43,10 +43,11 @@ def test_app_starts(smoke_bot:SmokeKleinanzeigenBot) -> None:
"--help",
"help",
"version",
+ "diagnose",
])
def test_cli_subcommands_no_config(subcommand:str, tmp_path:Path) -> None:
"""
- Smoke: CLI subcommands that do not require a config file (--help, help, version).
+ Smoke: CLI subcommands that do not require a config file (--help, help, version, diagnose).
"""
args = [subcommand]
result = run_cli_subcommand(args, cwd = str(tmp_path))
@@ -56,6 +57,8 @@ def test_cli_subcommands_no_config(subcommand:str, tmp_path:Path) -> None:
assert "usage" in out or "help" in out, f"Expected help text in CLI output.\n{out}"
elif subcommand == "version":
assert re.match(r"^\s*\d{4}\+\w+", result.stdout.strip()), f"Output does not look like a version string: {result.stdout}"
+ elif subcommand == "diagnose":
+ assert "browser connection diagnostics" in out or "browser-verbindungsdiagnose" in out, f"Expected diagnostic output.\n{out}"
@pytest.mark.smoke
@@ -93,6 +96,7 @@ def test_cli_subcommands_create_config_fails_if_exists(tmp_path:Path) -> None:
("verify", "verify"),
("update-check", "update"),
("update-content-hash", "update-content-hash"),
+ ("diagnose", "diagnose"),
])
@pytest.mark.parametrize(("config_ext", "serializer"), [
("yaml", None),
@@ -131,3 +135,5 @@ def test_cli_subcommands_with_config_formats(
assert "no active ads found" in out, f"Expected 'no active ads found' in output for 'update-content-hash'.\n{out}"
elif subcommand == "update-check":
assert result.returncode == 0
+ elif subcommand == "diagnose":
+ assert "browser connection diagnostics" in out or "browser-verbindungsdiagnose" in out, f"Expected diagnostic output for 'diagnose'.\n{out}"
diff --git a/tests/unit/test_web_scraping_mixin.py b/tests/unit/test_web_scraping_mixin.py
index 16d3c41..f2f1e48 100644
--- a/tests/unit/test_web_scraping_mixin.py
+++ b/tests/unit/test_web_scraping_mixin.py
@@ -14,7 +14,7 @@ import shutil
import zipfile
from pathlib import Path
from typing import NoReturn, Protocol, cast
-from unittest.mock import AsyncMock, MagicMock, Mock, patch
+from unittest.mock import AsyncMock, MagicMock, Mock, mock_open, patch
import nodriver
import psutil
@@ -22,7 +22,8 @@ import pytest
from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page
-from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin
+from kleinanzeigen_bot.utils import loggers
+from kleinanzeigen_bot.utils.web_scraping_mixin import By, Is, WebScrapingMixin, _is_admin # noqa: PLC2701
class ConfigProtocol(Protocol):
@@ -882,3 +883,779 @@ class TestWebScrapingBrowserConfiguration:
print("[DEBUG] scraper.page after session creation:", scraper.page)
assert scraper.browser is not None
assert scraper.page is not None
+
+ def test_diagnose_browser_issues(self, caplog:pytest.LogCaptureFixture) -> None:
+ """Test that diagnose_browser_issues provides expected diagnostic output."""
+ # Configure logging to capture output
+ caplog.set_level(loggers.INFO)
+
+ # Create a WebScrapingMixin instance
+ mixin = WebScrapingMixin()
+
+ # Call the diagnose method
+ mixin.diagnose_browser_issues()
+
+ # Check that diagnostic output was produced
+ log_output = caplog.text.lower()
+ assert "browser connection diagnostics" in log_output or "browser-verbindungsdiagnose" in log_output
+ assert "end diagnostics" in log_output or "ende der diagnose" in log_output
+
+ # Check for platform-specific information
+ if platform.system() == "Windows":
+ assert "windows detected" in log_output or "windows erkannt" in log_output
+ elif platform.system() == "Darwin":
+ assert "macos detected" in log_output or "macos erkannt" in log_output
+ elif platform.system() == "Linux":
+ assert "linux detected" in log_output or "linux erkannt" in log_output
+
+
+class TestWebScrapingDiagnostics:
+ """Test the diagnose_browser_issues method."""
+
+ @pytest.fixture
+ def scraper_with_config(self) -> WebScrapingMixin:
+ """Create a WebScrapingMixin instance with browser config."""
+ scraper = WebScrapingMixin()
+ return scraper
+
+ def test_diagnose_browser_issues_binary_exists_executable(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when browser binary exists and is executable."""
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True):
+ scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(ok) Browser binary exists: /usr/bin/chrome" in caplog.text
+ assert "(ok) Browser binary is executable" in caplog.text
+
+ def test_diagnose_browser_issues_binary_exists_not_executable(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when browser binary exists but is not executable."""
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = False):
+ scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(ok) Browser binary exists: /usr/bin/chrome" in caplog.text
+ assert "(fail) Browser binary is not executable" in caplog.text
+
+ def test_diagnose_browser_issues_binary_not_found(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when browser binary is not found."""
+ with patch("os.path.exists", return_value = False):
+ scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) Browser binary not found: /usr/bin/chrome" in caplog.text
+
+ def test_diagnose_browser_issues_auto_detect_success(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when auto-detecting browser succeeds."""
+ with patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.binary_location = None
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(ok) Auto-detected browser: /usr/bin/chrome" in caplog.text
+
+ def test_diagnose_browser_issues_auto_detect_failure(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when auto-detecting browser fails."""
+ with patch.object(scraper_with_config, "get_compatible_browser", return_value = None):
+ scraper_with_config.browser_config.binary_location = None
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) No compatible browser found" in caplog.text
+
+ def test_diagnose_browser_issues_user_data_dir_exists_readable(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ ) -> None:
+ """Test diagnostic when user data directory exists and is readable/writable."""
+ test_dir = str(tmp_path / "chrome-profile")
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.user_data_dir = test_dir
+ scraper_with_config.diagnose_browser_issues()
+
+ assert f"(ok) User data directory exists: {test_dir}" in caplog.text
+ assert "(ok) User data directory is readable and writable" in caplog.text
+
+ def test_diagnose_browser_issues_user_data_dir_exists_not_readable(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ ) -> None:
+ """Test diagnostic when user data directory exists but is not readable/writable."""
+ test_dir = str(tmp_path / "chrome-profile")
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.user_data_dir = test_dir
+ scraper_with_config.diagnose_browser_issues()
+
+ assert f"(ok) User data directory exists: {test_dir}" in caplog.text
+ assert "(fail) User data directory permissions issue" in caplog.text
+
+ def test_diagnose_browser_issues_user_data_dir_not_exists(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ ) -> None:
+ """Test diagnostic when user data directory does not exist."""
+ test_dir = str(tmp_path / "chrome-profile")
+ with patch("os.path.exists", side_effect = lambda path: path != test_dir), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.user_data_dir = test_dir
+ scraper_with_config.diagnose_browser_issues()
+
+ assert f"(info) User data directory does not exist (will be created): {test_dir}" in caplog.text
+
+ def test_diagnose_browser_issues_remote_debugging_port_configured_open(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when remote debugging port is configured and open."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
+ patch("urllib.request.urlopen") as mock_urlopen:
+ mock_response = Mock()
+ mock_response.read.return_value = b'{"Browser": "Chrome/120.0.0.0"}'
+ mock_urlopen.return_value = mock_response
+
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Remote debugging port configured: 9222" in caplog.text
+ assert "(ok) Remote debugging port is open" in caplog.text
+ assert "(ok) Remote debugging API accessible - Browser: Chrome/120.0.0.0" in caplog.text
+
+ def test_diagnose_browser_issues_remote_debugging_port_configured_open_api_fails(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when remote debugging port is open but API is not accessible."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
+ patch("urllib.request.urlopen", side_effect = Exception("Connection refused")):
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Remote debugging port configured: 9222" in caplog.text
+ assert "(ok) Remote debugging port is open" in caplog.text
+ assert "(fail) Remote debugging port is open but API not accessible: Connection refused" in caplog.text
+ assert "This might indicate a browser update issue or configuration problem" in caplog.text
+
+ def test_diagnose_browser_issues_remote_debugging_port_configured_closed(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when remote debugging port is configured but closed."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = False):
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Remote debugging port configured: 9222" in caplog.text
+ assert "(fail) Remote debugging port is not open" in caplog.text
+ assert "Make sure browser is started with: --remote-debugging-port=9222" in caplog.text
+
+ def test_diagnose_browser_issues_remote_debugging_port_not_configured(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when remote debugging port is not configured."""
+ scraper_with_config.browser_config.arguments = ["--other-arg"]
+ scraper_with_config.diagnose_browser_issues()
+
+ # Should not log anything about remote debugging port
+ assert "Remote debugging port" not in caplog.text
+
+ def test_diagnose_browser_issues_browser_processes_found(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when browser processes are found."""
+ mock_processes = [
+ Mock(info = {"pid": 1234, "name": "chrome"}),
+ Mock(info = {"pid": 5678, "name": "chromium"}),
+ Mock(info = {"pid": 9012, "name": "edge"}),
+ Mock(info = {"pid": 3456, "name": "chrome"})
+ ]
+
+ with patch("psutil.process_iter", return_value = mock_processes):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Found 4 browser processes running" in caplog.text
+ assert " - PID 1234: chrome" in caplog.text
+ assert " - PID 5678: chromium" in caplog.text
+ assert " - PID 9012: edge" in caplog.text
+
+ def test_diagnose_browser_issues_no_browser_processes(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic when no browser processes are found."""
+ with patch("psutil.process_iter", return_value = []):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) No browser processes currently running" in caplog.text
+
+ def test_diagnose_browser_issues_windows_platform(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic on Windows platform."""
+ with patch("platform.system", return_value = "Windows"), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Windows detected - check Windows Defender and antivirus software" in caplog.text
+
+ def test_diagnose_browser_issues_macos_platform_no_user_data_dir(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic on macOS platform without user data directory."""
+ with patch("platform.system", return_value = "Darwin"), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.browser_config.user_data_dir = None
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) macOS detected - check Gatekeeper and security settings" in caplog.text
+ assert " IMPORTANT: macOS Chrome remote debugging requires --user-data-dir flag" in caplog.text
+ assert ' Add to your config.yaml: user_data_dir: "/tmp/chrome-debug-profile"' in caplog.text
+ assert " And to browser arguments: --user-data-dir=/tmp/chrome-debug-profile" in caplog.text
+
+ def test_diagnose_browser_issues_macos_platform_with_user_data_dir(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ ) -> None:
+ """Test diagnostic on macOS platform with user data directory."""
+ test_dir = str(tmp_path / "chrome-profile")
+ with patch("platform.system", return_value = "Darwin"), \
+ patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.browser_config.user_data_dir = test_dir
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) macOS detected - check Gatekeeper and security settings" in caplog.text
+ # Should not show the warning about user-data-dir being required
+ assert "IMPORTANT: macOS Chrome remote debugging requires --user-data-dir flag" not in caplog.text
+
+ def test_diagnose_browser_issues_linux_platform_not_root(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic on Linux platform when not running as root."""
+ with patch("platform.system", return_value = "Linux"), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Linux detected - check if running as root (not recommended)" in caplog.text
+ # Should not show error about running as root
+ assert "(fail) Running as root" not in caplog.text
+
+ def test_diagnose_browser_issues_linux_platform_root(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic on Linux platform when running as root."""
+ with patch("platform.system", return_value = "Linux"), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = True):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Linux detected - check if running as root (not recommended)" in caplog.text
+ assert "(fail) Running as root - this can cause browser connection issues" in caplog.text
+
+ def test_diagnose_browser_issues_unknown_platform(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic on unknown platform."""
+ with patch("platform.system", return_value = "UnknownOS"), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ # Should not show any platform-specific messages
+ assert "Windows detected" not in caplog.text
+ assert "macOS detected" not in caplog.text
+ assert "Linux detected" not in caplog.text
+
+ def test_diagnose_browser_issues_macos_remote_debugging_instructions(self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture) -> None:
+ """Test diagnostic shows macOS-specific remote debugging instructions."""
+ with patch("platform.system", return_value = "Darwin"), \
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "On macOS, try: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome" in caplog.text
+ assert "--remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug-profile --disable-dev-shm-usage" in caplog.text
+ assert 'Or: open -a "Google Chrome" --args --remote-debugging-port=9222' in caplog.text
+ assert " IMPORTANT: --user-data-dir is MANDATORY for macOS Chrome remote debugging" in caplog.text
+
+ def test_diagnose_browser_issues_complete_diagnostic_flow(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ ) -> None:
+ """Test complete diagnostic flow with all components."""
+ test_dir = str(tmp_path / "chrome-profile")
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
+ patch("urllib.request.urlopen") as mock_urlopen, \
+ patch("psutil.process_iter", return_value = []), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False):
+
+ mock_response = Mock()
+ mock_response.read.return_value = b'{"Browser": "Chrome/120.0.0.0"}'
+ mock_urlopen.return_value = mock_response
+
+ scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
+ scraper_with_config.browser_config.user_data_dir = test_dir
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+
+ scraper_with_config.diagnose_browser_issues()
+
+ # Check that all diagnostic sections are present
+ assert "=== Browser Connection Diagnostics ===" in caplog.text
+ assert "(ok) Browser binary exists: /usr/bin/chrome" in caplog.text
+ assert "(ok) Browser binary is executable" in caplog.text
+ assert f"(ok) User data directory exists: {test_dir}" in caplog.text
+ assert "(ok) User data directory is readable and writable" in caplog.text
+ assert "(info) Remote debugging port configured: 9222" in caplog.text
+ assert "(ok) Remote debugging port is open" in caplog.text
+ assert "(ok) Remote debugging API accessible - Browser: Chrome/120.0.0.0" in caplog.text
+ assert "(info) No browser processes currently running" in caplog.text
+ assert "(info) Linux detected - check if running as root (not recommended)" in caplog.text
+ assert "=== End Diagnostics ===" in caplog.text
+
+ def test_diagnose_browser_issues_remote_debugging_host_configured(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when remote debugging host is configured."""
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True), \
+ patch("urllib.request.urlopen") as mock_urlopen, \
+ patch("psutil.process_iter", return_value = []), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ mock_response = Mock()
+ mock_response.read.return_value = b'{"Browser": "Chrome/120.0.0.0"}'
+ mock_urlopen.return_value = mock_response
+
+ scraper_with_config.browser_config.arguments = [
+ "--remote-debugging-host=192.168.1.100",
+ "--remote-debugging-port=9222"
+ ]
+
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) Remote debugging port configured: 9222" in caplog.text
+ assert "(ok) Remote debugging port is open" in caplog.text
+
+ def test_diagnose_browser_issues_process_info_missing_name(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when process info is missing name."""
+ mock_process = Mock()
+ mock_process.info = {"pid": 1234, "name": None, "cmdline": []}
+
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("psutil.process_iter", return_value = [mock_process]), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(info) No browser processes currently running" in caplog.text
+
+ def test_diagnose_browser_issues_psutil_exception_handling(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when psutil raises an exception during process iteration."""
+ # Mock psutil.process_iter to return a list that will cause an exception when accessing proc.info
+ mock_process = Mock()
+ mock_process.info = {"name": "chrome"}
+ mock_processes = [mock_process]
+
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("psutil.process_iter", return_value = mock_processes), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"), \
+ patch.object(mock_process, "info", side_effect = psutil.AccessDenied):
+ scraper_with_config.diagnose_browser_issues()
+
+ # Should handle the exception gracefully and continue
+ assert "=== Browser Connection Diagnostics ===" in caplog.text
+ assert "=== End Diagnostics ===" in caplog.text
+
+ def test_diagnose_browser_issues_browser_not_executable(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when browser binary exists but is not executable."""
+ scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = False), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch("psutil.process_iter", return_value = []):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) Browser binary is not executable" in caplog.text
+
+ def test_diagnose_browser_issues_browser_not_found(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when browser binary does not exist."""
+ scraper_with_config.browser_config.binary_location = "/usr/bin/chrome"
+ with patch("os.path.exists", return_value = False), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch("psutil.process_iter", return_value = []):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) Browser binary not found:" in caplog.text
+
+ def test_diagnose_browser_issues_no_browser_auto_detection(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when no browser binary is configured and auto-detection fails."""
+ scraper_with_config.browser_config.binary_location = None
+ with patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch("psutil.process_iter", return_value = []), \
+ patch.object(scraper_with_config, "get_compatible_browser", side_effect = AssertionError("No browser found")), \
+ pytest.raises(AssertionError, match = "No browser found"):
+ scraper_with_config.diagnose_browser_issues()
+
+ def test_diagnose_browser_issues_user_data_dir_permissions_issue(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture, tmp_path:Path
+ ) -> None:
+ """Test diagnostic when user data directory has permission issues."""
+ test_dir = str(tmp_path / "chrome-profile")
+ scraper_with_config.browser_config.user_data_dir = test_dir
+
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = False), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) User data directory permissions issue" in caplog.text
+
+ def test_diagnose_browser_issues_remote_debugging_api_inaccessible(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when remote debugging port is open but API is not accessible."""
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
+ patch("urllib.request.urlopen", side_effect = Exception("Connection refused")), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) Remote debugging port is open but API not accessible" in caplog.text
+ assert "This might indicate a browser update issue or configuration problem" in caplog.text
+
+ def test_diagnose_browser_issues_macos_chrome_warning(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when macOS Chrome remote debugging is configured without user_data_dir."""
+ scraper_with_config.browser_config.arguments = ["--remote-debugging-port=9222"]
+ scraper_with_config.browser_config.user_data_dir = None
+
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = False), \
+ patch("platform.system", return_value = "Darwin"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = False), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "IMPORTANT: macOS Chrome remote debugging requires --user-data-dir flag" in caplog.text
+ assert "Add to your config.yaml: user_data_dir:" in caplog.text
+
+ def test_diagnose_browser_issues_linux_root_user(
+ self, scraper_with_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test diagnostic when running as root on Linux."""
+ with patch("os.path.exists", return_value = True), \
+ patch("os.access", return_value = True), \
+ patch("platform.system", return_value = "Linux"), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin._is_admin", return_value = True), \
+ patch.object(scraper_with_config, "get_compatible_browser", return_value = "/usr/bin/chrome"):
+ scraper_with_config.diagnose_browser_issues()
+
+ assert "(fail) Running as root - this can cause browser connection issues" in caplog.text
+
+
+class TestWebScrapingMixinPortRetry:
+ """Test the _check_port_with_retry method."""
+
+ @pytest.fixture
+ def scraper_with_remote_config(self) -> WebScrapingMixin:
+ """Create a WebScrapingMixin instance with remote debugging configuration."""
+ scraper = WebScrapingMixin()
+ scraper.browser_config.binary_location = "/usr/bin/chrome"
+ scraper.browser_config.arguments = ["--remote-debugging-port=9222"]
+ return scraper
+
+ @pytest.mark.asyncio
+ async def test_browser_connection_error_handling(
+ self, scraper_with_remote_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test error handling when browser connection fails."""
+ with patch("os.path.exists", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to connect as root user")), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
+
+ mock_config = Mock()
+ mock_config_class.return_value = mock_config
+
+ with pytest.raises(Exception, match = "Failed to connect as root user"):
+ await scraper_with_remote_config.create_browser_session()
+
+ # Check that the error handling was triggered
+ assert "Failed to connect to browser. This error often occurs when:" in caplog.text
+
+ @pytest.mark.asyncio
+ async def test_browser_connection_error_handling_non_root_error(
+ self, scraper_with_remote_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test error handling when browser connection fails with non-root error."""
+ with patch("os.path.exists", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.net.is_port_open", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Connection timeout")), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
+
+ mock_config = Mock()
+ mock_config_class.return_value = mock_config
+
+ with pytest.raises(Exception, match = "Connection timeout"):
+ await scraper_with_remote_config.create_browser_session()
+
+ # Should not trigger the root-specific error handling
+ assert "Failed to connect to browser. This error often occurs when:" not in caplog.text
+
+ @pytest.fixture
+ def scraper_with_startup_config(self) -> WebScrapingMixin:
+ """Create a WebScrapingMixin instance for testing browser startup (no remote debugging)."""
+ scraper = WebScrapingMixin()
+ scraper.browser_config.binary_location = "/usr/bin/chrome"
+ # No remote debugging port configured - will start new browser
+ return scraper
+
+ @pytest.mark.asyncio
+ async def test_browser_startup_error_handling_root_error(
+ self, scraper_with_startup_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test error handling when browser startup fails with root error."""
+ with patch("os.path.exists", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Failed to start as root user")), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
+
+ mock_config = Mock()
+ mock_config_class.return_value = mock_config
+
+ with pytest.raises(Exception, match = "Failed to start as root user"):
+ await scraper_with_startup_config.create_browser_session()
+
+ # Check that the root-specific error handling was triggered
+ assert "Failed to start browser. This error often occurs when:" in caplog.text
+
+ @pytest.mark.asyncio
+ async def test_browser_startup_error_handling_non_root_error(
+ self, scraper_with_startup_config:WebScrapingMixin, caplog:pytest.LogCaptureFixture
+ ) -> None:
+ """Test error handling when browser startup fails with non-root error."""
+ with patch("os.path.exists", return_value = True), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.start", side_effect = Exception("Browser binary not found")), \
+ patch("kleinanzeigen_bot.utils.web_scraping_mixin.nodriver.Config") as mock_config_class:
+
+ mock_config = Mock()
+ mock_config_class.return_value = mock_config
+
+ with pytest.raises(Exception, match = "Browser binary not found"):
+ await scraper_with_startup_config.create_browser_session()
+
+ # Should not trigger the root-specific error handling
+ assert "Failed to start browser. This error often occurs when:" not in caplog.text
+
+ @pytest.fixture
+ def scraper(self) -> WebScrapingMixin:
+ """Create a WebScrapingMixin instance."""
+ return WebScrapingMixin()
+
+ @pytest.mark.asyncio
+ async def test_check_port_with_retry_success_first_try(self, scraper:WebScrapingMixin) -> None:
+ """Test port check succeeds on first try."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = True):
+ result = await scraper._check_port_with_retry("127.0.0.1", 9222)
+ assert result is True
+
+ @pytest.mark.asyncio
+ async def test_check_port_with_retry_success_after_retries(self, scraper:WebScrapingMixin) -> None:
+ """Test port check succeeds after some retries."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", side_effect = [False, False, True]):
+ result = await scraper._check_port_with_retry("127.0.0.1", 9222, max_retries = 3, retry_delay = 0.1)
+ assert result is True
+
+ @pytest.mark.asyncio
+ async def test_check_port_with_retry_failure_after_max_retries(self, scraper:WebScrapingMixin) -> None:
+ """Test port check fails after max retries."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", return_value = False):
+ result = await scraper._check_port_with_retry("127.0.0.1", 9222, max_retries = 2, retry_delay = 0.1)
+ assert result is False
+
+ @pytest.mark.asyncio
+ async def test_check_port_with_retry_custom_parameters(self, scraper:WebScrapingMixin) -> None:
+ """Test port check with custom retry parameters."""
+ with patch("kleinanzeigen_bot.utils.net.is_port_open", side_effect = [False, True]):
+ result = await scraper._check_port_with_retry("192.168.1.100", 8080, max_retries = 5, retry_delay = 0.05)
+ assert result is True
+
+
+class TestWebScrapingMixinProfileHandling:
+ """Test the enhanced profile directory handling."""
+
+ @pytest.fixture
+ def scraper_with_profile_config(self, tmp_path:Path) -> WebScrapingMixin:
+ """Create a WebScrapingMixin instance with profile configuration."""
+ scraper = WebScrapingMixin()
+ scraper.browser_config.user_data_dir = str(tmp_path / "test-profile")
+ scraper.browser_config.profile_name = "TestProfile"
+ return scraper
+
+ def test_profile_directory_creation_with_user_data_dir(
+ self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
+ ) -> None:
+ """Test profile directory creation when user_data_dir is configured."""
+ test_dir = str(tmp_path / "test-profile")
+ scraper_with_profile_config.browser_config.user_data_dir = test_dir
+
+ with patch("os.path.join", return_value = os.path.join(test_dir, "TestProfile")), \
+ patch("os.makedirs") as mock_makedirs, \
+ patch("os.path.exists", return_value = False), \
+ patch("builtins.open", mock_open()), \
+ patch("json.dump"):
+
+ # This would be called during browser session creation
+ profile_dir = os.path.join(test_dir, "TestProfile")
+ mock_makedirs.assert_not_called() # Not called yet
+
+ # Simulate the profile creation logic
+ os.makedirs(profile_dir, exist_ok = True)
+ mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
+
+ def test_profile_directory_creation_with_preferences_file(
+ self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
+ ) -> None:
+ """Test profile directory creation with preferences file when it doesn't exist."""
+ test_dir = str(tmp_path / "test-profile")
+ scraper_with_profile_config.browser_config.user_data_dir = test_dir
+
+ with patch("os.makedirs") as mock_makedirs, \
+ patch("os.path.exists", return_value = False), \
+ patch("builtins.open", mock_open()) as mock_file, \
+ patch("json.dump") as mock_json_dump:
+
+ # Simulate the profile creation logic
+ profile_dir = os.path.join(test_dir, "TestProfile")
+ prefs_file = os.path.join(profile_dir, "Preferences")
+
+ # This would be called during browser session creation
+ os.makedirs(profile_dir, exist_ok = True)
+ mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
+
+ # Simulate preferences file creation
+ with open(prefs_file, "w", encoding = "UTF-8") as fd:
+ json.dump({"test": "preferences"}, fd)
+
+ mock_file.assert_called_with(prefs_file, "w", encoding = "UTF-8")
+ mock_json_dump.assert_called()
+
+ def test_profile_directory_creation_with_existing_preferences_file(
+ self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
+ ) -> None:
+ """Test profile directory creation when preferences file already exists."""
+ test_dir = str(tmp_path / "test-profile")
+ scraper_with_profile_config.browser_config.user_data_dir = test_dir
+
+ with patch("os.makedirs") as mock_makedirs, \
+ patch("os.path.exists", return_value = True), \
+ patch("builtins.open", mock_open()) as mock_file, \
+ patch("json.dump") as mock_json_dump:
+
+ # Simulate the profile creation logic
+ profile_dir = os.path.join(test_dir, "TestProfile")
+
+ # This would be called during browser session creation
+ os.makedirs(profile_dir, exist_ok = True)
+ mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
+
+ # Preferences file exists, so it should not be created
+ mock_file.assert_not_called()
+ mock_json_dump.assert_not_called()
+
+ def test_profile_directory_creation_with_edge_browser(
+ self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
+ ) -> None:
+ """Test profile directory creation with Edge browser configuration."""
+ test_dir = str(tmp_path / "test-profile")
+ scraper_with_profile_config.browser_config.user_data_dir = test_dir
+ scraper_with_profile_config.browser_config.binary_location = "/usr/bin/microsoft-edge"
+
+ with patch("os.makedirs") as mock_makedirs, \
+ patch("os.path.exists", return_value = False), \
+ patch("builtins.open", mock_open()), \
+ patch("json.dump"), \
+ patch("os.environ", {"MSEDGEDRIVER_TELEMETRY_OPTOUT": "1"}):
+
+ # Simulate the profile creation logic
+ profile_dir = os.path.join(test_dir, "TestProfile")
+
+ # This would be called during browser session creation
+ os.makedirs(profile_dir, exist_ok = True)
+ mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
+
+ def test_profile_directory_creation_with_private_window(
+ self, scraper_with_profile_config:WebScrapingMixin, tmp_path:Path
+ ) -> None:
+ """Test profile directory creation with private window configuration."""
+ test_dir = str(tmp_path / "test-profile")
+ scraper_with_profile_config.browser_config.user_data_dir = test_dir
+ scraper_with_profile_config.browser_config.use_private_window = True
+
+ with patch("os.makedirs") as mock_makedirs, \
+ patch("os.path.exists", return_value = False), \
+ patch("builtins.open", mock_open()), \
+ patch("json.dump"):
+
+ # Simulate the profile creation logic
+ profile_dir = os.path.join(test_dir, "TestProfile")
+
+ # This would be called during browser session creation
+ os.makedirs(profile_dir, exist_ok = True)
+ mock_makedirs.assert_called_with(profile_dir, exist_ok = True)
+
+ def test_profile_directory_creation_without_user_data_dir(
+ self, scraper_with_profile_config:WebScrapingMixin
+ ) -> None:
+ """Test profile directory handling when user_data_dir is not configured."""
+ scraper_with_profile_config.browser_config.user_data_dir = None
+
+ # Should not create profile directories when user_data_dir is None
+ with patch("os.path.join") as mock_join, \
+ patch("os.makedirs") as mock_makedirs:
+
+ # The profile creation logic should not be called
+ mock_join.assert_not_called()
+ mock_makedirs.assert_not_called()
+
+
+class TestWebScrapingMixinAdminCheck:
+ """Test the _is_admin helper function."""
+
+ def test_is_admin_on_unix_system(self) -> None:
+ """Test _is_admin function on Unix-like system."""
+ # Create a mock os module with geteuid
+ mock_os = Mock()
+ mock_os.geteuid = Mock(return_value = 0)
+
+ with patch("kleinanzeigen_bot.utils.web_scraping_mixin.os", mock_os):
+ assert _is_admin() is True
+
+ def test_is_admin_on_unix_system_not_root(self) -> None:
+ """Test _is_admin function on Unix-like system when not root."""
+ # Create a mock os module with geteuid
+ mock_os = Mock()
+ mock_os.geteuid = Mock(return_value = 1000)
+
+ with patch("kleinanzeigen_bot.utils.web_scraping_mixin.os", mock_os):
+ assert _is_admin() is False
+
+ def test_is_admin_on_windows_system(self) -> None:
+ """Test _is_admin function on Windows system."""
+ # Create a mock os module without geteuid
+ mock_os = Mock()
+ # Remove geteuid attribute to simulate Windows
+ del mock_os.geteuid
+
+ with patch("kleinanzeigen_bot.utils.web_scraping_mixin.os", mock_os):
+ assert _is_admin() is False