From e7c7ba90bea7468a095b663d5efc8adb7bae0669 Mon Sep 17 00:00:00 2001 From: sebthom Date: Thu, 7 Mar 2024 23:07:23 +0100 Subject: [PATCH] support re-using already open browser window --- README.md | 36 ++++++++++++++++++--- src/kleinanzeigen_bot/utils.py | 14 +++++++- src/kleinanzeigen_bot/web_scraping_mixin.py | 32 +++++++++++++++++- 3 files changed, 76 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 94e2e26..bb23580 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,10 @@ 1. [About](#about) 1. [Installation](#installation) 1. [Usage](#usage) +1. [Configuration](#config) + 1. [Main configuration](#main-config) + 1. [Ad configuration](#ad-config) + 1. [Using an existing browser window](#existing-browser) 1. [Development Notes](#development) 1. [License](#license) @@ -206,11 +210,11 @@ Options: Limitation of `download`: It's only possible to extract the cheapest given shipping option. -### Configuration +## Configuration All configuration files can be in YAML or JSON format. -#### 1) Main configuration +### 1) Main configuration When executing the app it by default looks for a `config.yaml` file in the current directory. If it does not exist it will be created automatically. @@ -271,7 +275,7 @@ login: ``` -#### 2) Ad configuration +### 2) Ad configuration Each ad is described in a separate JSON or YAML file with prefix `ad_`. The prefix is configurable in config file. @@ -333,6 +337,30 @@ created_on: # set automatically updated_on: # set automatically ``` +### 3) Using an existing browser window + +By default a new browser process will be launched. To reuse a manually launched browser window/process follow these steps: + +1. Manually launch your browser from the command line with the `--remote-debugging-port=<NUMBER>` flag. 
+ You are free to choose an unused port number between 1025 and 65535, e.g.: + - `chrome --remote-debugging-port=9222` + - `chromium --remote-debugging-port=9222` + - `msedge --remote-debugging-port=9222` + + This runs the browser in debug mode which allows it to be remote controlled by the bot. + +1. In your config.yaml specify the same flag as browser argument, e.g.: + ```yaml + browser: + arguments: + - --remote-debugging-port=9222 + ``` + +1. When now publishing ads the manually launched browser will be re-used. + +> NOTE: If an existing browser is used all other settings configured under `browser` in your config.yaml file will be ignored + because they are only used to programmatically configure/launch a dedicated browser instance. + ## Development Notes > Please read [CONTRIBUTING.md](CONTRIBUTING.md) before contributing code. Thank you! @@ -342,7 +370,7 @@ updated_on: # set automatically - unit tests: `pdm run utest` - integration tests: `pdm run itest` - all tests: `pdm run test` -- Run linter: `pdm run lint` +- Run syntax checks: `pdm run lint` - Create platform-specific executable: `pdm run compile` - Application bootstrap works like this: ```python diff --git a/src/kleinanzeigen_bot/utils.py b/src/kleinanzeigen_bot/utils.py index f922182..6dad437 100644 --- a/src/kleinanzeigen_bot/utils.py +++ b/src/kleinanzeigen_bot/utils.py @@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors SPDX-License-Identifier: AGPL-3.0-or-later SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ """ -import asyncio, copy, decimal, json, logging, os, re, sys, traceback, time +import asyncio, copy, decimal, json, logging, os, re, socket, sys, traceback, time from importlib.resources import read_text as get_resource_as_string from collections.abc import Callable, Sized from datetime import datetime @@ -76,6 +76,18 @@ def is_integer(obj:Any) -> bool: return False +def is_port_open(host:str, port:int) -> bool: + try: + s = 
socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(1) + s.connect((host, port)) + return True + except Exception: + return False + finally: + s.close() + + async def ainput(prompt: str) -> str: return await asyncio.to_thread(input, f'{prompt} ') diff --git a/src/kleinanzeigen_bot/web_scraping_mixin.py b/src/kleinanzeigen_bot/web_scraping_mixin.py index 33e4b3d..baef0d7 100644 --- a/src/kleinanzeigen_bot/web_scraping_mixin.py +++ b/src/kleinanzeigen_bot/web_scraping_mixin.py @@ -18,7 +18,7 @@ from nodriver.core.config import Config from nodriver.core.element import Element from nodriver.core.tab import Tab as Page -from .utils import ensure, T +from .utils import ensure, is_port_open, T LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin") @@ -79,6 +79,35 @@ class WebScrapingMixin: self.browser_config.binary_location = self.get_compatible_browser() LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location) + ######################################################## + # check if an existing browser instance shall be used... + ######################################################## + remote_host = "127.0.0.1" + remote_port = 0 + for arg in self.browser_config.arguments: + if arg.startswith("--remote-debugging-host="): + remote_host = arg.split("=", 2)[1] + if arg.startswith("--remote-debugging-port="): + remote_port = int(arg.split("=", 2)[1]) + + if remote_port > 0: + LOG.info("Using existing browser process at %s:%s", remote_host, remote_port) + if not is_port_open(remote_host, remote_port): + raise AssertionError(f"Browser process not reachable at {remote_host}:{remote_port}. 
" + + f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml") + cfg = Config( + browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without + ) + cfg.host = remote_host + cfg.port = remote_port + self.browser = await nodriver.start(cfg) + LOG.info("New Browser session is %s", self.browser.websocket_url) + return + + ######################################################## + # configure and initialize new browser instance... + ######################################################## + # default_browser_args: @ https://github.com/ultrafunkamsterdam/nodriver/blob/main/nodriver/core/config.py # https://peter.sh/experiments/chromium-command-line-switches/ # https://github.com/GoogleChrome/chrome-launcher/blob/main/docs/chrome-flags-for-tools.md @@ -125,6 +154,7 @@ class WebScrapingMixin: browser_args = browser_args, user_data_dir = self.browser_config.user_data_dir ) + # already logged by nodriver: # LOG.debug("-> Effective browser arguments: \n\t\t%s", "\n\t\t".join(cfg.browser_args))