mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 02:31:45 +01:00
support re-using already open browser window
This commit is contained in:
36
README.md
36
README.md
@@ -10,6 +10,10 @@
|
|||||||
1. [About](#about)
|
1. [About](#about)
|
||||||
1. [Installation](#installation)
|
1. [Installation](#installation)
|
||||||
1. [Usage](#usage)
|
1. [Usage](#usage)
|
||||||
|
1. [Configuration](#config)
|
||||||
|
1. [Main configuration](#main-config)
|
||||||
|
1. [Ad configuration](#ad-config)
|
||||||
|
1. [Using an existing browser window](#existing-browser)
|
||||||
1. [Development Notes](#development)
|
1. [Development Notes](#development)
|
||||||
1. [License](#license)
|
1. [License](#license)
|
||||||
|
|
||||||
@@ -206,11 +210,11 @@ Options:
|
|||||||
|
|
||||||
Limitation of `download`: It's only possible to extract the cheapest given shipping option.
|
Limitation of `download`: It's only possible to extract the cheapest given shipping option.
|
||||||
|
|
||||||
### Configuration
|
## <a name="config"></a>Configuration
|
||||||
|
|
||||||
All configuration files can be in YAML or JSON format.
|
All configuration files can be in YAML or JSON format.
|
||||||
|
|
||||||
#### 1) Main configuration
|
### <a name="main-config"></a>1) Main configuration
|
||||||
|
|
||||||
When executing the app it by default looks for a `config.yaml` file in the current directory. If it does not exist it will be created automatically.
|
When executing the app it by default looks for a `config.yaml` file in the current directory. If it does not exist it will be created automatically.
|
||||||
|
|
||||||
@@ -271,7 +275,7 @@ login:
|
|||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 2) Ad configuration
|
### <a name="ad-config"></a>2) Ad configuration
|
||||||
|
|
||||||
Each ad is described in a separate JSON or YAML file with prefix `ad_<filename>`. The prefix is configurable in config file.
|
Each ad is described in a separate JSON or YAML file with prefix `ad_<filename>`. The prefix is configurable in config file.
|
||||||
|
|
||||||
@@ -333,6 +337,30 @@ created_on: # set automatically
|
|||||||
updated_on: # set automatically
|
updated_on: # set automatically
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### <a name="existing-browser"></a>3) Using an existing browser window
|
||||||
|
|
||||||
|
By default a new browser process will be launched. To reuse a manually launched browser window/process follow these steps:
|
||||||
|
|
||||||
|
1. Manually launch your browser from the command line with the `--remote-debugging-port=<NUMBER>` flag.
|
||||||
|
You are free to choose any unused port number between 1025 and 65535, e.g.:
|
||||||
|
- `chrome --remote-debugging-port=9222`
|
||||||
|
- `chromium --remote-debugging-port=9222`
|
||||||
|
- `msedge --remote-debugging-port=9222`
|
||||||
|
|
||||||
|
This runs the browser in debug mode which allows it to be remote controlled by the bot.
|
||||||
|
|
||||||
|
1. In your config.yaml specify the same flag as browser argument, e.g.:
|
||||||
|
```yaml
|
||||||
|
browser:
|
||||||
|
arguments:
|
||||||
|
- --remote-debugging-port=9222
|
||||||
|
```
|
||||||
|
|
||||||
|
1. Now, when publishing ads, the manually launched browser will be re-used.
|
||||||
|
|
||||||
|
> NOTE: If an existing browser is used all other settings configured under `browser` in your config.yaml file will be ignored
|
||||||
|
because they are only used to programmatically configure/launch a dedicated browser instance.
|
||||||
|
|
||||||
## <a name="development"></a>Development Notes
|
## <a name="development"></a>Development Notes
|
||||||
|
|
||||||
> Please read [CONTRIBUTING.md](CONTRIBUTING.md) before contributing code. Thank you!
|
> Please read [CONTRIBUTING.md](CONTRIBUTING.md) before contributing code. Thank you!
|
||||||
@@ -342,7 +370,7 @@ updated_on: # set automatically
|
|||||||
- unit tests: `pdm run utest`
|
- unit tests: `pdm run utest`
|
||||||
- integration tests: `pdm run itest`
|
- integration tests: `pdm run itest`
|
||||||
- all tests: `pdm run test`
|
- all tests: `pdm run test`
|
||||||
- Run linter: `pdm run lint`
|
- Run syntax checks: `pdm run lint`
|
||||||
- Create platform-specific executable: `pdm run compile`
|
- Create platform-specific executable: `pdm run compile`
|
||||||
- Application bootstrap works like this:
|
- Application bootstrap works like this:
|
||||||
```python
|
```python
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
import asyncio, copy, decimal, json, logging, os, re, sys, traceback, time
|
import asyncio, copy, decimal, json, logging, os, re, socket, sys, traceback, time
|
||||||
from importlib.resources import read_text as get_resource_as_string
|
from importlib.resources import read_text as get_resource_as_string
|
||||||
from collections.abc import Callable, Sized
|
from collections.abc import Callable, Sized
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -76,6 +76,18 @@ def is_integer(obj:Any) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_port_open(host:str, port:int) -> bool:
    """
    Checks whether a TCP connection to the given host and port can be established.

    :param host: host name or IPv4 address to connect to
    :param port: TCP port number to probe
    :return: True if the connection attempt succeeds within one second, otherwise False
    """
    try:
        # The context manager closes the socket on every path and, unlike a
        # `finally: s.close()`, does not touch an unbound name when the socket
        # could not be created in the first place.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(1)
            s.connect((host, port))
            return True
    except Exception:
        # deliberate best-effort probe: any failure (refused, timeout, unresolvable
        # host, out-of-range port, ...) is reported as "not open"
        return False
|
||||||
|
|
||||||
|
|
||||||
async def ainput(prompt: str) -> str:
|
async def ainput(prompt: str) -> str:
|
||||||
return await asyncio.to_thread(input, f'{prompt} ')
|
return await asyncio.to_thread(input, f'{prompt} ')
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from nodriver.core.config import Config
|
|||||||
from nodriver.core.element import Element
|
from nodriver.core.element import Element
|
||||||
from nodriver.core.tab import Tab as Page
|
from nodriver.core.tab import Tab as Page
|
||||||
|
|
||||||
from .utils import ensure, T
|
from .utils import ensure, is_port_open, T
|
||||||
|
|
||||||
|
|
||||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
|
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
|
||||||
@@ -79,6 +79,35 @@ class WebScrapingMixin:
|
|||||||
self.browser_config.binary_location = self.get_compatible_browser()
|
self.browser_config.binary_location = self.get_compatible_browser()
|
||||||
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)
|
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)
|
||||||
|
|
||||||
|
########################################################
|
||||||
|
# check if an existing browser instance shall be used...
|
||||||
|
########################################################
|
||||||
|
remote_host = "127.0.0.1"
|
||||||
|
remote_port = 0
|
||||||
|
for arg in self.browser_config.arguments:
|
||||||
|
if arg.startswith("--remote-debugging-host="):
|
||||||
|
remote_host = arg.split("=", 2)[1]
|
||||||
|
if arg.startswith("--remote-debugging-port="):
|
||||||
|
remote_port = int(arg.split("=", 2)[1])
|
||||||
|
|
||||||
|
if remote_port > 0:
|
||||||
|
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
|
||||||
|
if not is_port_open(remote_host, remote_port):
|
||||||
|
raise AssertionError(f"Browser process not reachable at {remote_host}:{remote_port}. "
|
||||||
|
+ f"Start the browser with --remote-debugging-port={remote_port} or remove this port from your config.yaml")
|
||||||
|
cfg = Config(
|
||||||
|
browser_executable_path = self.browser_config.binary_location # actually not necessary but nodriver fails without
|
||||||
|
)
|
||||||
|
cfg.host = remote_host
|
||||||
|
cfg.port = remote_port
|
||||||
|
self.browser = await nodriver.start(cfg)
|
||||||
|
LOG.info("New Browser session is %s", self.browser.websocket_url)
|
||||||
|
return
|
||||||
|
|
||||||
|
########################################################
|
||||||
|
# configure and initialize new browser instance...
|
||||||
|
########################################################
|
||||||
|
|
||||||
# default_browser_args: @ https://github.com/ultrafunkamsterdam/nodriver/blob/main/nodriver/core/config.py
|
# default_browser_args: @ https://github.com/ultrafunkamsterdam/nodriver/blob/main/nodriver/core/config.py
|
||||||
# https://peter.sh/experiments/chromium-command-line-switches/
|
# https://peter.sh/experiments/chromium-command-line-switches/
|
||||||
# https://github.com/GoogleChrome/chrome-launcher/blob/main/docs/chrome-flags-for-tools.md
|
# https://github.com/GoogleChrome/chrome-launcher/blob/main/docs/chrome-flags-for-tools.md
|
||||||
@@ -125,6 +154,7 @@ class WebScrapingMixin:
|
|||||||
browser_args = browser_args,
|
browser_args = browser_args,
|
||||||
user_data_dir = self.browser_config.user_data_dir
|
user_data_dir = self.browser_config.user_data_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
# already logged by nodriver:
|
# already logged by nodriver:
|
||||||
# LOG.debug("-> Effective browser arguments: \n\t\t%s", "\n\t\t".join(cfg.browser_args))
|
# LOG.debug("-> Effective browser arguments: \n\t\t%s", "\n\t\t".join(cfg.browser_args))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user