Support extended glob patterns

This commit is contained in:
sebthom
2022-06-11 18:55:13 +02:00
parent 4794aee88b
commit f924ed89d0
5 changed files with 50 additions and 31 deletions

View File

@@ -24,7 +24,7 @@ It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https:/
- config: - config:
- use YAML or JSON for config files - use YAML or JSON for config files
- one config file per ad - one config file per ad
- use globbing (wildcards) to select images from local disk - use globbing (wildcards) to select images from local disk via [wcmatch](https://facelessuser.github.io/wcmatch/glob/#syntax)
- reference categories by name (looked up from [categories.yaml](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/kleinanzeigen_bot/resources/categories.yaml)) - reference categories by name (looked up from [categories.yaml](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/kleinanzeigen_bot/resources/categories.yaml))
- logging is configurable and colorized - logging is configurable and colorized
- provided as self-contained executable for Windows, Linux and macOS - provided as self-contained executable for Windows, Linux and macOS
@@ -212,9 +212,7 @@ The following parameters can be configured:
# wildcard patterns to select ad configuration files # wildcard patterns to select ad configuration files
# if relative paths are specified, then they are relative to this configuration file # if relative paths are specified, then they are relative to this configuration file
ad_files: ad_files:
- "my_ads/**/ad_*.json" - "my_ads/**/ad_*.{json,yml,yaml}"
- "my_ads/**/ad_*.yml"
- "my_ads/**/ad_*.yaml"
# default values for ads, can be overwritten in each ad configuration file # default values for ads, can be overwritten in each ad configuration file
ad_defaults: ad_defaults:
@@ -292,8 +290,7 @@ shipping_costs: # e.g. 2.95
# list of wildcard patterns to select images # list of wildcard patterns to select images
# if relative paths are specified, then they are relative to this ad configuration file # if relative paths are specified, then they are relative to this ad configuration file
images: images:
#- laptop_*.jpg #- laptop_*.{jpg,png}
#- laptop_*.png
contact: contact:
name: name:

View File

@@ -2,11 +2,12 @@
Copyright (C) 2022 Sebastian Thomschke and contributors Copyright (C) 2022 Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
""" """
import atexit, copy, getopt, glob, importlib.metadata, json, logging, os, signal, sys, textwrap, time, urllib import atexit, copy, getopt, importlib.metadata, json, logging, os, signal, sys, textwrap, time, urllib
from collections.abc import Iterable from collections.abc import Iterable
from datetime import datetime from datetime import datetime
from logging.handlers import RotatingFileHandler from logging.handlers import RotatingFileHandler
from typing import Any, Final from typing import Any, Final
from wcmatch import glob
from overrides import overrides from overrides import overrides
from ruamel.yaml import YAML from ruamel.yaml import YAML
@@ -192,7 +193,7 @@ class KleinanzeigenBot(SeleniumMixin):
ad_files = set() ad_files = set()
data_root_dir = os.path.dirname(self.config_file_path) data_root_dir = os.path.dirname(self.config_file_path)
for file_pattern in self.config["ad_files"]: for file_pattern in self.config["ad_files"]:
for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, recursive = True): for ad_file in glob.glob(file_pattern, root_dir = data_root_dir, flags = glob.BRACE | glob.EXTGLOB):
ad_files.add(abspath(ad_file, relative_to = data_root_dir)) ad_files.add(abspath(ad_file, relative_to = data_root_dir))
LOG.info(" -> found %s", pluralize("ad config file", ad_files)) LOG.info(" -> found %s", pluralize("ad config file", ad_files))
if not ad_files: if not ad_files:
@@ -275,7 +276,7 @@ class KleinanzeigenBot(SeleniumMixin):
for image_pattern in ad_cfg["images"]: for image_pattern in ad_cfg["images"]:
pattern_images = set() pattern_images = set()
ad_dir = os.path.dirname(ad_file) ad_dir = os.path.dirname(ad_file)
for image_file in glob.glob(image_pattern, root_dir = ad_dir, recursive = True): for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob.BRACE | glob.EXTGLOB):
_, image_file_ext = os.path.splitext(image_file) _, image_file_ext = os.path.splitext(image_file)
ensure(image_file_ext.lower() in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]") ensure(image_file_ext.lower() in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]")
if os.path.isabs(image_file): if os.path.isabs(image_file):

View File

@@ -1,7 +1,5 @@
ad_files: ad_files:
- "**/ad_*.json" - "**/ad_*.{json,yml,yaml}"
- "**/ad_*.yml"
- "**/ad_*.yaml"
# default values for ads, can be overwritten in each ad configuration file # default values for ads, can be overwritten in each ad configuration file
ad_defaults: ad_defaults:

60
pdm.lock generated
View File

@@ -53,6 +53,12 @@ dependencies = [
"stevedore>=1.20.0", "stevedore>=1.20.0",
] ]
[[package]]
name = "bracex"
version = "2.3.post1"
requires_python = ">=3.7"
summary = "Bash style brace expander."
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2022.5.18.1" version = "2022.5.18.1"
@@ -269,7 +275,6 @@ summary = "Python style guide checker"
[[package]] [[package]]
name = "pycparser" name = "pycparser"
version = "2.21" version = "2.21"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "C parser in Python" summary = "C parser in Python"
[[package]] [[package]]
@@ -288,7 +293,7 @@ dependencies = [
[[package]] [[package]]
name = "pyinstaller-hooks-contrib" name = "pyinstaller-hooks-contrib"
version = "2022.6" version = "2022.7"
requires_python = ">=3.7" requires_python = ">=3.7"
summary = "Community maintained hooks for PyInstaller" summary = "Community maintained hooks for PyInstaller"
@@ -372,7 +377,7 @@ summary = "YAML parser and emitter for Python"
[[package]] [[package]]
name = "requests" name = "requests"
version = "2.27.1" version = "2.28.0"
requires_python = ">=3.7, <4" requires_python = ">=3.7, <4"
summary = "Python HTTP for Humans." summary = "Python HTTP for Humans."
dependencies = [ dependencies = [
@@ -419,7 +424,7 @@ dependencies = [
[[package]] [[package]]
name = "setuptools" name = "setuptools"
version = "62.3.2" version = "62.3.4"
requires_python = ">=3.7" requires_python = ">=3.7"
summary = "Easily download, build, install, upgrade, and uninstall Python packages" summary = "Easily download, build, install, upgrade, and uninstall Python packages"
@@ -463,7 +468,7 @@ summary = "A lil' TOML parser"
[[package]] [[package]]
name = "trio" name = "trio"
version = "0.20.0" version = "0.21.0"
requires_python = ">=3.7" requires_python = ">=3.7"
summary = "A friendly Python library for async concurrency and I/O" summary = "A friendly Python library for async concurrency and I/O"
dependencies = [ dependencies = [
@@ -517,7 +522,16 @@ dependencies = [
"cryptography>=1.3.4", "cryptography>=1.3.4",
"idna>=2.0.0", "idna>=2.0.0",
"pyOpenSSL>=0.14", "pyOpenSSL>=0.14",
"urllib3~=1.26", "urllib3",
]
[[package]]
name = "wcmatch"
version = "8.4"
requires_python = ">=3.7"
summary = "Wildcard/glob file name matcher."
dependencies = [
"bracex>=2.1.1",
] ]
[[package]] [[package]]
@@ -547,7 +561,7 @@ dependencies = [
[metadata] [metadata]
lock_version = "3.1" lock_version = "3.1"
content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520798cc0e" content_hash = "sha256:fc71d4d3b09f8b56d04adb73f69fcce816001a6f32d20023424aa39bab153778"
[metadata.files] [metadata.files]
"altgraph 0.17.2" = [ "altgraph 0.17.2" = [
@@ -578,6 +592,10 @@ content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520
{file = "bandit-1.7.4-py3-none-any.whl", hash = "sha256:412d3f259dab4077d0e7f0c11f50f650cc7d10db905d98f6520a95a18049658a"}, {file = "bandit-1.7.4-py3-none-any.whl", hash = "sha256:412d3f259dab4077d0e7f0c11f50f650cc7d10db905d98f6520a95a18049658a"},
{file = "bandit-1.7.4.tar.gz", hash = "sha256:2d63a8c573417bae338962d4b9b06fbc6080f74ecd955a092849e1e65c717bd2"}, {file = "bandit-1.7.4.tar.gz", hash = "sha256:2d63a8c573417bae338962d4b9b06fbc6080f74ecd955a092849e1e65c717bd2"},
] ]
"bracex 2.3.post1" = [
{file = "bracex-2.3.post1-py3-none-any.whl", hash = "sha256:351b7f20d56fb9ea91f9b9e9e7664db466eb234188c175fd943f8f755c807e73"},
{file = "bracex-2.3.post1.tar.gz", hash = "sha256:e7b23fc8b2cd06d3dec0692baabecb249dda94e06a617901ff03a6c56fd71693"},
]
"certifi 2022.5.18.1" = [ "certifi 2022.5.18.1" = [
{file = "certifi-2022.5.18.1-py3-none-any.whl", hash = "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a"}, {file = "certifi-2022.5.18.1-py3-none-any.whl", hash = "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a"},
{file = "certifi-2022.5.18.1.tar.gz", hash = "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7"}, {file = "certifi-2022.5.18.1.tar.gz", hash = "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7"},
@@ -867,9 +885,9 @@ content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520
{file = "pyinstaller-4.10-py3-none-win_amd64.whl", hash = "sha256:0dcaf6557cdb2da763c46e06e95a94a7634ab03fb09d91bc77988b01ee05c907"}, {file = "pyinstaller-4.10-py3-none-win_amd64.whl", hash = "sha256:0dcaf6557cdb2da763c46e06e95a94a7634ab03fb09d91bc77988b01ee05c907"},
{file = "pyinstaller-4.10.tar.gz", hash = "sha256:7749c868d2e2dc84df7d6f65437226183c8a366f3a99bb2737785625c3a3cca1"}, {file = "pyinstaller-4.10.tar.gz", hash = "sha256:7749c868d2e2dc84df7d6f65437226183c8a366f3a99bb2737785625c3a3cca1"},
] ]
"pyinstaller-hooks-contrib 2022.6" = [ "pyinstaller-hooks-contrib 2022.7" = [
{file = "pyinstaller_hooks_contrib-2022.6-py2.py3-none-any.whl", hash = "sha256:e38bf9266c57be19647762ee63b012683beb6919c6bc2ecdc66ca174edec44a0"}, {file = "pyinstaller_hooks_contrib-2022.7-py2.py3-none-any.whl", hash = "sha256:5fdb97dcae177955db7ab27840cba97b89dc0c7f4fd9142bba0f9b8d8df85c48"},
{file = "pyinstaller-hooks-contrib-2022.6.tar.gz", hash = "sha256:9dc611cf6667301e95384b4a0631b032bbffa16a0688f4cfa014d0c1e751d276"}, {file = "pyinstaller-hooks-contrib-2022.7.tar.gz", hash = "sha256:6675634279cfe9e475580fb310c3d557037baefb065e6cb5a69a124361b926fd"},
] ]
"pylint 2.12.1" = [ "pylint 2.12.1" = [
{file = "pylint-2.12.1-py3-none-any.whl", hash = "sha256:b4b5a7b6d04e914a11c198c816042af1fb2d3cda29bb0c98a9c637010da2a5c5"}, {file = "pylint-2.12.1-py3-none-any.whl", hash = "sha256:b4b5a7b6d04e914a11c198c816042af1fb2d3cda29bb0c98a9c637010da2a5c5"},
@@ -953,9 +971,9 @@ content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
] ]
"requests 2.27.1" = [ "requests 2.28.0" = [
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, {file = "requests-2.28.0-py3-none-any.whl", hash = "sha256:bc7861137fbce630f17b03d3ad02ad0bf978c844f3536d0edda6499dafce2b6f"},
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, {file = "requests-2.28.0.tar.gz", hash = "sha256:d568723a7ebd25875d8d1eaf5dfa068cd2fc8194b2e483d7b1f7c81918dbec6b"},
] ]
"ruamel.yaml 0.17.21" = [ "ruamel.yaml 0.17.21" = [
{file = "ruamel.yaml-0.17.21-py3-none-any.whl", hash = "sha256:742b35d3d665023981bd6d16b3d24248ce5df75fdb4e2924e93a05c1f8b61ca7"}, {file = "ruamel.yaml-0.17.21-py3-none-any.whl", hash = "sha256:742b35d3d665023981bd6d16b3d24248ce5df75fdb4e2924e93a05c1f8b61ca7"},
@@ -994,9 +1012,9 @@ content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520
"selenium-stealth 1.0.6" = [ "selenium-stealth 1.0.6" = [
{file = "selenium_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b62da5452aa4a84f29a4dfb21a9696aff20788a7c570dd0b81bc04a940848b97"}, {file = "selenium_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b62da5452aa4a84f29a4dfb21a9696aff20788a7c570dd0b81bc04a940848b97"},
] ]
"setuptools 62.3.2" = [ "setuptools 62.3.4" = [
{file = "setuptools-62.3.2-py3-none-any.whl", hash = "sha256:68e45d17c9281ba25dc0104eadd2647172b3472d9e01f911efa57965e8d51a36"}, {file = "setuptools-62.3.4-py3-none-any.whl", hash = "sha256:30b6b0fbacc459c90d27a63e6173facfc8b8c99a48fb24b5044f459ba63cd6cf"},
{file = "setuptools-62.3.2.tar.gz", hash = "sha256:a43bdedf853c670e5fed28e5623403bad2f73cf02f9a2774e91def6bda8265a7"}, {file = "setuptools-62.3.4.tar.gz", hash = "sha256:1f5d3a1502812025cdb2e5609b6af2d207332e3f50febe6db10ed3a59b2f155f"},
] ]
"smmap 5.0.0" = [ "smmap 5.0.0" = [
{file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
@@ -1022,9 +1040,9 @@ content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
] ]
"trio 0.20.0" = [ "trio 0.21.0" = [
{file = "trio-0.20.0-py3-none-any.whl", hash = "sha256:fb2d48e4eab0dfb786a472cd514aaadc71e3445b203bc300bad93daa75d77c1a"}, {file = "trio-0.21.0-py3-none-any.whl", hash = "sha256:4dc0bf9d5cc78767fc4516325b6d80cc0968705a31d0eec2ecd7cdda466265b0"},
{file = "trio-0.20.0.tar.gz", hash = "sha256:670a52d3115d0e879e1ac838a4eb999af32f858163e3a704fe4839de2a676070"}, {file = "trio-0.21.0.tar.gz", hash = "sha256:523f39b7b69eef73501cebfe1aafd400a9aad5b03543a0eded52952488ff1c13"},
] ]
"trio-websocket 0.9.2" = [ "trio-websocket 0.9.2" = [
{file = "trio_websocket-0.9.2-py3-none-any.whl", hash = "sha256:5b558f6e83cc20a37c3b61202476c5295d1addf57bd65543364e0337e37ed2bc"}, {file = "trio_websocket-0.9.2-py3-none-any.whl", hash = "sha256:5b558f6e83cc20a37c3b61202476c5295d1addf57bd65543364e0337e37ed2bc"},
@@ -1042,6 +1060,10 @@ content_hash = "sha256:0c031c9f1ac97efa967430e72897a18f672607d4acbbda8d7c3973520
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
] ]
"wcmatch 8.4" = [
{file = "wcmatch-8.4-py3-none-any.whl", hash = "sha256:dc7351e5a7f8bbf4c6828d51ad20c1770113f5f3fd3dfe2a03cfde2a63f03f98"},
{file = "wcmatch-8.4.tar.gz", hash = "sha256:ba4fc5558f8946bf1ffc7034b05b814d825d694112499c86035e0e4d398b6a67"},
]
"webdriver-manager 3.7.0" = [ "webdriver-manager 3.7.0" = [
{file = "webdriver_manager-3.7.0-py2.py3-none-any.whl", hash = "sha256:ee09f7c5d9c61ca9cf2b78a036355f617f5dede2d68b7d2d77877d1d48df1361"}, {file = "webdriver_manager-3.7.0-py2.py3-none-any.whl", hash = "sha256:ee09f7c5d9c61ca9cf2b78a036355f617f5dede2d68b7d2d77877d1d48df1361"},
{file = "webdriver_manager-3.7.0.tar.gz", hash = "sha256:4a7247086b181d3a077a7f0be71b2f8e297d213ddd563ecea263dcb17e4e865f"}, {file = "webdriver_manager-3.7.0.tar.gz", hash = "sha256:4a7247086b181d3a077a7f0be71b2f8e297d213ddd563ecea263dcb17e4e865f"},

View File

@@ -33,6 +33,7 @@ dependencies = [
"pywin32==303; sys_platform == 'win32'", "pywin32==303; sys_platform == 'win32'",
"selenium~=4.1", "selenium~=4.1",
"selenium_stealth~=1.0", "selenium_stealth~=1.0",
"wcmatch~=8.4",
"webdriver_manager~=3.7" "webdriver_manager~=3.7"
] ]