replace selenium with nodriver

sebthom
2024-03-07 20:26:40 +01:00
parent ca539cdd92
commit a441c5de73
13 changed files with 1303 additions and 1303 deletions


@@ -20,7 +20,7 @@
 It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages:
 - supports Microsoft Edge browser (Chromium based)
-- compatible chromedriver is installed automatically
+- does not require selenium and chromedrivers
 - better captcha handling
 - config:
   - use YAML or JSON for config files
@@ -29,7 +29,7 @@ It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https:/
 - reference categories by name (looked up from [categories.yaml](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/kleinanzeigen_bot/resources/categories.yaml))
 - logging is configurable and colorized
 - provided as self-contained executable for Windows, Linux and macOS
-- source code is pylint checked and uses Python type hints
+- source code is pylint/bandit/mypy checked and uses Python type hints
 - CI builds
@@ -290,7 +290,7 @@ description: # can be multiline, see syntax here https://yaml-multiline.info/
 # or category ID (e.g. 161/27)
 category: Notebooks
-price:
+price: # without decimals, e.g. 75
 price_type: # one of: FIXED, NEGOTIABLE, GIVE_AWAY
 special_attributes:
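For orientation, the documented fields correspond to an ad configuration like the following, shown as a Python literal purely for illustration (the bot itself reads this structure from a YAML or JSON file, and all values here are made up):

    # hypothetical ad configuration mirroring the documented fields
    ad_cfg = {
        "category": "Notebooks",   # category name or ID (e.g. 161/27)
        "price": 75,               # without decimals
        "price_type": "FIXED",     # one of: FIXED, NEGOTIABLE, GIVE_AWAY
        "special_attributes": {},
    }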

pdm.lock (generated, 476 changed lines)

@@ -5,7 +5,7 @@
groups = ["default", "dev"] groups = ["default", "dev"]
strategy = ["cross_platform"] strategy = ["cross_platform"]
lock_version = "4.4.1" lock_version = "4.4.1"
content_hash = "sha256:66695736c39c00414bd1e0c1d85a95957e00e058699d53eee716d6822214668f" content_hash = "sha256:9174562d901578582fe3f3a087416ba5f2def90f12ac79407c2e8c6a93e8b8ca"
[[package]] [[package]]
name = "altgraph" name = "altgraph"
@@ -16,16 +16,6 @@ files = [
{file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"}, {file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
] ]
[[package]]
name = "annotated-types"
version = "0.6.0"
requires_python = ">=3.8"
summary = "Reusable constraint types to use with typing.Annotated"
files = [
{file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
{file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
]
[[package]] [[package]]
name = "astroid" name = "astroid"
version = "3.1.0" version = "3.1.0"
@@ -39,16 +29,6 @@ files = [
{file = "astroid-3.1.0.tar.gz", hash = "sha256:ac248253bfa4bd924a0de213707e7ebeeb3138abeb48d798784ead1e56d419d4"}, {file = "astroid-3.1.0.tar.gz", hash = "sha256:ac248253bfa4bd924a0de213707e7ebeeb3138abeb48d798784ead1e56d419d4"},
] ]
[[package]]
name = "attrs"
version = "23.2.0"
requires_python = ">=3.7"
summary = "Classes Without Boilerplate"
files = [
{file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
{file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
]
[[package]] [[package]]
name = "autopep8" name = "autopep8"
version = "2.0.4" version = "2.0.4"
@@ -89,60 +69,6 @@ files = [
{file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"}, {file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"},
] ]
[[package]]
name = "certifi"
version = "2024.2.2"
requires_python = ">=3.6"
summary = "Python package for providing Mozilla's CA Bundle."
files = [
{file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"},
{file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"},
]
[[package]]
name = "cffi"
version = "1.16.0"
requires_python = ">=3.8"
summary = "Foreign Function Interface for Python calling C code."
dependencies = [
"pycparser",
]
files = [
{file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
{file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"},
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"},
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"},
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"},
{file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"},
{file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"},
{file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"},
{file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"},
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"},
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"},
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"},
{file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"},
{file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"},
{file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"},
{file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"},
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"},
{file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"},
{file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"},
{file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"},
{file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"},
]
[[package]] [[package]]
name = "colorama" name = "colorama"
version = "0.4.6" version = "0.4.6"
@@ -166,6 +92,19 @@ files = [
{file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"},
] ]
[[package]]
name = "deprecated"
version = "1.2.14"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "Python @deprecated decorator to deprecate old python classes, functions or methods."
dependencies = [
"wrapt<2,>=1.10",
]
files = [
{file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"},
{file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"},
]
[[package]] [[package]]
name = "dill" name = "dill"
version = "0.3.8" version = "0.3.8"
@@ -187,13 +126,13 @@ files = [
 ]
 [[package]]
-name = "h11"
-version = "0.14.0"
-requires_python = ">=3.7"
-summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
+name = "flaky"
+version = "3.7.0"
+requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+summary = "Plugin for nose or pytest that automatically reruns flaky tests."
 files = [
-{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
-{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
+{file = "flaky-3.7.0-py2.py3-none-any.whl", hash = "sha256:d6eda73cab5ae7364504b7c44670f70abed9e75f77dd116352f662817592ec9c"},
+{file = "flaky-3.7.0.tar.gz", hash = "sha256:3ad100780721a1911f57a165809b7ea265a7863305acb66708220820caf8aa0d"},
 ]
 [[package]]
@@ -209,16 +148,6 @@ files = [
{file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"},
] ]
[[package]]
name = "idna"
version = "3.6"
requires_python = ">=3.5"
summary = "Internationalized Domain Names in Applications (IDNA)"
files = [
{file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"},
{file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
]
[[package]] [[package]]
name = "iniconfig" name = "iniconfig"
version = "2.0.0" version = "2.0.0"
@@ -284,6 +213,16 @@ files = [
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
] ]
[[package]]
name = "mss"
version = "9.0.1"
requires_python = ">=3.8"
summary = "An ultra fast cross-platform multiple screenshots module in pure python using ctypes."
files = [
{file = "mss-9.0.1-py3-none-any.whl", hash = "sha256:7ee44db7ab14cbea6a3eb63813c57d677a109ca5979d3b76046e4bddd3ca1a0b"},
{file = "mss-9.0.1.tar.gz", hash = "sha256:6eb7b9008cf27428811fa33aeb35f3334db81e3f7cc2dd49ec7c6e5a94b39f12"},
]
[[package]] [[package]]
name = "mypy" name = "mypy"
version = "1.8.0" version = "1.8.0"
@@ -325,16 +264,31 @@ files = [
 ]
 [[package]]
-name = "outcome"
-version = "1.3.0.post0"
-requires_python = ">=3.7"
-summary = "Capture the outcome of Python function calls."
+name = "nodeenv"
+version = "1.8.0"
+requires_python = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
+summary = "Node.js virtual environment builder"
 dependencies = [
-"attrs>=19.2.0",
+"setuptools",
 ]
 files = [
-{file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"},
-{file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"},
+{file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"},
+{file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"},
+]
+[[package]]
+name = "nodriver"
+version = "0.27rc1"
+requires_python = ">=3.9"
+summary = "* Official successor of Undetected Chromedriver"
+dependencies = [
+"deprecated",
+"mss",
+"websockets>=11",
+]
+files = [
+{file = "nodriver-0.27rc1-py3-none-any.whl", hash = "sha256:d7e858417347628e53fc5bd3692b0608ac403ec9dc5027a3e6f27dce1074052e"},
+{file = "nodriver-0.27rc1.tar.gz", hash = "sha256:f0b1019a07cc3da0386d363cf5e7f6c96a8fd10de76566d201f45b7fe96af7cb"},
 ]
 [[package]]
@@ -422,97 +376,6 @@ files = [
{file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
] ]
[[package]]
name = "pycparser"
version = "2.21"
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
summary = "C parser in Python"
files = [
{file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
]
[[package]]
name = "pydantic"
version = "2.6.3"
requires_python = ">=3.8"
summary = "Data validation using Python type hints"
dependencies = [
"annotated-types>=0.4.0",
"pydantic-core==2.16.3",
"typing-extensions>=4.6.1",
]
files = [
{file = "pydantic-2.6.3-py3-none-any.whl", hash = "sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a"},
{file = "pydantic-2.6.3.tar.gz", hash = "sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f"},
]
[[package]]
name = "pydantic-core"
version = "2.16.3"
requires_python = ">=3.8"
summary = ""
dependencies = [
"typing-extensions!=4.7.0,>=4.6.0",
]
files = [
{file = "pydantic_core-2.16.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:75b81e678d1c1ede0785c7f46690621e4c6e63ccd9192af1f0bd9d504bbb6bf4"},
{file = "pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c865a7ee6f93783bd5d781af5a4c43dadc37053a5b42f7d18dc019f8c9d2bd1"},
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:162e498303d2b1c036b957a1278fa0899d02b2842f1ff901b6395104c5554a45"},
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f583bd01bbfbff4eaee0868e6fc607efdfcc2b03c1c766b06a707abbc856187"},
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b926dd38db1519ed3043a4de50214e0d600d404099c3392f098a7f9d75029ff8"},
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:716b542728d4c742353448765aa7cdaa519a7b82f9564130e2b3f6766018c9ec"},
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4ad7f7ee1a13d9cb49d8198cd7d7e3aa93e425f371a68235f784e99741561f"},
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd87f48924f360e5d1c5f770d6155ce0e7d83f7b4e10c2f9ec001c73cf475c99"},
{file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0df446663464884297c793874573549229f9eca73b59360878f382a0fc085979"},
{file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4df8a199d9f6afc5ae9a65f8f95ee52cae389a8c6b20163762bde0426275b7db"},
{file = "pydantic_core-2.16.3-cp310-none-win32.whl", hash = "sha256:456855f57b413f077dff513a5a28ed838dbbb15082ba00f80750377eed23d132"},
{file = "pydantic_core-2.16.3-cp310-none-win_amd64.whl", hash = "sha256:732da3243e1b8d3eab8c6ae23ae6a58548849d2e4a4e03a1924c8ddf71a387cb"},
{file = "pydantic_core-2.16.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:519ae0312616026bf4cedc0fe459e982734f3ca82ee8c7246c19b650b60a5ee4"},
{file = "pydantic_core-2.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b3992a322a5617ded0a9f23fd06dbc1e4bd7cf39bc4ccf344b10f80af58beacd"},
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d62da299c6ecb04df729e4b5c52dc0d53f4f8430b4492b93aa8de1f541c4aac"},
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2acca2be4bb2f2147ada8cac612f8a98fc09f41c89f87add7256ad27332c2fda"},
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b662180108c55dfbf1280d865b2d116633d436cfc0bba82323554873967b340"},
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7c6ed0dc9d8e65f24f5824291550139fe6f37fac03788d4580da0d33bc00c97"},
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1bb0827f56654b4437955555dc3aeeebeddc47c2d7ed575477f082622c49e"},
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e56f8186d6210ac7ece503193ec84104da7ceb98f68ce18c07282fcc2452e76f"},
{file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:936e5db01dd49476fa8f4383c259b8b1303d5dd5fb34c97de194560698cc2c5e"},
{file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:33809aebac276089b78db106ee692bdc9044710e26f24a9a2eaa35a0f9fa70ba"},
{file = "pydantic_core-2.16.3-cp311-none-win32.whl", hash = "sha256:ded1c35f15c9dea16ead9bffcde9bb5c7c031bff076355dc58dcb1cb436c4721"},
{file = "pydantic_core-2.16.3-cp311-none-win_amd64.whl", hash = "sha256:d89ca19cdd0dd5f31606a9329e309d4fcbb3df860960acec32630297d61820df"},
{file = "pydantic_core-2.16.3-cp311-none-win_arm64.whl", hash = "sha256:6162f8d2dc27ba21027f261e4fa26f8bcb3cf9784b7f9499466a311ac284b5b9"},
{file = "pydantic_core-2.16.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f56ae86b60ea987ae8bcd6654a887238fd53d1384f9b222ac457070b7ac4cff"},
{file = "pydantic_core-2.16.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9bd22a2a639e26171068f8ebb5400ce2c1bc7d17959f60a3b753ae13c632975"},
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4204e773b4b408062960e65468d5346bdfe139247ee5f1ca2a378983e11388a2"},
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f651dd19363c632f4abe3480a7c87a9773be27cfe1341aef06e8759599454120"},
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf09e615a0bf98d406657e0008e4a8701b11481840be7d31755dc9f97c44053"},
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8e47755d8152c1ab5b55928ab422a76e2e7b22b5ed8e90a7d584268dd49e9c6b"},
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:500960cb3a0543a724a81ba859da816e8cf01b0e6aaeedf2c3775d12ee49cade"},
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf6204fe865da605285c34cf1172879d0314ff267b1c35ff59de7154f35fdc2e"},
{file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d33dd21f572545649f90c38c227cc8631268ba25c460b5569abebdd0ec5974ca"},
{file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:49d5d58abd4b83fb8ce763be7794d09b2f50f10aa65c0f0c1696c677edeb7cbf"},
{file = "pydantic_core-2.16.3-cp312-none-win32.whl", hash = "sha256:f53aace168a2a10582e570b7736cc5bef12cae9cf21775e3eafac597e8551fbe"},
{file = "pydantic_core-2.16.3-cp312-none-win_amd64.whl", hash = "sha256:0d32576b1de5a30d9a97f300cc6a3f4694c428d956adbc7e6e2f9cad279e45ed"},
{file = "pydantic_core-2.16.3-cp312-none-win_arm64.whl", hash = "sha256:ec08be75bb268473677edb83ba71e7e74b43c008e4a7b1907c6d57e940bf34b6"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:36fa178aacbc277bc6b62a2c3da95226520da4f4e9e206fdf076484363895d2c"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:dcca5d2bf65c6fb591fff92da03f94cd4f315972f97c21975398bd4bd046854a"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a72fb9963cba4cd5793854fd12f4cfee731e86df140f59ff52a49b3552db241"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60cc1a081f80a2105a59385b92d82278b15d80ebb3adb200542ae165cd7d183"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cbcc558401de90a746d02ef330c528f2e668c83350f045833543cd57ecead1ad"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fee427241c2d9fb7192b658190f9f5fd6dfe41e02f3c1489d2ec1e6a5ab1e04a"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4cb85f693044e0f71f394ff76c98ddc1bc0953e48c061725e540396d5c8a2e1"},
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b29eeb887aa931c2fcef5aa515d9d176d25006794610c264ddc114c053bf96fe"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a425479ee40ff021f8216c9d07a6a3b54b31c8267c6e17aa88b70d7ebd0e5e5b"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c5cbc703168d1b7a838668998308018a2718c2130595e8e190220238addc96f"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99b6add4c0b39a513d323d3b93bc173dac663c27b99860dd5bf491b240d26137"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f76ee558751746d6a38f89d60b6228fa174e5172d143886af0f85aa306fd89"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:00ee1c97b5364b84cb0bd82e9bbf645d5e2871fb8c58059d158412fee2d33d8a"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:287073c66748f624be4cef893ef9174e3eb88fe0b8a78dc22e88eca4bc357ca6"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed25e1835c00a332cb10c683cd39da96a719ab1dfc08427d476bce41b92531fc"},
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:86b3d0033580bd6bbe07590152007275bd7af95f98eaa5bd36f3da219dcd93da"},
{file = "pydantic_core-2.16.3.tar.gz", hash = "sha256:1cac689f80a3abab2d3c0048b29eea5751114054f032a941a32de4c852c59cad"},
]
[[package]] [[package]]
name = "pygments" name = "pygments"
version = "2.17.2" version = "2.17.2"
@@ -598,18 +461,21 @@ files = [
 ]
 [[package]]
-name = "pysocks"
-version = "1.7.1"
-requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-summary = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information."
+name = "pyright"
+version = "1.1.352"
+requires_python = ">=3.7"
+summary = "Command line wrapper for pyright"
+dependencies = [
+"nodeenv>=1.6.0",
+]
 files = [
-{file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"},
-{file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"},
+{file = "pyright-1.1.352-py3-none-any.whl", hash = "sha256:0040cf173c6a60704e553bfd129dfe54de59cc76d0b2b80f77cfab4f50701d64"},
+{file = "pyright-1.1.352.tar.gz", hash = "sha256:a621c0dfbcf1291b3610641a07380fefaa1d0e182890a1b2a7f13b446e8109a9"},
 ]
 [[package]]
 name = "pytest"
-version = "8.1.0"
+version = "8.0.2"
 requires_python = ">=3.8"
 summary = "pytest: simple powerful testing with Python"
 dependencies = [
@@ -617,12 +483,12 @@ dependencies = [
"exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"",
"iniconfig", "iniconfig",
"packaging", "packaging",
"pluggy<2.0,>=1.4", "pluggy<2.0,>=1.3.0",
"tomli>=1; python_version < \"3.11\"", "tomli>=1.0.0; python_version < \"3.11\"",
] ]
files = [ files = [
{file = "pytest-8.1.0-py3-none-any.whl", hash = "sha256:ee32db7af8de4629a455806befa90559f307424c07b8413ccfc30bf5b221dd7e"}, {file = "pytest-8.0.2-py3-none-any.whl", hash = "sha256:edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096"},
{file = "pytest-8.1.0.tar.gz", hash = "sha256:f8fa04ab8f98d185113ae60ea6d79c22f8143b14bc1caeced44a0ab844928323"}, {file = "pytest-8.0.2.tar.gz", hash = "sha256:d4051d623a2e0b7e51960ba963193b09ce6daeb9759a451844a21e4ddedfc1bd"},
] ]
[[package]] [[package]]
@@ -742,35 +608,6 @@ files = [
{file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"}, {file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"},
] ]
[[package]]
name = "selenium"
version = "4.18.1"
requires_python = ">=3.8"
summary = ""
dependencies = [
"certifi>=2021.10.8",
"trio-websocket~=0.9",
"trio~=0.17",
"typing-extensions>=4.9.0",
"urllib3[socks]<3,>=1.26",
]
files = [
{file = "selenium-4.18.1-py3-none-any.whl", hash = "sha256:b24a3cdd2d47c29832e81345bfcde0c12bb608738013e53c781b211b418df241"},
{file = "selenium-4.18.1.tar.gz", hash = "sha256:a11f67afa8bfac6b77e148c987b33f6b14eb1cae4d352722a75de1f26e3f0ae2"},
]
[[package]]
name = "selenium-stealth"
version = "1.0.6"
requires_python = ">=3, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
summary = "Trying to make python selenium more stealthy."
dependencies = [
"selenium",
]
files = [
{file = "selenium_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b62da5452aa4a84f29a4dfb21a9696aff20788a7c570dd0b81bc04a940848b97"},
]
[[package]] [[package]]
name = "setuptools" name = "setuptools"
version = "69.1.1" version = "69.1.1"
@@ -781,25 +618,6 @@ files = [
{file = "setuptools-69.1.1.tar.gz", hash = "sha256:5c0806c7d9af348e6dd3777b4f4dbb42c7ad85b190104837488eab9a7c945cf8"}, {file = "setuptools-69.1.1.tar.gz", hash = "sha256:5c0806c7d9af348e6dd3777b4f4dbb42c7ad85b190104837488eab9a7c945cf8"},
] ]
[[package]]
name = "sniffio"
version = "1.3.1"
requires_python = ">=3.7"
summary = "Sniff out which async library your code is running under"
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "sortedcontainers"
version = "2.4.0"
summary = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
files = [
{file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"},
{file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
]
[[package]] [[package]]
name = "stevedore" name = "stevedore"
version = "5.2.0" version = "5.2.0"
@@ -843,40 +661,6 @@ files = [
{file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"}, {file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"},
] ]
[[package]]
name = "trio"
version = "0.24.0"
requires_python = ">=3.8"
summary = "A friendly Python library for async concurrency and I/O"
dependencies = [
"attrs>=20.1.0",
"cffi>=1.14; os_name == \"nt\" and implementation_name != \"pypy\"",
"exceptiongroup; python_version < \"3.11\"",
"idna",
"outcome",
"sniffio>=1.3.0",
"sortedcontainers",
]
files = [
{file = "trio-0.24.0-py3-none-any.whl", hash = "sha256:c3bd3a4e3e3025cd9a2241eae75637c43fe0b9e88b4c97b9161a55b9e54cd72c"},
{file = "trio-0.24.0.tar.gz", hash = "sha256:ffa09a74a6bf81b84f8613909fb0beaee84757450183a7a2e0b47b455c0cac5d"},
]
[[package]]
name = "trio-websocket"
version = "0.11.1"
requires_python = ">=3.7"
summary = "WebSocket library for Trio"
dependencies = [
"exceptiongroup; python_version < \"3.11\"",
"trio>=0.11",
"wsproto>=0.14",
]
files = [
{file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"},
{file = "trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"},
]
[[package]] [[package]]
name = "typing-extensions" name = "typing-extensions"
version = "4.10.0" version = "4.10.0"
@@ -887,31 +671,6 @@ files = [
{file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"},
] ]
[[package]]
name = "urllib3"
version = "2.2.1"
requires_python = ">=3.8"
summary = "HTTP library with thread-safe connection pooling, file post, and more."
files = [
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
]
[[package]]
name = "urllib3"
version = "2.2.1"
extras = ["socks"]
requires_python = ">=3.8"
summary = "HTTP library with thread-safe connection pooling, file post, and more."
dependencies = [
"pysocks!=1.5.7,<2.0,>=1.5.6",
"urllib3==2.2.1",
]
files = [
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
]
[[package]] [[package]]
name = "wcmatch" name = "wcmatch"
version = "8.5.1" version = "8.5.1"
@@ -926,14 +685,99 @@ files = [
 ]
 [[package]]
-name = "wsproto"
-version = "1.2.0"
-requires_python = ">=3.7.0"
-summary = "WebSockets state-machine based protocol implementation"
-dependencies = [
-"h11<1,>=0.9.0",
-]
+name = "websockets"
+version = "12.0"
+requires_python = ">=3.8"
+summary = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
 files = [
-{file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"},
-{file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"},
+{file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
+{file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
+{file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"},
+{file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"},
+{file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"},
+{file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"},
+{file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"},
+{file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"},
+{file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"},
+{file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"},
+{file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"},
+{file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"},
+{file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"},
+{file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"},
+{file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"},
+{file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"},
+{file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"},
+{file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"},
+{file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"},
+{file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"},
+{file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"},
+{file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"},
+{file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"},
+{file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"},
+{file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"},
+{file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"},
+{file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"},
+{file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"},
+{file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"},
+{file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"},
+{file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"},
+{file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"},
+{file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"},
+{file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"},
+{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"},
+{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"},
+{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"},
+{file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"},
+{file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"},
+{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"},
+{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"},
+{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"},
+{file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"},
+{file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"},
+{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"},
+{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"},
+{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"},
+{file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"},
+{file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"},
+{file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
+]
+[[package]]
+name = "wrapt"
+version = "1.16.0"
+requires_python = ">=3.6"
+summary = "Module for decorators, wrappers and monkey patching."
+files = [
+{file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
+{file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"},
+{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"},
+{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"},
+{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"},
+{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"},
+{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"},
+{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"},
+{file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"},
+{file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"},
+{file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"},
+{file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"},
+{file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"},
+{file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"},
+{file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"},
+{file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"},
+{file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"},
+{file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"},
+{file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"},
+{file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"},
+{file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"},
+{file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"},
+{file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"},
+{file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"},
+{file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"},
+{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"},
+{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"},
+{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"},
+{file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"},
+{file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"},
+{file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"},
+{file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
 ]


@@ -10,7 +10,6 @@ from PyInstaller.utils.hooks import collect_data_files
 datas = [
 *collect_data_files("kleinanzeigen_bot"), # embeds *.yaml files
-*collect_data_files("selenium_stealth"), # embeds *.js files
 # required to get version info via 'importlib.metadata.version(__package__)'
 # but we use https://backend.pdm-project.org/metadata/#writing-dynamic-version-to-file


@@ -32,14 +32,14 @@ classifiers = [ # https://pypi.org/classifiers/
 ]
 requires-python = ">=3.10,<3.13" # <3.12 required for pyinstaller
 dependencies = [
-"colorama~=0.4",
-"coloredlogs~=15.0",
-"overrides~=7.4",
-"ruamel.yaml~=0.18",
-"pywin32==306; sys_platform == 'win32'",
-"selenium~=4.18",
-"selenium_stealth~=1.0",
-"wcmatch~=8.5",
+"colorama",
+"coloredlogs",
+"nodriver",
+"overrides",
+"ruamel.yaml",
+"psutil",
+"pywin32; sys_platform == 'win32'",
+"wcmatch",
 ]
[project.urls] [project.urls]
@@ -60,25 +60,28 @@ write_template = "__version__ = '{}'\n"
 [tool.pdm.dev-dependencies]
 dev = [
-"autopep8~=2.0",
-"bandit~=1.7",
+"autopep8",
+"bandit",
 "toml", # required by bandit
 "tomli", # required by bandit
-"pydantic~=2.6",
-"pytest~=8.1",
-"pyinstaller~=6.4",
-"pylint~=3.1",
-"mypy~=1.8",
+"pytest",
+"flaky", # used by pytest
+"psutil",
+"pyinstaller",
+"pylint",
+"mypy",
+"pyright",
 ]
 [tool.pdm.scripts] # https://pdm-project.org/latest/usage/scripts/
 app = "python -m kleinanzeigen_bot"
 compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
 compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
-format = "autopep8 --recursive --in-place kleinanzeigen_bot tests --verbose"
-lint = {shell = "pylint -v src tests && autopep8 -v --exit-code --recursive --diff src tests && echo No issues found."}
-scan = "bandit -c pyproject.toml -r kleinanzeigen_bot"
+format = "autopep8 --recursive --in-place src tests --verbose"
+lint = {shell = "pylint -v src tests && autopep8 -v --exit-code --recursive --diff src tests && mypy" }
+scan = "bandit -c pyproject.toml -r src"
 test = "python -m pytest --capture=tee-sys -v"
 utest = "python -m pytest --capture=tee-sys -v -m 'not itest'"
 itest = "python -m pytest --capture=tee-sys -v -m 'itest'"
@@ -117,6 +120,7 @@ aggressive = 3
 # https://mypy.readthedocs.io/en/stable/config_file.html
 #mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
 python_version = "3.10"
+files = "src,tests"
 strict = true
 disallow_untyped_calls = false
 disallow_untyped_defs = true
@@ -127,6 +131,18 @@ warn_unused_ignores = true
 verbosity = 0
+#####################
+# pyright
+# https://github.com/microsoft/pyright/
+#####################
+[tool.pyright]
+# https://microsoft.github.io/pyright/#/configuration?id=main-configuration-options
+include = ["src", "tests"]
+defineConstant = { DEBUG = false }
+pythonVersion = "3.10"
+typeCheckingMode = "standard"
 #####################
 # pylint
 # https://pypi.org/project/pylint/
@@ -190,6 +206,7 @@ disable= [
"multiple-imports", "multiple-imports",
"multiple-statements", "multiple-statements",
"no-self-use", "no-self-use",
"no-member", # pylint cannot find async methods from super class
"too-few-public-methods" "too-few-public-methods"
] ]


@@ -3,24 +3,22 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
 SPDX-License-Identifier: AGPL-3.0-or-later
 SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
 """
-import atexit, copy, getopt, importlib.metadata, json, logging, os, re, signal, shutil, sys, textwrap, time, urllib
+import asyncio, atexit, copy, getopt, importlib.metadata, json, logging, os, re, signal, shutil, sys, textwrap, time
+import urllib.parse as urllib_parse
+import urllib.request as urllib_request
 from collections.abc import Iterable
 from datetime import datetime
 from logging.handlers import RotatingFileHandler
 from typing import Any, Final
-import certifi, colorama
+import colorama, nodriver
 from overrides import overrides
 from ruamel.yaml import YAML
-from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException
-from selenium.webdriver.common.by import By
-from selenium.webdriver.remote.webelement import WebElement
-from selenium.webdriver.support import expected_conditions as EC
 from wcmatch import glob
 from . import utils, resources, extract
-from .utils import abspath, apply_defaults, ensure, is_frozen, pause, pluralize, safe_get, parse_datetime
-from .selenium_mixin import SeleniumMixin
+from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, pluralize, safe_get, parse_datetime
+from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
 from ._version import __version__
 # W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
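nodriver talks to Chromium-based browsers directly over the DevTools Protocol through an async API, which is why asyncio joins the imports and the methods below become coroutines. A minimal, self-contained sketch of the nodriver calls this code builds on (URL and selector are placeholders):

    import nodriver

    async def demo() -> None:
        browser = await nodriver.start()  # launches a local Chromium browser, no chromedriver involved
        page = await browser.get("https://example.com")
        field = await page.select("input#email")  # look up an element by CSS selector
        await field.send_keys("user@example.com")
        browser.stop()

    if __name__ == "__main__":
        nodriver.loop().run_until_complete(demo())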
@@ -32,14 +30,10 @@ LOG.setLevel(logging.INFO)
 colorama.init()
-class KleinanzeigenBot(SeleniumMixin):
+class KleinanzeigenBot(WebScrapingMixin):
 def __init__(self) -> None:
-# workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/207
-# see https://github.com/pyinstaller/pyinstaller/issues/7229#issuecomment-1309383026
-os.environ["SSL_CERT_FILE"] = certifi.where()
 super().__init__()
 self.root_url = "https://www.kleinanzeigen.de"
@@ -61,15 +55,14 @@ class KleinanzeigenBot(SeleniumMixin):
 def __del__(self) -> None:
 if self.file_log:
 LOG_ROOT.removeHandler(self.file_log)
-if self.webdriver:
-self.webdriver.quit()
-self.webdriver = None
+self.close_browser_session()
 def get_version(self) -> str:
 return __version__
-def run(self, args:list[str]) -> None:
+async def run(self, args:list[str]) -> None:
 self.parse_args(args)
+try:
 match self.command:
 case "help":
 self.show_help()
@@ -91,9 +84,9 @@ class KleinanzeigenBot(SeleniumMixin):
 self.ads_selector = 'due'
 if ads := self.load_ads():
-self.create_webdriver_session()
-self.login()
-self.publish_ads(ads)
+await self.create_browser_session()
+await self.login()
+await self.publish_ads(ads)
 else:
 LOG.info("############################################")
 LOG.info("DONE: No new/outdated ads found.")
@@ -102,9 +95,9 @@ class KleinanzeigenBot(SeleniumMixin):
 self.configure_file_logging()
 self.load_config()
 if ads := self.load_ads():
-self.create_webdriver_session()
-self.login()
-self.delete_ads(ads)
+await self.create_browser_session()
+await self.login()
+await self.delete_ads(ads)
 else:
 LOG.info("############################################")
 LOG.info("DONE: No ads to delete found.")
@@ -115,15 +108,16 @@ class KleinanzeigenBot(SeleniumMixin):
 if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
 LOG.warning('You provided no ads selector. Defaulting to "new".')
 self.ads_selector = 'new'
-# start session
 self.load_config()
-self.create_webdriver_session()
-self.login()
-self.start_download_routine() # call correct version of download
+await self.create_browser_session()
+await self.login()
+await self.download_ads()
 case _:
 LOG.error("Unknown command: %s", self.command)
 sys.exit(2)
+finally:
+self.close_browser_session()
 def show_help(self) -> None:
 if is_frozen():
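With run() now a coroutine, the caller has to drive it from an event loop; a hedged sketch of such an entry point (the project's actual __main__ wiring is not shown in this diff and may differ):

    import sys

    import nodriver

    def main() -> None:
        bot = KleinanzeigenBot()
        # nodriver ships a loop helper; plain asyncio.run(bot.run(...)) would also work
        nodriver.loop().run_until_complete(bot.run(sys.argv))

    if __name__ == "__main__":
        main()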
@@ -200,6 +194,7 @@ class KleinanzeigenBot(SeleniumMixin):
 self.delete_old_ads = False
 case "-v" | "--verbose":
 LOG.setLevel(logging.DEBUG)
+logging.getLogger("nodriver").setLevel(logging.INFO)
 match len(arguments):
 case 0:
@@ -290,6 +285,7 @@ class KleinanzeigenBot(SeleniumMixin):
 continue
 ad_cfg["description"] = descr_prefix + (ad_cfg["description"] or "") + descr_suffix
+ad_cfg["description"] = ad_cfg["description"].replace("@", "(at)")
 ensure(len(ad_cfg["description"]) <= 4000, f"Length of ad description including prefix and suffix exceeds 4000 chars. @ [{ad_file}]")
 # pylint: disable=cell-var-from-loop
@@ -311,6 +307,7 @@ class KleinanzeigenBot(SeleniumMixin):
 ensure(not safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
 elif ad_cfg["price_type"] == "FIXED":
+assert_has_value("price")
 assert_one_of("shipping_type", {"PICKUP", "SHIPPING", "NOT_APPLICABLE"})
 assert_has_value("contact.name")
 assert_has_value("republication_interval")
@@ -326,9 +323,9 @@ class KleinanzeigenBot(SeleniumMixin):
if ad_cfg["images"]: if ad_cfg["images"]:
images = [] images = []
ad_dir = os.path.dirname(ad_file)
for image_pattern in ad_cfg["images"]: for image_pattern in ad_cfg["images"]:
pattern_images = set() pattern_images = set()
ad_dir = os.path.dirname(ad_file)
for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB): for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
_, image_file_ext = os.path.splitext(image_file) _, image_file_ext = os.path.splitext(image_file)
ensure(image_file_ext.lower() in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]") ensure(image_file_ext.lower() in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]")
@@ -376,133 +373,122 @@ class KleinanzeigenBot(SeleniumMixin):
self.browser_config.user_data_dir = abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path) self.browser_config.user_data_dir = abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path)
self.browser_config.profile_name = self.config["browser"]["profile_name"] self.browser_config.profile_name = self.config["browser"]["profile_name"]
def login(self) -> None: async def login(self) -> None:
LOG.info("Checking if already logged in") LOG.info("Checking if already logged in...")
self.web_open(f"{self.root_url}") await self.web_open(f"{self.root_url}")
if self.is_logged_in(): if await self.is_logged_in():
LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"]) LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"])
return return
LOG.info("Opening login page...")
await self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/")
try:
await self.web_find(By.CSS_SELECTOR, "iframe[src*='captcha-delivery.com']", timeout = 2)
LOG.warning("############################################")
LOG.warning("# Captcha present! Please solve the captcha.")
LOG.warning("############################################")
await self.web_await(lambda: self.web_find(By.ID, "login-form") is not None, timeout = 5 * 60)
except TimeoutError:
pass
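# The captcha branch above is a "wait for a human" pattern: probe briefly for the
# captcha iframe and, if present, poll for up to five minutes until the login form
# is reachable again. A generic, runnable sketch of that polling loop using plain
# asyncio (the predicate name is illustrative, not part of this diff):
import asyncio, time

async def wait_for_human(condition, timeout: float = 5 * 60, interval: float = 2.0) -> None:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if await condition():  # e.g. "login form is present again"
            return
        await asyncio.sleep(interval)
    raise TimeoutError("condition not met within timeout")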
LOG.info("Logging in as [%s]...", self.config["login"]["username"]) LOG.info("Logging in as [%s]...", self.config["login"]["username"])
self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/") await self.web_input(By.ID, "email", self.config["login"]["username"])
await self.web_input(By.ID, "password", self.config["login"]["password"])
# close redesign banner await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']")
try:
self.web_click(By.XPATH, '//*[@id="pre-launch-comms-interstitial-frontend"]//button[.//*[text()[contains(.,"nicht mehr anzeigen")]]]')
except NoSuchElementException:
pass
# accept privacy banner
try:
self.web_click(By.ID, "gdpr-banner-accept")
except NoSuchElementException:
pass
self.web_input(By.ID, "login-email", self.config["login"]["username"])
self.web_input(By.ID, "login-password", self.config["login"]["password"])
self.handle_captcha_if_present("login-recaptcha", "but DON'T click 'Einloggen'.")
self.web_click(By.ID, "login-submit")
try: try:
self.web_find(By.ID, "new-device-login", 4) await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = 4)
LOG.warning("############################################") LOG.warning("############################################")
LOG.warning("# Device verification message detected. Use the 'Login bestätigen' URL from the mentioned e-mail into the same browser tab.") LOG.warning("# Device verification message detected. Please handle it.")
LOG.warning("############################################") LOG.warning("############################################")
input("Press ENTER when done...") await ainput("Press ENTER when done...")
except NoSuchElementException: except TimeoutError:
pass pass
def is_logged_in(self) -> bool:
try: try:
user_email_elem = self.web_find(By.ID, "user-email") LOG.info("Handling GDPR disclaimer...")
email_text = user_email_elem.text await self.web_find(By.ID, "gdpr-banner-accept", timeout = 10)
if f"angemeldet als: {self.config['login']['username']}" == email_text: await self.web_click(By.ID, "gdpr-banner-cmp-button")
await self.web_click(By.CSS_SELECTOR, "#ConsentManagementPage button.Button-secondary", timeout = 10)
except TimeoutError:
pass
async def is_logged_in(self) -> bool:
try:
email = await self.web_text(By.ID, "user-email")
if f"angemeldet als: {self.config['login']['username']}" == email:
return True return True
except NoSuchElementException: except TimeoutError:
return False return False
return False return False
def handle_captcha_if_present(self, captcha_element_id:str, msg:str) -> None: async def delete_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
try:
self.web_click(By.XPATH, f"//*[@id='{captcha_element_id}']")
except NoSuchElementException:
return
LOG.warning("############################################")
LOG.warning("# Captcha present! Please solve and close the captcha, %s", msg)
LOG.warning("############################################")
self.webdriver.switch_to.frame(self.web_find(By.CSS_SELECTOR, f"#{captcha_element_id} iframe"))
self.web_await(lambda _: self.webdriver.find_element(By.ID, "recaptcha-anchor").get_attribute("aria-checked") == "true", timeout = 5 * 60)
self.webdriver.switch_to.default_content()
def delete_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
count = 0 count = 0
for (ad_file, ad_cfg, _) in ad_cfgs: for (ad_file, ad_cfg, _) in ad_cfgs:
count += 1 count += 1
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file) LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
self.delete_ad(ad_cfg) await self.delete_ad(ad_cfg)
pause(2000, 4000) await self.web_sleep()
LOG.info("############################################") LOG.info("############################################")
LOG.info("DONE: Deleting %s", pluralize("ad", count)) LOG.info("DONE: Deleting %s", pluralize("ad", count))
LOG.info("############################################") LOG.info("############################################")
def delete_ad(self, ad_cfg: dict[str, Any]) -> bool: async def delete_ad(self, ad_cfg: dict[str, Any]) -> bool:
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"]) LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
self.web_open(f"{self.root_url}/m-meine-anzeigen.html") await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']") csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")
csrf_token = csrf_token_elem.get_attribute("content") csrf_token = csrf_token_elem.attrs["content"]
if csrf_token is None: if csrf_token is None:
raise AssertionError("Expected CSRF Token not found in HTML content!") raise AssertionError("Expected CSRF Token not found in HTML content!")
if self.delete_ads_by_title: if self.delete_ads_by_title:
published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"] published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
for published_ad in published_ads: for published_ad in published_ads:
published_ad_id = int(published_ad.get("id", -1)) published_ad_id = int(published_ad.get("id", -1))
published_ad_title = published_ad.get("title", "") published_ad_title = published_ad.get("title", "")
if ad_cfg["id"] == published_ad_id or ad_cfg["title"] == published_ad_title: if ad_cfg["id"] == published_ad_id or ad_cfg["title"] == published_ad_title:
LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title) LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
self.web_request( await self.web_request(
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}", url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}",
method = "POST", method = "POST",
headers = {"x-csrf-token": csrf_token} headers = {"x-csrf-token": csrf_token}
) )
elif ad_cfg["id"]: elif ad_cfg["id"]:
self.web_request( await self.web_request(
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg['id']}", url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg['id']}",
method = "POST", method = "POST",
headers = {"x-csrf-token": csrf_token}, headers = {"x-csrf-token": csrf_token},
valid_response_codes = [200, 404] valid_response_codes = [200, 404]
) )
pause(1500, 3000) await self.web_sleep()
ad_cfg["id"] = None ad_cfg["id"] = None
return True return True
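# delete_ad above talks to Kleinanzeigen's JSON endpoint directly instead of
# clicking through the UI: read the _csrf meta tag once, then POST the ad id with
# an x-csrf-token header. Condensed sketch using the helpers from this diff
# (a 404 is tolerated because the ad may already be gone):
async def delete_by_id(self, ad_id: int) -> None:
    csrf_token = (await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")).attrs["content"]
    await self.web_request(
        url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_id}",
        method = "POST",
        headers = {"x-csrf-token": csrf_token},
        valid_response_codes = [200, 404]
    )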
def publish_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None: async def publish_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
count = 0 count = 0
for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs: for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs:
count += 1 count += 1
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file) LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
self.publish_ad(ad_file, ad_cfg, ad_cfg_orig) await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig)
self.web_await(lambda _: self.webdriver.find_element(By.ID, "checking-done").is_displayed(), timeout = 5 * 60) await self.web_await(lambda: self.web_check(By.ID, "checking-done", Is.DISPLAYED), timeout = 5 * 60)
LOG.info("############################################") LOG.info("############################################")
LOG.info("DONE: (Re-)published %s", pluralize("ad", count)) LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
LOG.info("############################################") LOG.info("############################################")
def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any]) -> None: async def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any]) -> None:
self.assert_free_ad_limit_not_reached() await self.assert_free_ad_limit_not_reached()
if self.delete_old_ads: if self.delete_old_ads:
self.delete_ad(ad_cfg) await self.delete_ad(ad_cfg)
LOG.info("Publishing ad '%s'...", ad_cfg["title"]) LOG.info("Publishing ad '%s'...", ad_cfg["title"])
@@ -510,45 +496,44 @@ class KleinanzeigenBot(SeleniumMixin):
LOG.debug(" -> effective ad meta:") LOG.debug(" -> effective ad meta:")
YAML().dump(ad_cfg, sys.stdout) YAML().dump(ad_cfg, sys.stdout)
self.web_open(f"{self.root_url}/p-anzeige-aufgeben-schritt2.html") await self.web_open(f"{self.root_url}/p-anzeige-aufgeben-schritt2.html")
if ad_cfg["type"] == "WANTED": if ad_cfg["type"] == "WANTED":
self.web_click(By.ID, "adType2") await self.web_click(By.ID, "adType2")
############################# #############################
# set title # set title
############################# #############################
self.web_input(By.ID, "postad-title", ad_cfg["title"]) await self.web_input(By.ID, "postad-title", ad_cfg["title"])
############################# #############################
# set category # set category
############################# #############################
self.__set_category(ad_file, ad_cfg) await self.__set_category(ad_file, ad_cfg)
############################# #############################
# set shipping type/options/costs # set shipping type/options/costs
############################# #############################
if ad_cfg["shipping_type"] == "PICKUP": if ad_cfg["shipping_type"] == "PICKUP":
try: try:
self.web_click(By.XPATH, '//*[contains(@class, "ShippingPickupSelector")]//label[text()[contains(.,"Nur Abholung")]]/input[@type="radio"]') await self.web_click(By.XPATH,
except NoSuchElementException as ex: '//*[contains(@class, "ShippingPickupSelector")]//label[text()[contains(.,"Nur Abholung")]]/input[@type="radio"]')
except TimeoutError as ex:
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
elif ad_cfg["shipping_options"]: elif ad_cfg["shipping_options"]:
self.web_click(By.XPATH, '//*[contains(@class, "jsx-2623555103")]') await self.web_click(By.CSS_SELECTOR, '[class*="jsx-2623555103"]')
self.web_click(By.XPATH, '//*[contains(@class, "CarrierSelectionModal--Button")]') await self.web_click(By.CSS_SELECTOR, '[class*="CarrierSelectionModal--Button"]')
self.__set_shipping_options(ad_cfg) await self.__set_shipping_options(ad_cfg)
else: else:
try: try:
self.web_click(By.XPATH, '//*[contains(@class, "jsx-2623555103")]') await self.web_click(By.CSS_SELECTOR, '[class*="jsx-2623555103"]')
self.web_click(By.XPATH, '//*[contains(@class, "CarrierSelectionModal--Button")]') await self.web_click(By.CSS_SELECTOR, '[class*="CarrierSelectionModal--Button"]')
self.web_click(By.XPATH, '//*[contains(@class, "CarrierOption--Main")]') await self.web_click(By.CSS_SELECTOR, '[class*="CarrierOption--Main"]')
if ad_cfg["shipping_costs"]: if ad_cfg["shipping_costs"]:
self.web_input(By.XPATH, await self.web_input(By.CSS_SELECTOR, '.IndividualShippingInput input[type="text"]', str.replace(ad_cfg["shipping_costs"], ".", ",")
'//*[contains(@class, "IndividualShippingInput")]//input[@type="text"]',
str.replace(ad_cfg["shipping_costs"], ".", ",")
) )
self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]') await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]')
except NoSuchElementException as ex: except TimeoutError as ex:
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
############################# #############################
@@ -557,11 +542,11 @@ class KleinanzeigenBot(SeleniumMixin):
price_type = ad_cfg["price_type"] price_type = ad_cfg["price_type"]
if price_type != "NOT_APPLICABLE": if price_type != "NOT_APPLICABLE":
try: try:
self.web_select(By.XPATH, "//select[@id='price-type-react' or @id='micro-frontend-price-type' or @id='priceType']", price_type) await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type)
except NoSuchElementException: except TimeoutError:
pass pass
if safe_get(ad_cfg, "price"): if safe_get(ad_cfg, "price"):
self.web_input(By.XPATH, "//input[@id='post-ad-frontend-price' or @id='micro-frontend-price' or @id='pstad-price']", ad_cfg["price"]) await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", ad_cfg["price"])
############################# #############################
# set sell_directly # set sell_directly
@@ -570,81 +555,80 @@ class KleinanzeigenBot(SeleniumMixin):
try: try:
if ad_cfg["shipping_type"] == "SHIPPING": if ad_cfg["shipping_type"] == "SHIPPING":
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}: if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
if not self.webdriver.find_element(By.ID, "radio-buy-now-yes").is_selected(): if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
self.web_click(By.XPATH, '//*[contains(@id, "radio-buy-now-yes")]') await self.web_click(By.ID, 'radio-buy-now-yes')
elif not self.webdriver.find_element(By.ID, "radio-buy-now-no").is_selected(): elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
self.web_click(By.XPATH, '//*[contains(@id, "radio-buy-now-no")]') await self.web_click(By.ID, 'radio-buy-now-no')
except NoSuchElementException as ex: except TimeoutError as ex:
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
############################# #############################
# set description # set description
############################# #############################
self.web_execute("document.querySelector('#pstad-descrptn').value = `" + ad_cfg["description"].replace("`", "'") + "`") await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + ad_cfg["description"].replace("`", "'") + "`")
############################# #############################
# set contact zipcode # set contact zipcode
############################# #############################
if ad_cfg["contact"]["zipcode"]: if ad_cfg["contact"]["zipcode"]:
self.web_input(By.ID, "pstad-zip", ad_cfg["contact"]["zipcode"]) await self.web_input(By.ID, "pstad-zip", ad_cfg["contact"]["zipcode"])
############################# #############################
# set contact street # set contact street
############################# #############################
if ad_cfg["contact"]["street"]: if ad_cfg["contact"]["street"]:
try: try:
if not self.webdriver.find_element(By.ID, "pstad-street").is_enabled(): if await self.web_check(By.ID, "pstad-street", Is.DISABLED):
self.webdriver.find_element(By.ID, "addressVisibility").click() await self.web_click(By.ID, "addressVisibility")
pause(2000) await self.web_sleep()
except NoSuchElementException: except TimeoutError:
# ignore # ignore
pass pass
self.web_input(By.ID, "pstad-street", ad_cfg["contact"]["street"]) await self.web_input(By.ID, "pstad-street", ad_cfg["contact"]["street"])
############################# #############################
# set contact name # set contact name
############################# #############################
if ad_cfg["contact"]["name"] and not self.webdriver.find_element(By.ID, "postad-contactname").get_attribute("readonly"): if ad_cfg["contact"]["name"] and not await self.web_check(By.ID, "postad-contactname", Is.READONLY):
self.web_input(By.ID, "postad-contactname", ad_cfg["contact"]["name"]) await self.web_input(By.ID, "postad-contactname", ad_cfg["contact"]["name"])
############################# #############################
# set contact phone # set contact phone
############################# #############################
if ad_cfg["contact"]["phone"]: if ad_cfg["contact"]["phone"]:
if self.webdriver.find_element(By.ID, "postad-phonenumber").is_displayed(): if await self.web_check(By.ID, "postad-phonenumber", Is.DISPLAYED):
try: try:
if not self.webdriver.find_element(By.ID, "postad-phonenumber").is_enabled(): if await self.web_check(By.ID, "postad-phonenumber", Is.DISABLED):
self.webdriver.find_element(By.ID, "phoneNumberVisibility").click() await self.web_click(By.ID, "phoneNumberVisibility")
pause(2000) await self.web_sleep()
except NoSuchElementException: except TimeoutError:
# ignore # ignore
pass pass
self.web_input(By.ID, "postad-phonenumber", ad_cfg["contact"]["phone"]) await self.web_input(By.ID, "postad-phonenumber", ad_cfg["contact"]["phone"])
############################# #############################
# upload images # upload images
############################# #############################
self.__upload_images(ad_cfg) await self.__upload_images(ad_cfg)
############################# #############################
# submit # submit
############################# #############################
self.handle_captcha_if_present("postAd-recaptcha", "but DON'T click 'Anzeige aufgeben'.")
try: try:
self.web_click(By.ID, "pstad-submit") await self.web_click(By.ID, "pstad-submit")
except NoSuchElementException: except TimeoutError:
# https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/40 # https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/40
self.web_click(By.XPATH, "//fieldset[@id='postad-publish']//*[contains(text(),'Anzeige aufgeben')]") await self.web_click(By.XPATH, "//fieldset[@id='postad-publish']//*[contains(text(),'Anzeige aufgeben')]")
self.web_click(By.ID, "imprint-guidance-submit") await self.web_click(By.ID, "imprint-guidance-submit")
self.web_await(EC.url_contains("p-anzeige-aufgeben-bestaetigung.html?adId="), 20) await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = 20)
ad_cfg_orig["updated_on"] = datetime.utcnow().isoformat() ad_cfg_orig["updated_on"] = datetime.utcnow().isoformat()
if not ad_cfg["created_on"] and not ad_cfg["id"]: if not ad_cfg["created_on"] and not ad_cfg["id"]:
ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"] ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]
# extract the ad id from the URL's query parameter # extract the ad id from the URL's query parameter
current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query) current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query)
ad_id = int(current_url_query_params.get("adId", [])[0]) ad_id = int(current_url_query_params.get("adId", [])[0])
ad_cfg_orig["id"] = ad_id ad_cfg_orig["id"] = ad_id
@@ -652,24 +636,25 @@ class KleinanzeigenBot(SeleniumMixin):
utils.save_dict(ad_file, ad_cfg_orig) utils.save_dict(ad_file, ad_cfg_orig)
def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None: async def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
# click on something to trigger automatic category detection # click on something to trigger automatic category detection
self.web_click(By.ID, "pstad-descrptn") await self.web_click(By.ID, "pstad-descrptn")
try:
self.web_find(By.XPATH, "//*[@id='postad-category-path'][text()]")
is_category_auto_selected = True
except NoSuchElementException:
is_category_auto_selected = False is_category_auto_selected = False
try:
if await self.web_text(By.ID, "postad-category-path"):
is_category_auto_selected = True
except TimeoutError:
pass
if ad_cfg["category"]: if ad_cfg["category"]:
utils.pause(2000) # workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/39 await self.web_sleep() # workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/39
self.web_click(By.ID, "pstad-lnk-chngeCtgry") await self.web_click(By.ID, "pstad-lnk-chngeCtgry")
self.web_find(By.ID, "postad-step1-sbmt") await self.web_find(By.ID, "postad-step1-sbmt")
category_url = f"{self.root_url}/p-kategorie-aendern.html#?path={ad_cfg['category']}" category_url = f"{self.root_url}/p-kategorie-aendern.html#?path={ad_cfg['category']}"
self.web_open(category_url) await self.web_open(category_url)
self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button") await self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button")
else: else:
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed") ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
@@ -680,29 +665,27 @@ class KleinanzeigenBot(SeleniumMixin):
try: try:
# if the <select> element exists but is inside an invisible container, make the container visible # if the <select> element exists but is inside an invisible container, make the container visible
select_container_xpath = f"//div[@class='l-row' and descendant::select[@id='{special_attribute_key}']]" select_container_xpath = f"//div[@class='l-row' and descendant::select[@id='{special_attribute_key}']]"
select_container = self.web_find(By.XPATH, select_container_xpath) if not await self.web_check(By.XPATH, select_container_xpath, Is.DISPLAYED):
if not select_container.is_displayed(): await (await self.web_find(By.XPATH, select_container_xpath)).apply("elem => elem.singleNodeValue.style.display = 'block'")
self.web_execute(f"document.evaluate(\"{select_container_xpath}\"," + except TimeoutError:
" document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.style.display = 'block';")
except BaseException:
pass # nosec pass # nosec
try: try:
self.web_select(By.XPATH, f"//select[@id='{special_attribute_key}']", special_attribute_value) await self.web_select(By.XPATH, f"//select[@id='{special_attribute_key}']", special_attribute_value)
except WebDriverException: except TimeoutError:
LOG.debug("Attribute field '%s' is not of kind dropdown, trying to input as plain text...", special_attribute_key) LOG.debug("Attribute field '%s' is not of kind dropdown, trying to input as plain text...", special_attribute_key)
try: try:
self.web_input(By.ID, special_attribute_key, special_attribute_value) await self.web_input(By.ID, special_attribute_key, special_attribute_value)
except WebDriverException: except TimeoutError:
LOG.debug("Attribute field '%s' is not of kind plain text, trying to input as radio button...", special_attribute_key) LOG.debug("Attribute field '%s' is not of kind plain text, trying to input as radio button...", special_attribute_key)
try: try:
self.web_click(By.XPATH, f"//*[@id='{special_attribute_key}']/option[@value='{special_attribute_value}']") await self.web_click(By.XPATH, f"//*[@id='{special_attribute_key}']/option[@value='{special_attribute_value}']")
except WebDriverException as ex: except TimeoutError as ex:
LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key) LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key)
raise NoSuchElementException(f"Failed to set special attribute [{special_attribute_key}]") from ex raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value) LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None: async def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None:
try: try:
shipping_option_mapping = { shipping_option_mapping = {
"DHL_2": ("Klein", "Paket 2 kg"), "DHL_2": ("Klein", "Paket 2 kg"),
@@ -725,323 +708,98 @@ class KleinanzeigenBot(SeleniumMixin):
raise ValueError("You can only specify shipping options for one package size!") raise ValueError("You can only specify shipping options for one package size!")
shipping_size, = unique_shipping_sizes shipping_size, = unique_shipping_sizes
self.web_click(By.XPATH, f'//*[contains(@class, "SingleSelectionItem--Main")]//input[@type="radio" and @data-testid="{shipping_size}"]') await self.web_click(By.CSS_SELECTOR, f'.SingleSelectionItem--Main input[type=radio][data-testid="{shipping_size}"]')
for shipping_package in shipping_packages: for shipping_package in shipping_packages:
self.web_click( await self.web_click(
By.XPATH, By.XPATH,
'//*[contains(@class, "CarrierSelectionModal")]' '//*[contains(@class, "CarrierSelectionModal")]'
'//*[contains(@class, "CarrierOption")]' '//*[contains(@class, "CarrierOption")]'
f'//*[contains(@class, "CarrierOption--Main") and @data-testid="{shipping_package}"]' f'//*[contains(@class, "CarrierOption--Main") and @data-testid="{shipping_package}"]'
) )
self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]') await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]')
except NoSuchElementException as ex: except TimeoutError as ex:
LOG.debug(ex, exc_info = True) LOG.debug(ex, exc_info = True)
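# __set_shipping_options maps each configured option to a (package size, package
# name) pair, requires all options to agree on a single size, then clicks the
# size radio button and one carrier entry per package. Worked example with the
# one mapping entry visible above:
shipping_options = ["DHL_2"]                                   # from the ad config
mapped = [("Klein", "Paket 2 kg")]                             # via shipping_option_mapping
shipping_sizes = {size for size, _package in mapped}           # {"Klein"}
assert len(shipping_sizes) == 1  # two distinct sizes would abort with the ValueError above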
def __upload_images(self, ad_cfg: dict[str, Any]) -> None: async def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"])) LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
image_upload = self.web_find(By.XPATH, "//input[@type='file']") image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
def count_uploaded_images() -> int:
return len(self.webdriver.find_elements(By.CLASS_NAME, "imagebox-new-thumbnail"))
for image in ad_cfg["images"]: for image in ad_cfg["images"]:
LOG.info(" -> uploading image [%s]", image) LOG.info(" -> uploading image [%s]", image)
previous_uploaded_images_count = count_uploaded_images() await image_upload.send_file(image)
attempt = 0 await self.web_sleep()
while attempt < 3 and previous_uploaded_images_count == count_uploaded_images():
image_upload.send_keys(image)
start_at = time.time()
while previous_uploaded_images_count == count_uploaded_images() and time.time() - start_at < 60:
print(".", end = "", flush = True)
time.sleep(1)
attempt += 1
print(flush = True)
ensure(previous_uploaded_images_count < count_uploaded_images(), f"Couldn't upload image [{image}] within 60 seconds and 3 attempts") async def assert_free_ad_limit_not_reached(self) -> None:
LOG.debug(" => uploaded image within %i seconds", time.time() - start_at)
pause(2000)
def assert_free_ad_limit_not_reached(self) -> None:
try: try:
self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header') await self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header', timeout = 2)
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.") raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
except NoSuchElementException: except TimeoutError:
pass pass
@overrides async def download_ads(self) -> None:
def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
start_at = time.time()
super().web_open(url, timeout, reload_if_already_open)
pause(2000)
# reload the page until no fullscreen ad is displayed anymore
while True:
try:
self.web_find(By.XPATH, "/html/body/header[@id='site-header']", 2)
return
except NoSuchElementException as ex:
elapsed = time.time() - start_at
if elapsed < timeout:
super().web_open(url, timeout - elapsed, True)
else:
raise TimeoutException("Loading page failed, it still shows fullscreen ad.") from ex
def navigate_to_ad_page(self, id_:int | None = None, url:str | None = None) -> bool:
"""
Navigates to an ad page specified with an ad ID; or alternatively by a given URL.
:param id_: if provided (and no url given), the ID is used to search for the ad to navigate to
:param url: if given, this URL is used instead of an id to find the ad page
:return: whether the navigation to the ad page was successful
"""
if not (id_ or url):
raise UserWarning('This function needs either the "id_" or "url" parameter given!')
if url:
self.webdriver.get(url) # navigate to URL directly given
else:
# enter the ad ID into the search bar
self.web_input(By.XPATH, '//*[@id="site-search-query"]', str(id_))
# navigate to ad page and wait
submit_button = self.webdriver.find_element(By.XPATH, '//*[@id="site-search-submit"]')
self.web_await(EC.element_to_be_clickable(submit_button), 15)
try:
submit_button.click()
except ElementClickInterceptedException: # sometimes: special banner might pop up and intercept
LOG.warning('Waiting for unexpected element to close...')
pause(6000, 10000)
submit_button.click()
pause(1000, 2000)
# handle the case that invalid ad ID given
if self.webdriver.current_url.endswith('k0'):
LOG.error('There is no ad under the given ID.')
return False
try: # close (warning) popup, if given
self.webdriver.find_element(By.CSS_SELECTOR, '#vap-ovrly-secure')
LOG.warning('A popup appeared.')
close_button = self.webdriver.find_element(By.CLASS_NAME, 'mfp-close')
close_button.click()
time.sleep(1)
except NoSuchElementException:
print('(no popup)')
return True
def download_images_from_ad_page(self, directory:str, ad_id:int, logger:logging.Logger) -> list[str]:
"""
Downloads all images of an ad.
:param directory: the path of the directory created for this ad
:param ad_id: the ID of the ad to download the images from
:param logger: an initialized logger
:return: the relative paths for all downloaded images
"""
n_images:int
img_paths = []
try:
image_box = self.webdriver.find_element(By.CSS_SELECTOR, '.galleryimage-large')
# if gallery image box exists, proceed with image fetching
n_images = 1
# determine number of images (1 ... N)
next_button:WebElement
try: # check if multiple images given
# edge case: 'Virtueller Rundgang' div could be found by same CSS class
element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info')
image_counter = element_candidates[-1]
n_images = int(image_counter.text[2:])
logger.info('Found %d images.', n_images)
next_button = self.webdriver.find_element(By.CSS_SELECTOR, '.galleryimage--navigation--next')
except (NoSuchElementException, IndexError):
logger.info('Only one image found.')
# download all images from box
img_element = image_box.find_element(By.XPATH, './/div[1]/img')
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
img_nr = 1
dl_counter = 0
while img_nr <= n_images: # scrolling + downloading
current_img_url = img_element.get_attribute('src') # URL of the image
if current_img_url is None:
continue
file_ending = current_img_url.split('.')[-1].lower()
img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
if current_img_url.startswith('https'): # verify https (for Bandit linter)
urllib.request.urlretrieve(current_img_url, img_path) # nosec B310
dl_counter += 1
img_paths.append(img_path.split('/')[-1])
# scroll to next image (if exists)
if img_nr < n_images:
try:
# click next button, wait, and reestablish reference
next_button.click()
self.web_await(lambda _: EC.staleness_of(img_element))
new_div = self.webdriver.find_element(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
img_element = new_div.find_element(By.XPATH, './/img')
except NoSuchElementException:
logger.error('NEXT button in image gallery is missing; aborting image fetching.')
break
img_nr += 1
logger.info('Downloaded %d image(s).', dl_counter)
except NoSuchElementException: # some ads do not require images
logger.warning('No image area found. Continue without downloading images.')
return img_paths
def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]:
"""
Extracts all necessary information from an ad's page.
:param directory: the path of the ad's previously created directory
:param id_: the ad ID, already extracted by a calling function
:return: a dictionary with the keys as given in an ad YAML, and their respective values
"""
info:dict[str, Any] = {'active': True}
# extract basic info
info['type'] = 'OFFER' if 's-anzeige' in self.webdriver.current_url else 'WANTED'
title:str = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-title').text
LOG.info('Extracting information from ad with title \"%s\"', title)
info['title'] = title
descr:str = self.webdriver.find_element(By.XPATH, '//*[@id="viewad-description-text"]').text
info['description'] = descr
extractor = extract.AdExtractor(self.webdriver)
# extract category
info['category'] = extractor.extract_category_from_ad_page()
# get special attributes
info['special_attributes'] = extractor.extract_special_attributes_from_ad_page()
# process pricing
info['price'], info['price_type'] = extractor.extract_pricing_info_from_ad_page()
# process shipping
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = extractor.extract_shipping_info_from_ad_page()
info['sell_directly'] = extractor.extract_sell_directly_from_ad_page()
# fetch images
info['images'] = self.download_images_from_ad_page(directory, id_, LOG)
# process address
info['contact'] = extractor.extract_contact_from_ad_page()
# process meta info
info['republication_interval'] = 7 # a default value for downloaded ads
info['id'] = id_
try: # try different locations known for creation date element
creation_date = self.webdriver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/'
'div[1]/span').text
except NoSuchElementException:
creation_date = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)').text
# convert creation date to ISO format
created_parts = creation_date.split('.')
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
creation_date = datetime.fromisoformat(creation_date).isoformat()
info['created_on'] = creation_date
info['updated_on'] = None # will be set later on
return info
def download_ad_page(self, id_:int) -> None:
"""
Downloads an ad to a specific location, specified by config and ad ID.
NOTE: Requires that the driver session currently is on the ad page.
:param id_: the ad ID
"""
# create sub-directory for ad(s) to download (if necessary):
relative_directory = 'downloaded-ads'
# make sure configured base directory exists
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
os.mkdir(relative_directory)
LOG.info('Created ads directory at /%s.', relative_directory)
new_base_dir = os.path.join(relative_directory, f'ad_{id_}')
if os.path.exists(new_base_dir):
LOG.info('Deleting current folder of ad...')
shutil.rmtree(new_base_dir)
os.mkdir(new_base_dir)
LOG.info('New directory for ad created at %s.', new_base_dir)
# call extraction function
info = self.extract_ad_page_info(new_base_dir, id_)
ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml'
utils.save_dict(ad_file_path, info)
def start_download_routine(self) -> None:
""" """
Determines which download mode was chosen with the arguments, and calls the specified download routine. Determines which download mode was chosen with the arguments, and calls the specified download routine.
This downloads either all, only unsaved (new), or specific ads given by ID. This downloads either all, only unsaved (new), or specific ads given by ID.
""" """
ad_extractor = extract.AdExtractor(self.browser)
# use relevant download routine # use relevant download routine
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
LOG.info('Scanning your ad overview...') LOG.info('Scanning your ad overview...')
ext = extract.AdExtractor(self.webdriver) own_ad_urls = await ad_extractor.extract_own_ads_urls()
refs = ext.extract_own_ads_references() LOG.info('%d ads were found!', len(own_ad_urls))
LOG.info('%d ads were found!', len(refs))
if self.ads_selector == 'all': # download all of your ads if self.ads_selector == 'all': # download all of your ads
LOG.info('Start fetch task for all your ads!') LOG.info('Start fetch task for all your ads!')
success_count = 0 success_count = 0
# call download function for each ad page # call download function for each ad page
for ref in refs: for ad_url in own_ad_urls:
ref_ad_id: int = utils.extract_ad_id_from_ad_link(ref) ad_id = ad_extractor.extract_ad_id_from_ad_url(ad_url)
if self.navigate_to_ad_page(url = ref): if await ad_extractor.navigate_to_ad_page(ad_url):
self.download_ad_page(ref_ad_id) await ad_extractor.download_ad(ad_id)
success_count += 1 success_count += 1
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(refs)) LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
elif self.ads_selector == 'new': # download only unsaved ads elif self.ads_selector == 'new': # download only unsaved ads
# determine ad IDs from links
ref_ad_ids = [utils.extract_ad_id_from_ad_link(r) for r in refs]
ref_pairs = list(zip(refs, ref_ad_ids))
# check which ads already saved # check which ads already saved
saved_ad_ids = [] saved_ad_ids = []
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
for ad_ in ads: for ad in ads:
ad_id = int(ad_[2]['id']) ad_id = int(ad[2]['id'])
saved_ad_ids.append(ad_id) saved_ad_ids.append(ad_id)
# determine ad IDs from links
ad_id_by_url = {url:ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls}
LOG.info('Start fetch task for your unsaved ads!') LOG.info('Start fetch task for your unsaved ads!')
new_count = 0 new_count = 0
for ref_pair in ref_pairs: for ad_url, ad_id in ad_id_by_url.items():
# check if ad with ID already saved # check if ad with ID already saved
id_: int = ref_pair[1] if ad_id in saved_ad_ids:
if id_ in saved_ad_ids: LOG.info('The ad with id %d has already been saved.', ad_id)
LOG.info('The ad with id %d has already been saved.', id_)
continue continue
if self.navigate_to_ad_page(url = ref_pair[0]): if await ad_extractor.navigate_to_ad_page(ad_url):
self.download_ad_page(id_) await ad_extractor.download_ad(ad_id)
new_count += 1 new_count += 1
LOG.info('%d new ad(s) were downloaded from your profile.', new_count) LOG.info('%d new ad(s) were downloaded from your profile.', new_count)
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s) elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
ids = [int(n) for n in self.ads_selector.split(',')] ids = [int(n) for n in self.ads_selector.split(',')]
LOG.info('Start fetch task for the ad(s) with the id(s):') LOG.info('Start fetch task for the ad(s) with the id(s):')
LOG.info(' | '.join([str(id_) for id_ in ids])) LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
for id_ in ids: # call download routine for every id for ad_id in ids: # call download routine for every id
exists = self.navigate_to_ad_page(id_) exists = await ad_extractor.navigate_to_ad_page(ad_id)
if exists: if exists:
self.download_ad_page(id_) await ad_extractor.download_ad(ad_id)
LOG.info('Downloaded ad with id %d', id_) LOG.info('Downloaded ad with id %d', ad_id)
else: else:
LOG.error('The page with the id %d does not exist!', id_) LOG.error('The page with the id %d does not exist!', ad_id)
############################# #############################
@@ -1065,7 +823,9 @@ def main(args:list[str]) -> None:
sys.excepthook = utils.on_exception sys.excepthook = utils.on_exception
atexit.register(utils.on_exit) atexit.register(utils.on_exit)
KleinanzeigenBot().run(args) bot = KleinanzeigenBot()
atexit.register(bot.close_browser_session)
nodriver.loop().run_until_complete(bot.run(args))
if __name__ == "__main__": if __name__ == "__main__":
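# With the switch to nodriver the entry point becomes fully asynchronous: main()
# builds the bot, registers close_browser_session via atexit so Chrome is torn
# down even on unhandled errors, and drives run() on nodriver's event loop.
# Minimal sketch of this wiring as used above (argument passing is illustrative):
import atexit, sys
import nodriver

def main(args: list[str]) -> None:
    bot = KleinanzeigenBot()
    atexit.register(bot.close_browser_session)
    nodriver.loop().run_until_complete(bot.run(args))

if __name__ == "__main__":
    main(sys.argv)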
@@ -3,51 +3,291 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/ SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
""" """
import json import json, logging, os, shutil
from decimal import DecimalException import urllib.request as urllib_request
from typing import Any from datetime import datetime
from typing import Any, Final
from selenium.common.exceptions import NoSuchElementException from .utils import is_integer, parse_decimal, save_dict
from selenium.webdriver.common.by import By from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
from selenium.webdriver.remote.webdriver import WebDriver
import selenium.webdriver.support.expected_conditions as EC
from .selenium_mixin import SeleniumMixin LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.AdExtractor")
from .utils import parse_decimal, pause
class AdExtractor(SeleniumMixin): class AdExtractor(WebScrapingMixin):
""" """
Wrapper class for ad extraction that uses an active bot's web driver to extract specific elements from an ad page. Wrapper class for ad extraction that uses an active bot's browser session to extract specific elements from an ad page.
""" """
def __init__(self, driver:WebDriver): def __init__(self, browser:Browser):
super().__init__() super().__init__()
self.webdriver = driver self.browser = browser
def extract_category_from_ad_page(self) -> str: async def download_ad(self, ad_id:int) -> None:
"""
Downloads an ad to a specific location, specified by config and ad ID.
NOTE: Requires that the driver session currently is on the ad page.
:param ad_id: the ad ID
"""
# create sub-directory for ad(s) to download (if necessary):
relative_directory = 'downloaded-ads'
# make sure configured base directory exists
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
os.mkdir(relative_directory)
LOG.info('Created ads directory at ./%s.', relative_directory)
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
if os.path.exists(new_base_dir):
LOG.info('Deleting current folder of ad...')
shutil.rmtree(new_base_dir)
os.mkdir(new_base_dir)
LOG.info('New directory for ad created at %s.', new_base_dir)
# call extraction function
info = await self._extract_ad_page_info(new_base_dir, ad_id)
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
save_dict(ad_file_path, info)
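# download_ad gives every ad its own folder and recreates it on re-download.
# Resulting layout (the ad id is illustrative):
#   downloaded-ads/
#       ad_1234567890/
#           ad_1234567890.yaml        # extracted fields (title, price, contact, ...)
#           ad_1234567890__img1.jpg   # images fetched from the gallery, numbered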
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
"""
Downloads all images of an ad.
:param directory: the path of the directory created for this ad
:param ad_id: the ID of the ad to download the images from
:return: the relative paths for all downloaded images
"""
n_images:int
img_paths = []
try:
# download all images from box
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
LOG.info('Found %d images.', n_images)
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
img_nr = 1
dl_counter = 0
while img_nr <= n_images: # scrolling + downloading
current_img_url = img_element.attrs['src'] # URL of the image
if current_img_url is None:
continue
file_ending = current_img_url.split('.')[-1].lower()
img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
if current_img_url.startswith('https'): # verify https (for Bandit linter)
urllib_request.urlretrieve(current_img_url, img_path) # nosec B310
dl_counter += 1
img_paths.append(img_path.split('/')[-1])
# navigate to next image (if exists)
if img_nr < n_images:
try:
# click next button, wait, and re-establish reference
await (await self.web_find(By.CLASS_NAME, 'galleryimage--navigation--next')).click()
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
except TimeoutError:
LOG.error('NEXT button in image gallery is missing; aborting image fetching.')
break
img_nr += 1
LOG.info('Downloaded %d image(s).', dl_counter)
except TimeoutError: # some ads do not require images
LOG.warning('No image area found. Continue without downloading images.')
return img_paths
def extract_ad_id_from_ad_url(self, url: str) -> int:
"""
Extracts the ID of an ad, given by its reference link.
:param url: the URL to the ad page
:return: the ad ID, a (ten-digit) integer number
"""
num_part = url.split('/')[-1] # suffix
id_part = num_part.split('-')[0]
try:
return int(id_part)
except ValueError:
LOG.warning('The ad ID could not be extracted from the given URL %s', url)
return -1
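# The id is the leading number of the URL's last path segment. Illustrative
# usage (URL and id are made up; extractor is an AdExtractor instance):
extractor.extract_ad_id_from_ad_url(
    "https://www.kleinanzeigen.de/s-anzeige/gaming-notebook/1234567890-278-3331"
)  # -> 1234567890; a non-numeric segment logs a warning and yields -1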
async def extract_own_ads_urls(self) -> list[str]:
"""
Extracts the references to all own ads.
:return: the links to your ad pages
"""
# navigate to "your ads" page
await self.web_open('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
await self.web_sleep(2000, 3000)
# collect ad references:
pagination_section = await self.web_find(By.CSS_SELECTOR, 'section:nth-of-type(4)',
parent = await self.web_find(By.CSS_SELECTOR, '.l-splitpage'))
# scroll down to load dynamically
await self.web_scroll_page_down()
await self.web_sleep(2000, 3000)
# detect multi-page
try:
pagination = await self.web_find(By.CSS_SELECTOR, 'div > div:nth-of-type(2) > div:nth-of-type(2) > div',
parent = pagination_section)
except TimeoutError: # 0 ads - no pagination area
LOG.warning('There are currently no ads on your profile!')
return []
n_buttons = len(await self.web_find_all(By.CSS_SELECTOR, 'button',
parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination)))
if n_buttons > 1:
multi_page = True
LOG.info('It seems like you have many ads!')
else:
multi_page = False
LOG.info('It seems like all your ads fit on one overview page.')
refs:list[str] = []
while True: # loop reference extraction until no more forward page
# extract references
list_items = await self.web_find_all(By.CLASS_NAME, 'cardbox',
parent = await self.web_find(By.ID, 'my-manageads-adlist'))
refs += [
(await self.web_find(By.CSS_SELECTOR, 'article > section > section:nth-of-type(2) > h2 > div > a', parent = li)).attrs['href']
for li in list_items
]
if not multi_page: # only one iteration for single-page overview
break
# check if last page
nav_button:Element = (await self.web_find_all(By.CSS_SELECTOR, 'button.jsx-2828608826'))[-1]
if nav_button.attrs['title'] != 'Nächste':
LOG.info('Last ad overview page explored.')
break
# navigate to next overview page
await nav_button.click()
await self.web_sleep(2000, 3000)
await self.web_scroll_page_down()
return refs
async def navigate_to_ad_page(self, id_or_url:int | str) -> bool:
"""
Navigates to an ad page specified with an ad ID; or alternatively by a given URL.
:return: whether the navigation to the ad page was successful
"""
if is_integer(id_or_url):
# enter the ad ID into the search bar
await self.web_input(By.ID, "site-search-query", id_or_url)
# navigate to ad page and wait
await self.web_check(By.ID, 'site-search-submit', Is.CLICKABLE)
submit_button = await self.web_find(By.ID, 'site-search-submit')
await submit_button.click()
else:
await self.web_open(str(id_or_url)) # navigate to URL directly given
await self.web_sleep()
# handle the case that invalid ad ID given
if self.page.url.endswith('k0'):
LOG.error('There is no ad under the given ID.')
return False
# close (warning) popup, if given
try:
await self.web_find(By.ID, 'vap-ovrly-secure')
LOG.warning('A popup appeared.')
await self.web_click(By.CLASS_NAME, 'mfp-close')
await self.web_sleep()
except TimeoutError:
pass
return True
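# navigate_to_ad_page accepts either a numeric ad id (typed into the site
# search) or a full ad URL (opened directly) and returns False when the site
# lands on a '...k0' URL, i.e. the ad does not exist. Illustrative usage:
found = await extractor.navigate_to_ad_page(1234567890)  # by id, via the search bar
found = await extractor.navigate_to_ad_page("https://www.kleinanzeigen.de/s-anzeige/gaming-notebook/1234567890-278-3331")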
async def _extract_ad_page_info(self, directory:str, ad_id:int) -> dict[str, Any]:
"""
Extracts all necessary information from an ad's page.
:param directory: the path of the ad's previously created directory
:param ad_id: the ad ID, already extracted by a calling function
:return: a dictionary with the keys as given in an ad YAML, and their respective values
"""
info:dict[str, Any] = {'active': True}
# extract basic info
info['type'] = 'OFFER' if 's-anzeige' in self.page.url else 'WANTED'
title:str = await self.web_text(By.ID, 'viewad-title')
LOG.info('Extracting information from ad with title \"%s\"', title)
info['title'] = title
descr:str = await self.web_text(By.ID, 'viewad-description-text')
info['description'] = descr
# extract category
info['category'] = await self._extract_category_from_ad_page()
# get special attributes
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
# process pricing
info['price'], info['price_type'] = await self._extract_pricing_info_from_ad_page()
# process shipping
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = await self._extract_shipping_info_from_ad_page()
info['sell_directly'] = await self._extract_sell_directly_from_ad_page()
# fetch images
info['images'] = await self._download_images_from_ad_page(directory, ad_id)
# process address
info['contact'] = await self._extract_contact_from_ad_page()
# process meta info
info['republication_interval'] = 7 # a default value for downloaded ads
info['id'] = ad_id
try: # try different locations known for creation date element
creation_date = await self.web_text(By.XPATH,
'/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span')
except TimeoutError:
creation_date = await self.web_text(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)')
# convert creation date to ISO format
created_parts = creation_date.split('.')
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
creation_date = datetime.fromisoformat(creation_date).isoformat()
info['created_on'] = creation_date
info['updated_on'] = None # will be set later on
return info
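# The date normalisation above turns the German 'DD.MM.YYYY' display format into
# ISO 8601 (datetime is already imported at the top of this file). Worked example:
created_parts = '07.03.2024'.split('.')  # ['07', '03', '2024']
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
datetime.fromisoformat(creation_date).isoformat()  # '2024-03-07T00:00:00'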
async def _extract_category_from_ad_page(self) -> str:
""" """
Extracts a category of an ad in numerical form. Extracts a category of an ad in numerical form.
Assumes that the web driver currently shows an ad page. Assumes that the web driver currently shows an ad page.
:return: a category string of form abc/def, where a-f are digits :return: a category string of form abc/def, where a-f are digits
""" """
category_line = self.webdriver.find_element(By.XPATH, '//*[@id="vap-brdcrmb"]') category_line = await self.web_find(By.ID, 'vap-brdcrmb')
category_first_part = category_line.find_element(By.XPATH, './/a[2]') category_first_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
category_second_part = category_line.find_element(By.XPATH, './/a[3]') category_second_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
cat_num_first = category_first_part.get_attribute('href').split('/')[-1][1:] cat_num_first = category_first_part.attrs['href'].split('/')[-1][1:]
cat_num_second = category_second_part.get_attribute('href').split('/')[-1][1:] cat_num_second = category_second_part.attrs['href'].split('/')[-1][1:]
category:str = cat_num_first + '/' + cat_num_second category:str = cat_num_first + '/' + cat_num_second
return category return category
def extract_special_attributes_from_ad_page(self) -> dict[str, Any]: async def _extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
""" """
Extracts the special attributes from an ad page. Extracts the special attributes from an ad page.
:return: a dictionary (possibly empty) where the keys are the attribute names, mapped to their values :return: a dictionary (possibly empty) where the keys are the attribute names, mapped to their values
""" """
belen_conf = self.webdriver.execute_script("return window.BelenConf") belen_conf = await self.web_execute("window.BelenConf")
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"] special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
special_attributes = json.loads(special_attributes_str) special_attributes = json.loads(special_attributes_str)
if not isinstance(special_attributes, dict): if not isinstance(special_attributes, dict):
@@ -58,36 +298,32 @@ class AdExtractor(SeleniumMixin):
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')} special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
return special_attributes return special_attributes
def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]: async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
""" """
Extracts the pricing information (price and pricing type) from an ad page. Extracts the pricing information (price and pricing type) from an ad page.
:return: the price of the offer (optional); and the pricing type :return: the price of the offer (optional); and the pricing type
""" """
try: try:
price_str:str = self.webdriver.find_element(By.CLASS_NAME, 'boxedarticle--price').text price_str:str = await self.web_text(By.ID, 'viewad-price')
price_type:str price:int | None = None
price:float | None = -1
match price_str.split()[-1]: match price_str.split()[-1]:
case '€': case '€':
price_type = 'FIXED' price_type = 'FIXED'
price = float(parse_decimal(price_str.split()[0].replace('.', ''))) price = int(price_str.split()[0].replace('.', ''))
case 'VB': # can be either 'X € VB', or just 'VB' case 'VB':
price_type = 'NEGOTIABLE' price_type = 'NEGOTIABLE'
try: if not price_str == "VB": # can be either 'X € VB', or just 'VB'
price = float(parse_decimal(price_str.split()[0].replace('.', ''))) price = int(price_str.split()[0])
except DecimalException:
price = None
case 'verschenken': case 'verschenken':
price_type = 'GIVE_AWAY' price_type = 'GIVE_AWAY'
price = None
case _: case _:
price_type = 'NOT_APPLICABLE' price_type = 'NOT_APPLICABLE'
return price, price_type return price, price_type
except NoSuchElementException: # no 'commercial' ad, has no pricing box etc. except TimeoutError: # no 'commercial' ad, has no pricing box etc.
return None, 'NOT_APPLICABLE' return None, 'NOT_APPLICABLE'
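# The match above keys on the last whitespace-separated token of the price line.
# Worked examples of the resulting (price, price_type) pairs:
#   '75 €'           -> (75,   'FIXED')
#   '75 € VB'        -> (75,   'NEGOTIABLE')    # price present before 'VB'
#   'VB'             -> (None, 'NEGOTIABLE')    # negotiable without a price
#   'Zu verschenken' -> (None, 'GIVE_AWAY')
#   no price box     -> (None, 'NOT_APPLICABLE')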
def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]: async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
""" """
Extracts shipping information from an ad page. Extracts shipping information from an ad page.
@@ -95,8 +331,7 @@ class AdExtractor(SeleniumMixin):
""" """
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None
try: try:
shipping_text = self.webdriver.find_element(By.CSS_SELECTOR, '.boxedarticle--details--shipping') \ shipping_text = await self.web_text(By.ID, 'boxedarticle--details--shipping')
.text.strip()
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung' # e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
if shipping_text == 'Nur Abholung': if shipping_text == 'Nur Abholung':
ship_type = 'PICKUP' ship_type = 'PICKUP'
@@ -124,115 +359,58 @@ class AdExtractor(SeleniumMixin):
if shipping_price in shipping_text: if shipping_price in shipping_text:
shipping_options = [shipping_option] shipping_options = [shipping_option]
break break
except NoSuchElementException: # no pricing box -> no shipping given except TimeoutError: # no pricing box -> no shipping given
ship_type = 'NOT_APPLICABLE' ship_type = 'NOT_APPLICABLE'
return ship_type, ship_costs, shipping_options return ship_type, ship_costs, shipping_options
def extract_sell_directly_from_ad_page(self) -> bool | None: async def _extract_sell_directly_from_ad_page(self) -> bool | None:
""" """
Extracts the sell directly option from an ad page. Extracts the sell directly option from an ad page.
:return: a boolean indicating whether the sell directly option is active (optional) :return: a boolean indicating whether the sell directly option is active (optional)
""" """
try: try:
buy_now_is_active = self.webdriver.find_element(By.ID, 'j-buy-now').text == "Direkt kaufen" buy_now_is_active:bool = (await self.web_text(By.ID, 'j-buy-now')) == "Direkt kaufen"
return buy_now_is_active return buy_now_is_active
except NoSuchElementException: except TimeoutError:
return None return None
def extract_contact_from_ad_page(self) -> dict[str, (str | None)]: async def _extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
""" """
Processes the address part involving street (optional), zip code + city, and phone number (optional). Processes the address part involving street (optional), zip code + city, and phone number (optional).
:return: a dictionary containing the address parts with their corresponding values :return: a dictionary containing the address parts with their corresponding values
""" """
contact:dict[str, (str | None)] = {} contact:dict[str, (str | None)] = {}
address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality') address_text = await self.web_text(By.ID, 'viewad-locality')
address_text = address_element.text.strip()
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt # format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
try: try:
street_element = self.webdriver.find_element(By.XPATH, '//*[@id="street-address"]') street = (await self.web_text(By.ID, 'street-address'))[:-1] # trailing comma
street = street_element.text[:-2] # trailing comma and whitespace
contact['street'] = street contact['street'] = street
except NoSuchElementException: except TimeoutError:
print('No street given in the contact.') LOG.info('No street given in the contact.')
# construct remaining address # construct remaining address
address_halves = address_text.split(' - ') address_halves = address_text.split(' - ')
address_left_parts = address_halves[0].split(' ') # zip code and region/city address_left_parts = address_halves[0].split(' ') # zip code and region/city
contact['zipcode'] = address_left_parts[0] contact['zipcode'] = address_left_parts[0]
contact_person_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-contact') contact_person_element:Element = await self.web_find(By.ID, 'viewad-contact')
name_element = contact_person_element.find_element(By.CLASS_NAME, 'iconlist-text') name_element = await self.web_find(By.CLASS_NAME, 'iconlist-text', parent = contact_person_element)
try: try:
name = name_element.find_element(By.TAG_NAME, 'a').text name = await self.web_text(By.TAG_NAME, 'a', parent = name_element)
except NoSuchElementException: # edge case: name without link except TimeoutError: # edge case: name without link
name = name_element.find_element(By.TAG_NAME, 'span').text name = await self.web_text(By.TAG_NAME, 'span', parent = name_element)
contact['name'] = name contact['name'] = name
if 'street' not in contact: if 'street' not in contact:
contact['street'] = None contact['street'] = None
try: # phone number is unusual for non-professional sellers today try: # phone number is unusual for non-professional sellers today
phone_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-contact-phone') phone_element = await self.web_find(By.ID, 'viewad-contact-phone')
phone_number = phone_element.find_element(By.TAG_NAME, 'a').text phone_number = await self.web_text(By.TAG_NAME, 'a', parent = phone_element)
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0') contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0')
except NoSuchElementException: except TimeoutError:
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users) contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users)
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/ # also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
return contact return contact
def extract_own_ads_references(self) -> list[str]:
"""
Extracts the references to all own ads.
:return: the links to your ad pages
"""
# navigate to your ads page
self.webdriver.get('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
self.web_await(EC.url_contains('meine-anzeigen'), 15)
pause(2000, 3000)
# collect ad references:
pagination_section = self.webdriver.find_element(By.CSS_SELECTOR, '.l-splitpage')\
.find_element(By.XPATH, './/section[4]')
# scroll down to load dynamically
self.web_scroll_page_down()
pause(2000, 3000)
# detect multi-page
try:
pagination = pagination_section.find_element(By.XPATH, './/div/div[2]/div[2]/div') # Pagination
except NoSuchElementException: # 0 ads - no pagination area
print('There currently seem to be no ads on your profile!')
return []
n_buttons = len(pagination.find_element(By.XPATH, './/div[1]').find_elements(By.TAG_NAME, 'button'))
multi_page:bool
if n_buttons > 1:
multi_page = True
print('It seems like you have many ads!')
else:
multi_page = False
print('It seems like all your ads fit on one overview page.')
refs:list[str] = []
while True: # loop reference extraction until no more forward page
# extract references
list_section = self.webdriver.find_element(By.XPATH, '//*[@id="my-manageads-adlist"]')
list_items = list_section.find_elements(By.CLASS_NAME, 'cardbox')
refs += [li.find_element(By.XPATH, 'article/section/section[2]/h2/div/a').get_attribute('href') for li in list_items]
if not multi_page: # only one iteration for single-page overview
break
# check if last page
nav_button = self.webdriver.find_elements(By.CSS_SELECTOR, 'button.jsx-2828608826')[-1]
if nav_button.get_attribute('title') != 'Nächste':
print('Last ad overview page explored.')
break
# navigate to next overview page
nav_button.click()
pause(2000, 3000)
self.web_scroll_page_down()
return refs
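
Taken together, the extractor methods above are now async coroutines driven by nodriver instead of blocking Selenium calls. A minimal usage sketch (the extractor wiring and the ad URL are assumptions for illustration, not part of this diff):

import nodriver

async def dump_ad(extractor, ad_url: str) -> None:
    # assumes extractor.create_browser_session() has already been awaited
    await extractor.web_open(ad_url)
    price, price_type = await extractor._extract_pricing_info_from_ad_page()
    contact = await extractor._extract_contact_from_ad_page()
    print(price, price_type, contact.get('zipcode'))

# nodriver manages its own event loop:
# nodriver.loop().run_until_complete(dump_ad(extractor, "https://www.kleinanzeigen.de/s-anzeige/..."))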

View File

@@ -27,12 +27,7 @@ categories: []
 # browser configuration
 browser:
   # https://peter.sh/experiments/chromium-command-line-switches/
-  arguments:
-    # https://stackoverflow.com/a/50725918/5116073
-    - --disable-dev-shm-usage
-    - --no-sandbox
-    # --headless
-    # --start-maximized
+  arguments: []
   binary_location: # path to custom browser executable, if not specified will be looked up on PATH
   extensions: [] # a list of .crx extension files to be loaded
   use_private_window: true

View File

@@ -1,322 +0,0 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import logging, os, platform, shutil, time
from collections.abc import Callable, Iterable
from typing import Any, Final, TypeVar
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.chromium.options import ChromiumOptions
from selenium.webdriver.chromium.webdriver import ChromiumDriver
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
import selenium_stealth
from .utils import ensure, pause, T
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
class BrowserConfig:
def __init__(self) -> None:
self.arguments:Iterable[str] = []
self.binary_location:str | None = None
self.extensions:Iterable[str] = []
self.use_private_window:bool = True
self.user_data_dir:str = ""
self.profile_name:str = ""
CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions) # pylint: disable=invalid-name
class SeleniumMixin:
def __init__(self) -> None:
os.environ["SE_AVOID_STATS"] = "true" # see https://www.selenium.dev/documentation/selenium_manager/
self.browser_config:Final[BrowserConfig] = BrowserConfig()
self.webdriver:WebDriver = None
def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS:
if self.browser_config.use_private_window:
if isinstance(browser_options, webdriver.EdgeOptions):
browser_options.add_argument("-inprivate")
else:
browser_options.add_argument("--incognito")
if self.browser_config.user_data_dir:
LOG.info(" -> Browser User Data Dir: %s", self.browser_config.user_data_dir)
browser_options.add_argument(f"--user-data-dir={self.browser_config.user_data_dir}")
if self.browser_config.profile_name:
LOG.info(" -> Browser Profile Name: %s", self.browser_config.profile_name)
browser_options.add_argument(f"--profile-directory={self.browser_config.profile_name}")
browser_options.add_argument("--disable-crash-reporter")
browser_options.add_argument("--no-first-run")
browser_options.add_argument("--no-service-autorun")
for chrome_option in self.browser_config.arguments:
LOG.info(" -> Custom chrome argument: %s", chrome_option)
browser_options.add_argument(chrome_option)
LOG.debug("Effective browser arguments: %s", browser_options.arguments)
for crx_extension in self.browser_config.extensions:
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
browser_options.add_extension(crx_extension)
LOG.debug("Effective browser extensions: %s", browser_options.extensions)
browser_options.add_experimental_option("excludeSwitches", ["enable-automation"])
browser_options.add_experimental_option("useAutomationExtension", False)
browser_options.add_experimental_option("prefs", {
"credentials_enable_service": False,
"profile.password_manager_enabled": False,
"profile.default_content_setting_values.notifications": 2, # 1 = allow, 2 = block browser notifications
"devtools.preferences.currentDockState": "\"bottom\""
})
if not LOG.isEnabledFor(logging.DEBUG):
browser_options.add_argument("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
LOG.debug("Effective experimental options: %s", browser_options.experimental_options)
if self.browser_config.binary_location:
browser_options.binary_location = self.browser_config.binary_location
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)
return browser_options
def create_webdriver_session(self) -> None:
LOG.info("Creating WebDriver session...")
if self.browser_config.binary_location:
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
else:
self.browser_config.binary_location = self.get_compatible_browser()
if "edge" in self.browser_config.binary_location.lower():
os.environ["MSEDGEDRIVER_TELEMETRY_OPTOUT"] = "1" # https://docs.microsoft.com/en-us/microsoft-edge/privacy-whitepaper/#microsoft-edge-driver
browser_options = self._init_browser_options(webdriver.EdgeOptions())
browser_options.binary_location = self.browser_config.binary_location
self.webdriver = webdriver.Edge(options = browser_options)
else:
browser_options = self._init_browser_options(webdriver.ChromeOptions())
browser_options.binary_location = self.browser_config.binary_location
self.webdriver = webdriver.Chrome(options = browser_options)
LOG.info(" -> Chrome driver: %s", self.webdriver.service.path)
# workaround to support Edge, see https://github.com/diprajpatra/selenium-stealth/pull/25
selenium_stealth.Driver = ChromiumDriver
selenium_stealth.stealth(self.webdriver, # https://github.com/diprajpatra/selenium-stealth#args
languages = ("de-DE", "de", "en-US", "en"),
platform = "Win32",
fix_hairline = True,
)
LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access
def get_compatible_browser(self) -> str | None:
match platform.system():
case "Linux":
browser_paths = [
shutil.which("chromium"),
shutil.which("chromium-browser"),
shutil.which("google-chrome"),
shutil.which("microsoft-edge")
]
case "Darwin":
browser_paths = [
"/Applications/Chromium.app/Contents/MacOS/Chromium",
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
]
case "Windows":
browser_paths = [
os.environ.get("ProgramFiles", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
os.environ.get("ProgramFiles(x86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
os.environ["ProgramFiles"] + r'\Chromium\Application\chrome.exe',
os.environ["ProgramFiles(x86)"] + r'\Chromium\Application\chrome.exe',
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
os.environ["ProgramFiles"] + r'\Chrome\Application\chrome.exe',
os.environ["ProgramFiles(x86)"] + r'\Chrome\Application\chrome.exe',
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
shutil.which("msedge.exe"),
shutil.which("chromium.exe"),
shutil.which("chrome.exe")
]
case _ as os_name:
LOG.warning("Installed browser for OS [%s] could not be detected", os_name)
return None
for browser_path in browser_paths:
if browser_path and os.path.isfile(browser_path):
return browser_path
raise AssertionError("Installed browser could not be detected")
def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
"""
Blocks/waits until the given condition is met.
:param timeout: timeout in seconds
:raises TimeoutException: if element could not be found within time
"""
max_attempts = 2
for attempt in range(max_attempts + 1)[1:]:
try:
return WebDriverWait(self.webdriver, timeout).until(condition) # type: ignore[no-any-return]
except TimeoutException as ex:
if exception_on_timeout:
raise exception_on_timeout() from ex
raise ex
except WebDriverException as ex:
# temporary workaround for:
# - https://groups.google.com/g/chromedriver-users/c/Z_CaHJTJnLw
# - https://bugs.chromium.org/p/chromedriver/issues/detail?id=4048
if ex.msg == "target frame detached" and attempt < max_attempts:
LOG.warning(ex)
else:
raise ex
raise AssertionError("Should never be reached.")
def web_click(self, selector_type:By, selector_value:str, timeout:float = 5) -> WebElement:
"""
:param timeout: timeout in seconds
:raises NoSuchElementException: if element could not be found within time
"""
elem = self.web_await(
EC.element_to_be_clickable((selector_type, selector_value)),
timeout,
lambda: NoSuchElementException(f"Element {selector_type}:{selector_value} not found or not clickable")
)
elem.click()
pause()
return elem
def web_execute(self, javascript:str) -> Any:
"""
Executes the given JavaScript code in the context of the current page.
:return: The command's JSON response
"""
return self.webdriver.execute_script(javascript)
def web_find(self, selector_type:By, selector_value:str, timeout:float = 5) -> WebElement:
"""
Locates an HTML element.
:param timeout: timeout in seconds
:raises NoSuchElementException: if element could not be found within time
"""
return self.web_await(
EC.presence_of_element_located((selector_type, selector_value)),
timeout,
lambda: NoSuchElementException(f"Element {selector_type}='{selector_value}' not found")
)
def web_input(self, selector_type:By, selector_value:str, text:str, timeout:float = 5) -> WebElement:
"""
Enters text into an HTML input field.
:param timeout: timeout in seconds
:raises NoSuchElementException: if element could not be found within time
"""
input_field = self.web_find(selector_type, selector_value, timeout)
input_field.clear()
input_field.send_keys(text)
pause()
return input_field
def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
"""
:param url: url to open in browser
:param timeout: timespan in seconds within the page needs to be loaded
:param reload_if_already_open: if False does nothing if the URL is already open in the browser
:raises TimeoutException: if page did not open within given timespan
"""
LOG.debug(" -> Opening [%s]...", url)
if not reload_if_already_open and url == self.webdriver.current_url:
LOG.debug(" => skipping, [%s] is already open", url)
return
self.webdriver.get(url)
WebDriverWait(self.webdriver, timeout).until(lambda _: self.web_execute("return document.readyState") == "complete")
# pylint: disable=dangerous-default-value
def web_request(self, url:str, method:str = "GET", valid_response_codes:Iterable[int] = [200], headers:dict[str, str] | None = None) -> dict[str, Any]:
method = method.upper()
LOG.debug(" -> HTTP %s [%s]...", method, url)
response:dict[str, Any] = self.webdriver.execute_async_script(f"""
var callback = arguments[arguments.length - 1];
fetch("{url}", {{
method: "{method}",
redirect: "follow",
headers: {headers or {}}
}})
.then(response => response.text().then(responseText => {{
headers = {{}};
response.headers.forEach((v, k) => headers[k] = v);
callback({{
"statusCode": response.status,
"statusMessage": response.statusText,
"headers": headers,
"content": responseText
}})
}}))
""")
ensure(
response["statusCode"] in valid_response_codes,
f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}'
)
return response
# pylint: enable=dangerous-default-value
def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
"""
Smoothly scrolls the current web page down.
:param scroll_length: the length of a single scroll iteration, determines smoothness of scrolling, lower is smoother
:param scroll_speed: the speed of scrolling, higher is faster
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
"""
current_y_pos = 0
bottom_y_pos: int = self.webdriver.execute_script('return document.body.scrollHeight;') # get bottom position by JS
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
current_y_pos += scroll_length
self.webdriver.execute_script(f'window.scrollTo(0, {current_y_pos});') # scroll one step
time.sleep(scroll_length / scroll_speed)
if scroll_back_top: # scroll back to top in same style
while current_y_pos > 0:
current_y_pos -= scroll_length
self.webdriver.execute_script(f'window.scrollTo(0, {current_y_pos});')
time.sleep(scroll_length / scroll_speed / 2) # double speed
def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:float = 5) -> WebElement:
"""
Selects an <option/> of a <select/> HTML element.
:param timeout: timeout in seconds
:raises NoSuchElementException: if element could not be found within time
:raises UnexpectedTagNameException: if element is not a <select> element
"""
elem = self.web_await(
EC.element_to_be_clickable((selector_type, selector_value)),
timeout,
lambda: NoSuchElementException(f"Element {selector_type}='{selector_value}' not found or not clickable")
)
Select(elem).select_by_value(selected_value)
pause()
return elem

View File

@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
 SPDX-License-Identifier: AGPL-3.0-or-later
 SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
 """
-import copy, decimal, json, logging, os, re, secrets, sys, traceback, time
+import asyncio, copy, decimal, json, logging, os, re, sys, traceback, time
 from importlib.resources import read_text as get_resource_as_string
 from collections.abc import Callable, Sized
 from datetime import datetime
@@ -68,6 +68,18 @@ def is_frozen() -> bool:
     return getattr(sys, "frozen", False)

+def is_integer(obj:Any) -> bool:
+    try:
+        int(obj)
+        return True
+    except (ValueError, TypeError):
+        return False
+
+
+async def ainput(prompt: str) -> str:
+    return await asyncio.to_thread(input, f'{prompt} ')
+
+
 def apply_defaults(
     target:dict[Any, Any],
     defaults:dict[Any, Any],
@@ -119,7 +131,7 @@ def configure_console_logging() -> None:
     stdout_log = logging.StreamHandler(sys.stderr)
     stdout_log.setLevel(logging.DEBUG)
     stdout_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s"))
-    stdout_log.addFilter(type("", (logging.Filter,), {
+    stdout_log.addFilter(type("", (logging.Filter,), { # pyright: ignore
         "filter": lambda rec: rec.levelno <= logging.INFO
     }))
     LOG_ROOT.addHandler(stdout_log)
@@ -151,12 +163,6 @@ def on_sigint(_sig:int, _frame:FrameType | None) -> None:
     sys.exit(0)

-def pause(min_ms:int = 200, max_ms:int = 2000) -> None:
-    duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms
-    LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
-    time.sleep(duration / 1000)
-
 def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
     """
     >>> pluralize("field", 1)
@@ -272,20 +278,3 @@ def parse_datetime(date:datetime | str | None) -> datetime | None:
     if isinstance(date, datetime):
         return date
     return datetime.fromisoformat(date)

-def extract_ad_id_from_ad_link(url: str) -> int:
-    """
-    Extracts the ID of an ad, given by its reference link.
-
-    :param url: the URL to the ad page
-    :return: the ad ID, a (ten-digit) integer number
-    """
-    num_part = url.split('/')[-1] # suffix
-    id_part = num_part.split('-')[0]
-    try:
-        return int(id_part)
-    except ValueError:
-        print('The ad ID could not be extracted from the given ad reference!')
-        return -1
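
The new is_integer and ainput helpers support the async rewrite; ainput runs the blocking built-in input() in a worker thread so the nodriver event loop keeps running. A hedged usage sketch (the prompt text and function name are illustrative):

from kleinanzeigen_bot.utils import ainput, is_integer

async def ask_ad_count() -> int:
    reply = await ainput("How many ads should be processed?")
    return int(reply) if is_integer(reply) else 0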

View File

@@ -0,0 +1,532 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time
from collections.abc import Callable, Coroutine, Iterable
from typing import cast, Any, Final
try:
from typing import Never # type: ignore[attr-defined,unused-ignore] # mypy
except ImportError:
from typing import NoReturn as Never # Python <3.11
import nodriver, psutil
from nodriver.core.browser import Browser
from nodriver.core.config import Config
from nodriver.core.element import Element
from nodriver.core.tab import Tab as Page
from .utils import ensure, T
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.web_scraping_mixin")
__all__ = [
"Browser",
"BrowserConfig",
"By",
"Element",
"Page",
"Is",
"WebScrapingMixin"
]
class By(enum.Enum):
ID = enum.auto()
CLASS_NAME = enum.auto()
CSS_SELECTOR = enum.auto()
TAG_NAME = enum.auto()
TEXT = enum.auto()
XPATH = enum.auto()
class Is(enum.Enum):
CLICKABLE = enum.auto()
DISPLAYED = enum.auto()
DISABLED = enum.auto()
READONLY = enum.auto()
SELECTED = enum.auto()
class BrowserConfig:
def __init__(self) -> None:
self.arguments:Iterable[str] = []
self.binary_location:str | None = None
self.extensions:Iterable[str] = []
self.use_private_window:bool = True
self.user_data_dir:str = ""
self.profile_name:str = ""
class WebScrapingMixin:
def __init__(self) -> None:
self.browser_config:Final[BrowserConfig] = BrowserConfig()
self.browser:Browser = None # pyright: ignore
self.page:Page = None # pyright: ignore
async def create_browser_session(self) -> None:
LOG.info("Creating Browser session...")
if self.browser_config.binary_location:
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
else:
self.browser_config.binary_location = self.get_compatible_browser()
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)
# default_browser_args: @ https://github.com/ultrafunkamsterdam/nodriver/blob/main/nodriver/core/config.py
# https://peter.sh/experiments/chromium-command-line-switches/
# https://github.com/GoogleChrome/chrome-launcher/blob/main/docs/chrome-flags-for-tools.md
browser_args = [
# "--disable-dev-shm-usage", # https://stackoverflow.com/a/50725918/5116073
"--disable-crash-reporter",
"--disable-domain-reliability",
"--disable-sync",
"--no-experiments",
"--disable-features=MediaRouter",
"--use-mock-keychain",
"--test-type", # https://stackoverflow.com/a/36746675/5116073
# https://chromium.googlesource.com/chromium/src/+/master/net/dns/README.md#request-remapping
'--host-resolver-rules="MAP connect.facebook.net 127.0.0.1, MAP securepubads.g.doubleclick.net 127.0.0.1, MAP www.googletagmanager.com 127.0.0.1"'
]
is_edge = "edge" in self.browser_config.binary_location.lower()
if is_edge:
os.environ["MSEDGEDRIVER_TELEMETRY_OPTOUT"] = "1" # https://docs.microsoft.com/en-us/microsoft-edge/privacy-whitepaper/#microsoft-edge-driver
if self.browser_config.use_private_window:
browser_args.append("-inprivate" if is_edge else "--incognito")
if self.browser_config.profile_name:
LOG.info(" -> Browser profile name: %s", self.browser_config.profile_name)
browser_args.append(f"--profile-directory={self.browser_config.profile_name}")
for browser_arg in self.browser_config.arguments:
LOG.info(" -> Custom Chrome argument: %s", browser_arg)
browser_args.append(browser_arg)
if not LOG.isEnabledFor(logging.DEBUG):
browser_args.append("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
if self.browser_config.user_data_dir:
LOG.info(" -> Browser user data dir: %s", self.browser_config.user_data_dir)
cfg = Config(
headless = False,
browser_executable_path = self.browser_config.binary_location,
browser_args = browser_args,
user_data_dir = self.browser_config.user_data_dir
)
# already logged by nodriver:
# LOG.debug("-> Effective browser arguments: \n\t\t%s", "\n\t\t".join(cfg.browser_args))
profile_dir = os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default")
os.makedirs(profile_dir, exist_ok = True)
prefs_file = os.path.join(profile_dir, "Preferences")
if not os.path.exists(prefs_file):
LOG.info("-> Setting chrome prefs [%s]...", prefs_file)
with open(prefs_file, "w", encoding='UTF-8') as fd:
json.dump({
"credentials_enable_service": False,
"enable_do_not_track": True,
"google": {
"services": {
"consented_to_sync": False
}
},
"profile": {
"default_content_setting_values": {
"popups": 0,
"notifications": 2 # 1 = allow, 2 = block browser notifications
},
"password_manager_enabled": False
},
"signin": {
"allowed": False
},
"translate_site_blacklist": [
"www.kleinanzeigen.de"
],
"devtools": {
"preferences": {
"currentDockState": '"bottom"'
}
}
}, fd)
# load extensions
for crx_extension in self.browser_config.extensions:
LOG.info(" -> Adding extension: [%s]", crx_extension)
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
cfg.add_extension(crx_extension)
self.browser = await nodriver.start(cfg)
LOG.info("New Browser session is %s", self.browser.websocket_url)
def close_browser_session(self) -> None:
if self.browser:
LOG.debug("Closing Browser session...")
self.page = None # pyright: ignore
browser_process = psutil.Process(self.browser._process_pid) # pylint: disable=protected-access
browser_children:list[psutil.Process] = browser_process.children()
self.browser.stop()
for p in browser_children:
if p.is_running():
p.kill() # terminate orphaned browser processes
self.browser = None # pyright: ignore
def get_compatible_browser(self) -> str:
match platform.system():
case "Linux":
browser_paths = [
shutil.which("chromium"),
shutil.which("chromium-browser"),
shutil.which("google-chrome"),
shutil.which("microsoft-edge")
]
case "Darwin":
browser_paths = [
"/Applications/Chromium.app/Contents/MacOS/Chromium",
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
]
case "Windows":
browser_paths = [
os.environ.get("ProgramFiles", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
os.environ.get("ProgramFiles(x86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
os.environ["ProgramFiles"] + r'\Chromium\Application\chrome.exe',
os.environ["ProgramFiles(x86)"] + r'\Chromium\Application\chrome.exe',
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
os.environ["ProgramFiles"] + r'\Chrome\Application\chrome.exe',
os.environ["ProgramFiles(x86)"] + r'\Chrome\Application\chrome.exe',
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
shutil.which("msedge.exe"),
shutil.which("chromium.exe"),
shutil.which("chrome.exe")
]
case _ as os_name:
raise AssertionError(f"Installed browser for OS [{os_name}] could not be detected")
for browser_path in browser_paths:
if browser_path and os.path.isfile(browser_path):
return browser_path
raise AssertionError("Installed browser could not be detected")
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any,Any,T | Never]], *,
timeout:int | float = 5, timeout_error_message: str = "") -> T:
"""
Blocks/waits until the given condition is met.
:param timeout: timeout in seconds
:raises TimeoutError: if the condition was not met within the given time
"""
loop = asyncio.get_running_loop()
start_at = loop.time()
while True:
await self.page
ex:Exception | None = None
try:
result_raw = condition()
result:T = (await result_raw) if inspect.isawaitable(result_raw) else result_raw
if result:
return result
except Exception as ex1:
ex = ex1
if loop.time() - start_at > timeout:
if ex:
raise ex
raise TimeoutError(timeout_error_message or f"Condition not met within {timeout} seconds")
await self.page.sleep(0.5)
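# Usage sketch for web_await (illustrative; the element ID is hypothetical):
#   await self.web_await(
#       lambda: self.page.query_selector("#gdpr-banner"),
#       timeout = 10,
#       timeout_error_message = "Consent banner did not appear")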
async def web_check(self, selector_type:By, selector_value:str, attr:Is, *, timeout:int | float = 5) -> bool:
"""
Locates an HTML element and returns a state.
:param timeout: timeout in seconds
:raises TimeoutError: if element could not be found within time
"""
def is_disabled(elem:Element) -> bool:
return elem.attrs.get("disabled") is not None
async def is_displayed(elem:Element) -> bool:
return cast(bool, await elem.apply("""
function (element) {
var style = window.getComputedStyle(element);
return style.display !== 'none'
&& style.visibility !== 'hidden'
&& style.opacity !== '0'
&& element.offsetWidth > 0
&& element.offsetHeight > 0
}
"""))
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
match attr:
case Is.CLICKABLE:
return not is_disabled(elem) and await is_displayed(elem)
case Is.DISPLAYED:
return await is_displayed(elem)
case Is.DISABLED:
return is_disabled(elem)
case Is.READONLY:
return elem.attrs.get("readonly") is not None
case Is.SELECTED:
return cast(bool, await elem.apply("""
function (element) {
if (element.tagName.toLowerCase() === 'input') {
if (element.type === 'checkbox' || element.type === 'radio') {
return element.checked
}
}
return false
}
"""))
raise AssertionError(f"Unsupported attribute: {attr}")
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element:
"""
Locates an HTML element and clicks it.
:param timeout: timeout in seconds
:raises TimeoutError: if element could not be found within time
"""
elem = await self.web_find(selector_type, selector_value, timeout = timeout)
await elem.click()
await self.web_sleep()
return elem
async def web_execute(self, javascript:str) -> Any:
"""
Executes the given JavaScript code in the context of the current page.
:return: The javascript's return value
"""
return await self.page.evaluate(javascript, True)
async def web_find(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> Element:
"""
Locates an HTML element by the given selector type and value.
:param timeout: timeout in seconds
:raises TimeoutError: if element could not be found within time
"""
match selector_type:
case By.ID:
return await self.web_await(
lambda: self.page.query_selector(f"#{selector_value}", parent),
timeout = timeout,
timeout_error_message = f"No HTML element found with ID '{selector_value}' within {timeout} seconds.")
case By.CLASS_NAME:
return await self.web_await(
lambda: self.page.query_selector(f".{selector_value}", parent),
timeout = timeout,
timeout_error_message = f"No HTML element found with ID '{selector_value}' within {timeout} seconds.")
case By.TAG_NAME:
return await self.web_await(
lambda: self.page.query_selector(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML element found of tag <{selector_value}> within {timeout} seconds.")
case By.CSS_SELECTOR:
return await self.web_await(
lambda: self.page.query_selector(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.")
case By.TEXT:
if parent:
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_element_by_text(selector_value, True),
timeout = timeout,
timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.")
case By.XPATH:
if parent:
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_element_by_text(selector_value, True),
timeout = timeout,
timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.")
raise AssertionError(f"Unsupported selector type: {selector_type}")
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]:
"""
Locates all matching HTML elements by the given selector type and value.
:param timeout: timeout in seconds
:raises TimeoutError: if element could not be found within time
"""
match selector_type:
case By.CLASS_NAME:
return await self.web_await(
lambda: self.page.query_selector_all(f".{selector_value}", parent),
timeout = timeout,
timeout_error_message = f"No HTML elements found with CSS class '{selector_value}' within {timeout} seconds.")
case By.CSS_SELECTOR:
return await self.web_await(
lambda: self.page.query_selector_all(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML elements found using CSS selector '{selector_value}' within {timeout} seconds.")
case By.TAG_NAME:
return await self.web_await(
lambda: self.page.query_selector_all(selector_value, parent),
timeout = timeout,
timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.")
case By.TEXT:
if parent:
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_elements_by_text(selector_value),
timeout = timeout,
timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.")
case By.XPATH:
if parent:
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
return await self.web_await(
lambda: self.page.find_elements_by_text(selector_value),
timeout = timeout,
timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.")
raise AssertionError(f"Unsupported selector type: {selector_type}")
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element:
"""
Enters text into an HTML input field.
:param timeout: timeout in seconds
:raises TimeoutError: if element could not be found within time
"""
input_field = await self.web_find(selector_type, selector_value, timeout = timeout)
await input_field.clear_input()
await input_field.send_keys(str(text))
await self.web_sleep()
return input_field
async def web_open(self, url:str, *, timeout:int | float = 15, reload_if_already_open:bool = False) -> None:
"""
:param url: url to open in browser
:param timeout: timespan in seconds within which the page needs to be loaded
:param reload_if_already_open: if False does nothing if the URL is already open in the browser
:raises TimeoutError: if page did not open within the given timespan
"""
LOG.debug(" -> Opening [%s]...", url)
if not reload_if_already_open and self.page and url == self.page.url:
LOG.debug(" => skipping, [%s] is already open", url)
return
self.page = await self.browser.get(url, False, False)
await self.web_await(lambda: self.web_execute("document.readyState == 'complete'"), timeout = timeout,
timeout_error_message = f"Page did not finish loading within {timeout} seconds.")
async def web_text(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> str:
return str(await (await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)).apply("""
function (elem) {
let sel = window.getSelection()
sel.removeAllRanges()
let range = document.createRange()
range.selectNode(elem)
sel.addRange(range)
let visibleText = sel.toString().trim()
sel.removeAllRanges()
return visibleText
}
"""))
async def web_sleep(self, min_ms:int = 1000, max_ms:int = 2500) -> None:
duration = max_ms <= min_ms and min_ms or secrets.randbelow(max_ms - min_ms) + min_ms
LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
await self.page.sleep(duration / 1000)
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,
headers:dict[str, str] | None = None) -> dict[str, Any]:
method = method.upper()
LOG.debug(" -> HTTP %s [%s]...", method, url)
response = cast(dict[str, Any], await self.page.evaluate(f"""
fetch("{url}", {{
method: "{method}",
redirect: "follow",
headers: {headers or {}}
}})
.then(response => response.text().then(responseText => {{
headers = {{}};
response.headers.forEach((v, k) => headers[k] = v);
return {{
statusCode: response.status,
statusMessage: response.statusText,
headers: headers,
content: responseText
}}
}}))
""", await_promise=True))
if isinstance(valid_response_codes, int):
valid_response_codes = [valid_response_codes]
ensure(
response["statusCode"] in valid_response_codes,
f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}'
)
return response
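# Usage sketch (URL taken from elsewhere in this diff):
#   response = await self.web_request("https://www.kleinanzeigen.de/m-meine-anzeigen.html")
#   print(response["statusCode"], len(response["content"]))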
async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
"""
Smoothly scrolls the current web page down.
:param scroll_length: the length of a single scroll iteration, determines smoothness of scrolling, lower is smoother
:param scroll_speed: the speed of scrolling, higher is faster
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
"""
current_y_pos = 0
bottom_y_pos: int = await self.web_execute('document.body.scrollHeight') # get bottom position
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
current_y_pos += scroll_length
await self.web_execute(f'window.scrollTo(0, {current_y_pos})') # scroll one step
await asyncio.sleep(scroll_length / scroll_speed) # non-blocking pause between scroll steps
if scroll_back_top: # scroll back to top in same style
while current_y_pos > 0:
current_y_pos -= scroll_length
await self.web_execute(f'window.scrollTo(0, {current_y_pos})')
await asyncio.sleep(scroll_length / scroll_speed / 2) # double speed
async def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int | float = 5) -> Element:
"""
Selects an <option/> of a <select/> HTML element.
:param timeout: timeout in seconds
:raises TimeoutError: if element could not be found within time
:raises UnexpectedTagNameException: if element is not a <select> element
"""
await self.web_await(
lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout,
timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found"
)
elem = await self.web_find(selector_type, selector_value)
await elem.apply(f"""
function (element) {{
for(let i=0; i < element.options.length; i++)
{{
if(element.options[i].value == "{selected_value}") {{
element.selectedIndex = i;
return; // option found; otherwise fall through to the error below
}}
}}
throw new Error("Option with value {selected_value} not found.");
}}
""")
await self.web_sleep()
return elem
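
End to end, the new mixin is used roughly as follows; a hedged sketch (the subclass name is made up, and nodriver supplies the event loop, as in the tests further down):

import nodriver
from kleinanzeigen_bot.web_scraping_mixin import By, WebScrapingMixin

class Demo(WebScrapingMixin):
    async def run(self) -> None:
        await self.create_browser_session()
        try:
            await self.web_open("https://www.kleinanzeigen.de")
            print(await self.web_text(By.TAG_NAME, "h1"))
        finally:
            self.close_browser_session()

nodriver.loop().run_until_complete(Demo().run())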

View File

@@ -1,22 +0,0 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import pytest
from kleinanzeigen_bot.selenium_mixin import SeleniumMixin
from kleinanzeigen_bot import utils
@pytest.mark.itest
def test_webdriver_auto_init():
selenium_mixin = SeleniumMixin()
selenium_mixin.browser_config.arguments = ["--no-sandbox"]
browser_path = selenium_mixin.get_compatible_browser()
utils.ensure(browser_path is not None, "Browser not auto-detected")
selenium_mixin.webdriver = None
selenium_mixin.create_webdriver_session()
selenium_mixin.webdriver.quit()

View File

@@ -3,12 +3,11 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
 SPDX-License-Identifier: AGPL-3.0-or-later
 SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
 """
-import os, sys, time
 import pytest

 from kleinanzeigen_bot import utils

-def test_ensure():
+def test_ensure() -> None:
     utils.ensure(True, "TRUE")
     utils.ensure("Some Value", "TRUE")
     utils.ensure(123, "TRUE")
@@ -29,13 +28,3 @@ def test_ensure():
     with pytest.raises(AssertionError):
         utils.ensure(lambda: False, "FALSE", timeout = 2)

-def test_pause():
-    start = time.time()
-    utils.pause(100, 100)
-    elapsed = 1000 * (time.time() - start)
-    if sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS", "true") == "true":
-        assert 99 < elapsed < 300
-    else:
-        assert 99 < elapsed < 120

View File

@@ -0,0 +1,41 @@
"""
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
"""
import logging, os, time
from typing import Any
import nodriver, pytest
from flaky import flaky
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin
from kleinanzeigen_bot.utils import ensure
if os.environ.get("CI"):
logging.getLogger("kleinanzeigen_bot").setLevel(logging.DEBUG)
logging.getLogger("nodriver").setLevel(logging.DEBUG)
def delay_rerun(*args:Any) -> bool: # pylint: disable=unused-argument
time.sleep(5)
return True
async def atest_init() -> None:
web_scraping_mixin = WebScrapingMixin()
browser_path = web_scraping_mixin.get_compatible_browser()
ensure(browser_path is not None, "Browser not auto-detected")
web_scraping_mixin.close_browser_session()
try:
await web_scraping_mixin.create_browser_session()
finally:
web_scraping_mixin.close_browser_session()
@flaky(max_runs = 3, min_passes = 1, rerun_filter = delay_rerun) # type: ignore[misc] # mypy
@pytest.mark.itest
def test_init() -> None:
nodriver.loop().run_until_complete(atest_init())