mirror of
https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git
synced 2026-03-12 10:31:50 +01:00
replace selenium with nodriver
This commit is contained in:
@@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages:
|
It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https://github.com/Second-Hand-Friends/ebayKleinanzeigen) with the following advantages:
|
||||||
- supports Microsoft Edge browser (Chromium based)
|
- supports Microsoft Edge browser (Chromium based)
|
||||||
- compatible chromedriver is installed automatically
|
- does not require selenium and chromedrivers
|
||||||
- better captcha handling
|
- better captcha handling
|
||||||
- config:
|
- config:
|
||||||
- use YAML or JSON for config files
|
- use YAML or JSON for config files
|
||||||
@@ -29,7 +29,7 @@ It is the spiritual successor to [Second-Hand-Friends/ebayKleinanzeigen](https:/
|
|||||||
- reference categories by name (looked up from [categories.yaml](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/kleinanzeigen_bot/resources/categories.yaml))
|
- reference categories by name (looked up from [categories.yaml](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/blob/main/kleinanzeigen_bot/resources/categories.yaml))
|
||||||
- logging is configurable and colorized
|
- logging is configurable and colorized
|
||||||
- provided as self-contained executable for Windows, Linux and macOS
|
- provided as self-contained executable for Windows, Linux and macOS
|
||||||
- source code is pylint checked and uses Python type hints
|
- source code is pylint/bandit/mypy checked and uses Python type hints
|
||||||
- CI builds
|
- CI builds
|
||||||
|
|
||||||
|
|
||||||
@@ -290,7 +290,7 @@ description: # can be multiline, see syntax here https://yaml-multiline.info/
|
|||||||
# or category ID (e.g. 161/27)
|
# or category ID (e.g. 161/27)
|
||||||
category: Notebooks
|
category: Notebooks
|
||||||
|
|
||||||
price:
|
price: # without decimals, e.g. 75
|
||||||
price_type: # one of: FIXED, NEGOTIABLE, GIVE_AWAY
|
price_type: # one of: FIXED, NEGOTIABLE, GIVE_AWAY
|
||||||
|
|
||||||
special_attributes:
|
special_attributes:
|
||||||
|
|||||||
476
pdm.lock
generated
476
pdm.lock
generated
@@ -5,7 +5,7 @@
|
|||||||
groups = ["default", "dev"]
|
groups = ["default", "dev"]
|
||||||
strategy = ["cross_platform"]
|
strategy = ["cross_platform"]
|
||||||
lock_version = "4.4.1"
|
lock_version = "4.4.1"
|
||||||
content_hash = "sha256:66695736c39c00414bd1e0c1d85a95957e00e058699d53eee716d6822214668f"
|
content_hash = "sha256:9174562d901578582fe3f3a087416ba5f2def90f12ac79407c2e8c6a93e8b8ca"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "altgraph"
|
name = "altgraph"
|
||||||
@@ -16,16 +16,6 @@ files = [
|
|||||||
{file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
|
{file = "altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "annotated-types"
|
|
||||||
version = "0.6.0"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = "Reusable constraint types to use with typing.Annotated"
|
|
||||||
files = [
|
|
||||||
{file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
|
|
||||||
{file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "astroid"
|
name = "astroid"
|
||||||
version = "3.1.0"
|
version = "3.1.0"
|
||||||
@@ -39,16 +29,6 @@ files = [
|
|||||||
{file = "astroid-3.1.0.tar.gz", hash = "sha256:ac248253bfa4bd924a0de213707e7ebeeb3138abeb48d798784ead1e56d419d4"},
|
{file = "astroid-3.1.0.tar.gz", hash = "sha256:ac248253bfa4bd924a0de213707e7ebeeb3138abeb48d798784ead1e56d419d4"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "attrs"
|
|
||||||
version = "23.2.0"
|
|
||||||
requires_python = ">=3.7"
|
|
||||||
summary = "Classes Without Boilerplate"
|
|
||||||
files = [
|
|
||||||
{file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
|
|
||||||
{file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autopep8"
|
name = "autopep8"
|
||||||
version = "2.0.4"
|
version = "2.0.4"
|
||||||
@@ -89,60 +69,6 @@ files = [
|
|||||||
{file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"},
|
{file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "certifi"
|
|
||||||
version = "2024.2.2"
|
|
||||||
requires_python = ">=3.6"
|
|
||||||
summary = "Python package for providing Mozilla's CA Bundle."
|
|
||||||
files = [
|
|
||||||
{file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"},
|
|
||||||
{file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cffi"
|
|
||||||
version = "1.16.0"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = "Foreign Function Interface for Python calling C code."
|
|
||||||
dependencies = [
|
|
||||||
"pycparser",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"},
|
|
||||||
{file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"},
|
|
||||||
{file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"},
|
|
||||||
{file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"},
|
|
||||||
{file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "colorama"
|
name = "colorama"
|
||||||
version = "0.4.6"
|
version = "0.4.6"
|
||||||
@@ -166,6 +92,19 @@ files = [
|
|||||||
{file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"},
|
{file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "deprecated"
|
||||||
|
version = "1.2.14"
|
||||||
|
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
summary = "Python @deprecated decorator to deprecate old python classes, functions or methods."
|
||||||
|
dependencies = [
|
||||||
|
"wrapt<2,>=1.10",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"},
|
||||||
|
{file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dill"
|
name = "dill"
|
||||||
version = "0.3.8"
|
version = "0.3.8"
|
||||||
@@ -187,13 +126,13 @@ files = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "h11"
|
name = "flaky"
|
||||||
version = "0.14.0"
|
version = "3.7.0"
|
||||||
requires_python = ">=3.7"
|
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
summary = "Plugin for nose or pytest that automatically reruns flaky tests."
|
||||||
files = [
|
files = [
|
||||||
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
|
{file = "flaky-3.7.0-py2.py3-none-any.whl", hash = "sha256:d6eda73cab5ae7364504b7c44670f70abed9e75f77dd116352f662817592ec9c"},
|
||||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
{file = "flaky-3.7.0.tar.gz", hash = "sha256:3ad100780721a1911f57a165809b7ea265a7863305acb66708220820caf8aa0d"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -209,16 +148,6 @@ files = [
|
|||||||
{file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"},
|
{file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "idna"
|
|
||||||
version = "3.6"
|
|
||||||
requires_python = ">=3.5"
|
|
||||||
summary = "Internationalized Domain Names in Applications (IDNA)"
|
|
||||||
files = [
|
|
||||||
{file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"},
|
|
||||||
{file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "iniconfig"
|
name = "iniconfig"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
@@ -284,6 +213,16 @@ files = [
|
|||||||
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mss"
|
||||||
|
version = "9.0.1"
|
||||||
|
requires_python = ">=3.8"
|
||||||
|
summary = "An ultra fast cross-platform multiple screenshots module in pure python using ctypes."
|
||||||
|
files = [
|
||||||
|
{file = "mss-9.0.1-py3-none-any.whl", hash = "sha256:7ee44db7ab14cbea6a3eb63813c57d677a109ca5979d3b76046e4bddd3ca1a0b"},
|
||||||
|
{file = "mss-9.0.1.tar.gz", hash = "sha256:6eb7b9008cf27428811fa33aeb35f3334db81e3f7cc2dd49ec7c6e5a94b39f12"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mypy"
|
name = "mypy"
|
||||||
version = "1.8.0"
|
version = "1.8.0"
|
||||||
@@ -325,16 +264,31 @@ files = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "outcome"
|
name = "nodeenv"
|
||||||
version = "1.3.0.post0"
|
version = "1.8.0"
|
||||||
requires_python = ">=3.7"
|
requires_python = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
|
||||||
summary = "Capture the outcome of Python function calls."
|
summary = "Node.js virtual environment builder"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"attrs>=19.2.0",
|
"setuptools",
|
||||||
]
|
]
|
||||||
files = [
|
files = [
|
||||||
{file = "outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b"},
|
{file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"},
|
||||||
{file = "outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8"},
|
{file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nodriver"
|
||||||
|
version = "0.27rc1"
|
||||||
|
requires_python = ">=3.9"
|
||||||
|
summary = "* Official successor of Undetected Chromedriver"
|
||||||
|
dependencies = [
|
||||||
|
"deprecated",
|
||||||
|
"mss",
|
||||||
|
"websockets>=11",
|
||||||
|
]
|
||||||
|
files = [
|
||||||
|
{file = "nodriver-0.27rc1-py3-none-any.whl", hash = "sha256:d7e858417347628e53fc5bd3692b0608ac403ec9dc5027a3e6f27dce1074052e"},
|
||||||
|
{file = "nodriver-0.27rc1.tar.gz", hash = "sha256:f0b1019a07cc3da0386d363cf5e7f6c96a8fd10de76566d201f45b7fe96af7cb"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -422,97 +376,6 @@ files = [
|
|||||||
{file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
|
{file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pycparser"
|
|
||||||
version = "2.21"
|
|
||||||
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
|
||||||
summary = "C parser in Python"
|
|
||||||
files = [
|
|
||||||
{file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
|
|
||||||
{file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pydantic"
|
|
||||||
version = "2.6.3"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = "Data validation using Python type hints"
|
|
||||||
dependencies = [
|
|
||||||
"annotated-types>=0.4.0",
|
|
||||||
"pydantic-core==2.16.3",
|
|
||||||
"typing-extensions>=4.6.1",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "pydantic-2.6.3-py3-none-any.whl", hash = "sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a"},
|
|
||||||
{file = "pydantic-2.6.3.tar.gz", hash = "sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pydantic-core"
|
|
||||||
version = "2.16.3"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = ""
|
|
||||||
dependencies = [
|
|
||||||
"typing-extensions!=4.7.0,>=4.6.0",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:75b81e678d1c1ede0785c7f46690621e4c6e63ccd9192af1f0bd9d504bbb6bf4"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c865a7ee6f93783bd5d781af5a4c43dadc37053a5b42f7d18dc019f8c9d2bd1"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:162e498303d2b1c036b957a1278fa0899d02b2842f1ff901b6395104c5554a45"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f583bd01bbfbff4eaee0868e6fc607efdfcc2b03c1c766b06a707abbc856187"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b926dd38db1519ed3043a4de50214e0d600d404099c3392f098a7f9d75029ff8"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:716b542728d4c742353448765aa7cdaa519a7b82f9564130e2b3f6766018c9ec"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4ad7f7ee1a13d9cb49d8198cd7d7e3aa93e425f371a68235f784e99741561f"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd87f48924f360e5d1c5f770d6155ce0e7d83f7b4e10c2f9ec001c73cf475c99"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0df446663464884297c793874573549229f9eca73b59360878f382a0fc085979"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4df8a199d9f6afc5ae9a65f8f95ee52cae389a8c6b20163762bde0426275b7db"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-none-win32.whl", hash = "sha256:456855f57b413f077dff513a5a28ed838dbbb15082ba00f80750377eed23d132"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp310-none-win_amd64.whl", hash = "sha256:732da3243e1b8d3eab8c6ae23ae6a58548849d2e4a4e03a1924c8ddf71a387cb"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:519ae0312616026bf4cedc0fe459e982734f3ca82ee8c7246c19b650b60a5ee4"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b3992a322a5617ded0a9f23fd06dbc1e4bd7cf39bc4ccf344b10f80af58beacd"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d62da299c6ecb04df729e4b5c52dc0d53f4f8430b4492b93aa8de1f541c4aac"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2acca2be4bb2f2147ada8cac612f8a98fc09f41c89f87add7256ad27332c2fda"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b662180108c55dfbf1280d865b2d116633d436cfc0bba82323554873967b340"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7c6ed0dc9d8e65f24f5824291550139fe6f37fac03788d4580da0d33bc00c97"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1bb0827f56654b4437955555dc3aeeebeddc47c2d7ed575477f082622c49e"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e56f8186d6210ac7ece503193ec84104da7ceb98f68ce18c07282fcc2452e76f"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:936e5db01dd49476fa8f4383c259b8b1303d5dd5fb34c97de194560698cc2c5e"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:33809aebac276089b78db106ee692bdc9044710e26f24a9a2eaa35a0f9fa70ba"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-none-win32.whl", hash = "sha256:ded1c35f15c9dea16ead9bffcde9bb5c7c031bff076355dc58dcb1cb436c4721"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-none-win_amd64.whl", hash = "sha256:d89ca19cdd0dd5f31606a9329e309d4fcbb3df860960acec32630297d61820df"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp311-none-win_arm64.whl", hash = "sha256:6162f8d2dc27ba21027f261e4fa26f8bcb3cf9784b7f9499466a311ac284b5b9"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f56ae86b60ea987ae8bcd6654a887238fd53d1384f9b222ac457070b7ac4cff"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9bd22a2a639e26171068f8ebb5400ce2c1bc7d17959f60a3b753ae13c632975"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4204e773b4b408062960e65468d5346bdfe139247ee5f1ca2a378983e11388a2"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f651dd19363c632f4abe3480a7c87a9773be27cfe1341aef06e8759599454120"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf09e615a0bf98d406657e0008e4a8701b11481840be7d31755dc9f97c44053"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8e47755d8152c1ab5b55928ab422a76e2e7b22b5ed8e90a7d584268dd49e9c6b"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:500960cb3a0543a724a81ba859da816e8cf01b0e6aaeedf2c3775d12ee49cade"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf6204fe865da605285c34cf1172879d0314ff267b1c35ff59de7154f35fdc2e"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d33dd21f572545649f90c38c227cc8631268ba25c460b5569abebdd0ec5974ca"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:49d5d58abd4b83fb8ce763be7794d09b2f50f10aa65c0f0c1696c677edeb7cbf"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-none-win32.whl", hash = "sha256:f53aace168a2a10582e570b7736cc5bef12cae9cf21775e3eafac597e8551fbe"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-none-win_amd64.whl", hash = "sha256:0d32576b1de5a30d9a97f300cc6a3f4694c428d956adbc7e6e2f9cad279e45ed"},
|
|
||||||
{file = "pydantic_core-2.16.3-cp312-none-win_arm64.whl", hash = "sha256:ec08be75bb268473677edb83ba71e7e74b43c008e4a7b1907c6d57e940bf34b6"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:36fa178aacbc277bc6b62a2c3da95226520da4f4e9e206fdf076484363895d2c"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:dcca5d2bf65c6fb591fff92da03f94cd4f315972f97c21975398bd4bd046854a"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a72fb9963cba4cd5793854fd12f4cfee731e86df140f59ff52a49b3552db241"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60cc1a081f80a2105a59385b92d82278b15d80ebb3adb200542ae165cd7d183"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cbcc558401de90a746d02ef330c528f2e668c83350f045833543cd57ecead1ad"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fee427241c2d9fb7192b658190f9f5fd6dfe41e02f3c1489d2ec1e6a5ab1e04a"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4cb85f693044e0f71f394ff76c98ddc1bc0953e48c061725e540396d5c8a2e1"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b29eeb887aa931c2fcef5aa515d9d176d25006794610c264ddc114c053bf96fe"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a425479ee40ff021f8216c9d07a6a3b54b31c8267c6e17aa88b70d7ebd0e5e5b"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c5cbc703168d1b7a838668998308018a2718c2130595e8e190220238addc96f"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99b6add4c0b39a513d323d3b93bc173dac663c27b99860dd5bf491b240d26137"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f76ee558751746d6a38f89d60b6228fa174e5172d143886af0f85aa306fd89"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:00ee1c97b5364b84cb0bd82e9bbf645d5e2871fb8c58059d158412fee2d33d8a"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:287073c66748f624be4cef893ef9174e3eb88fe0b8a78dc22e88eca4bc357ca6"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed25e1835c00a332cb10c683cd39da96a719ab1dfc08427d476bce41b92531fc"},
|
|
||||||
{file = "pydantic_core-2.16.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:86b3d0033580bd6bbe07590152007275bd7af95f98eaa5bd36f3da219dcd93da"},
|
|
||||||
{file = "pydantic_core-2.16.3.tar.gz", hash = "sha256:1cac689f80a3abab2d3c0048b29eea5751114054f032a941a32de4c852c59cad"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pygments"
|
name = "pygments"
|
||||||
version = "2.17.2"
|
version = "2.17.2"
|
||||||
@@ -598,18 +461,21 @@ files = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pysocks"
|
name = "pyright"
|
||||||
version = "1.7.1"
|
version = "1.1.352"
|
||||||
requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
requires_python = ">=3.7"
|
||||||
summary = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information."
|
summary = "Command line wrapper for pyright"
|
||||||
|
dependencies = [
|
||||||
|
"nodeenv>=1.6.0",
|
||||||
|
]
|
||||||
files = [
|
files = [
|
||||||
{file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"},
|
{file = "pyright-1.1.352-py3-none-any.whl", hash = "sha256:0040cf173c6a60704e553bfd129dfe54de59cc76d0b2b80f77cfab4f50701d64"},
|
||||||
{file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"},
|
{file = "pyright-1.1.352.tar.gz", hash = "sha256:a621c0dfbcf1291b3610641a07380fefaa1d0e182890a1b2a7f13b446e8109a9"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pytest"
|
name = "pytest"
|
||||||
version = "8.1.0"
|
version = "8.0.2"
|
||||||
requires_python = ">=3.8"
|
requires_python = ">=3.8"
|
||||||
summary = "pytest: simple powerful testing with Python"
|
summary = "pytest: simple powerful testing with Python"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
@@ -617,12 +483,12 @@ dependencies = [
|
|||||||
"exceptiongroup>=1.0.0rc8; python_version < \"3.11\"",
|
"exceptiongroup>=1.0.0rc8; python_version < \"3.11\"",
|
||||||
"iniconfig",
|
"iniconfig",
|
||||||
"packaging",
|
"packaging",
|
||||||
"pluggy<2.0,>=1.4",
|
"pluggy<2.0,>=1.3.0",
|
||||||
"tomli>=1; python_version < \"3.11\"",
|
"tomli>=1.0.0; python_version < \"3.11\"",
|
||||||
]
|
]
|
||||||
files = [
|
files = [
|
||||||
{file = "pytest-8.1.0-py3-none-any.whl", hash = "sha256:ee32db7af8de4629a455806befa90559f307424c07b8413ccfc30bf5b221dd7e"},
|
{file = "pytest-8.0.2-py3-none-any.whl", hash = "sha256:edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096"},
|
||||||
{file = "pytest-8.1.0.tar.gz", hash = "sha256:f8fa04ab8f98d185113ae60ea6d79c22f8143b14bc1caeced44a0ab844928323"},
|
{file = "pytest-8.0.2.tar.gz", hash = "sha256:d4051d623a2e0b7e51960ba963193b09ce6daeb9759a451844a21e4ddedfc1bd"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -742,35 +608,6 @@ files = [
|
|||||||
{file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"},
|
{file = "ruamel.yaml.clib-0.2.8.tar.gz", hash = "sha256:beb2e0404003de9a4cab9753a8805a8fe9320ee6673136ed7f04255fe60bb512"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "selenium"
|
|
||||||
version = "4.18.1"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = ""
|
|
||||||
dependencies = [
|
|
||||||
"certifi>=2021.10.8",
|
|
||||||
"trio-websocket~=0.9",
|
|
||||||
"trio~=0.17",
|
|
||||||
"typing-extensions>=4.9.0",
|
|
||||||
"urllib3[socks]<3,>=1.26",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "selenium-4.18.1-py3-none-any.whl", hash = "sha256:b24a3cdd2d47c29832e81345bfcde0c12bb608738013e53c781b211b418df241"},
|
|
||||||
{file = "selenium-4.18.1.tar.gz", hash = "sha256:a11f67afa8bfac6b77e148c987b33f6b14eb1cae4d352722a75de1f26e3f0ae2"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "selenium-stealth"
|
|
||||||
version = "1.0.6"
|
|
||||||
requires_python = ">=3, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
|
||||||
summary = "Trying to make python selenium more stealthy."
|
|
||||||
dependencies = [
|
|
||||||
"selenium",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "selenium_stealth-1.0.6-py3-none-any.whl", hash = "sha256:b62da5452aa4a84f29a4dfb21a9696aff20788a7c570dd0b81bc04a940848b97"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "setuptools"
|
name = "setuptools"
|
||||||
version = "69.1.1"
|
version = "69.1.1"
|
||||||
@@ -781,25 +618,6 @@ files = [
|
|||||||
{file = "setuptools-69.1.1.tar.gz", hash = "sha256:5c0806c7d9af348e6dd3777b4f4dbb42c7ad85b190104837488eab9a7c945cf8"},
|
{file = "setuptools-69.1.1.tar.gz", hash = "sha256:5c0806c7d9af348e6dd3777b4f4dbb42c7ad85b190104837488eab9a7c945cf8"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sniffio"
|
|
||||||
version = "1.3.1"
|
|
||||||
requires_python = ">=3.7"
|
|
||||||
summary = "Sniff out which async library your code is running under"
|
|
||||||
files = [
|
|
||||||
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
|
||||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sortedcontainers"
|
|
||||||
version = "2.4.0"
|
|
||||||
summary = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
|
|
||||||
files = [
|
|
||||||
{file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"},
|
|
||||||
{file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stevedore"
|
name = "stevedore"
|
||||||
version = "5.2.0"
|
version = "5.2.0"
|
||||||
@@ -843,40 +661,6 @@ files = [
|
|||||||
{file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"},
|
{file = "tomlkit-0.12.4.tar.gz", hash = "sha256:7ca1cfc12232806517a8515047ba66a19369e71edf2439d0f5824f91032b6cc3"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "trio"
|
|
||||||
version = "0.24.0"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = "A friendly Python library for async concurrency and I/O"
|
|
||||||
dependencies = [
|
|
||||||
"attrs>=20.1.0",
|
|
||||||
"cffi>=1.14; os_name == \"nt\" and implementation_name != \"pypy\"",
|
|
||||||
"exceptiongroup; python_version < \"3.11\"",
|
|
||||||
"idna",
|
|
||||||
"outcome",
|
|
||||||
"sniffio>=1.3.0",
|
|
||||||
"sortedcontainers",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "trio-0.24.0-py3-none-any.whl", hash = "sha256:c3bd3a4e3e3025cd9a2241eae75637c43fe0b9e88b4c97b9161a55b9e54cd72c"},
|
|
||||||
{file = "trio-0.24.0.tar.gz", hash = "sha256:ffa09a74a6bf81b84f8613909fb0beaee84757450183a7a2e0b47b455c0cac5d"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "trio-websocket"
|
|
||||||
version = "0.11.1"
|
|
||||||
requires_python = ">=3.7"
|
|
||||||
summary = "WebSocket library for Trio"
|
|
||||||
dependencies = [
|
|
||||||
"exceptiongroup; python_version < \"3.11\"",
|
|
||||||
"trio>=0.11",
|
|
||||||
"wsproto>=0.14",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "trio-websocket-0.11.1.tar.gz", hash = "sha256:18c11793647703c158b1f6e62de638acada927344d534e3c7628eedcb746839f"},
|
|
||||||
{file = "trio_websocket-0.11.1-py3-none-any.whl", hash = "sha256:520d046b0d030cf970b8b2b2e00c4c2245b3807853ecd44214acd33d74581638"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typing-extensions"
|
name = "typing-extensions"
|
||||||
version = "4.10.0"
|
version = "4.10.0"
|
||||||
@@ -887,31 +671,6 @@ files = [
|
|||||||
{file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"},
|
{file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "urllib3"
|
|
||||||
version = "2.2.1"
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = "HTTP library with thread-safe connection pooling, file post, and more."
|
|
||||||
files = [
|
|
||||||
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
|
|
||||||
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "urllib3"
|
|
||||||
version = "2.2.1"
|
|
||||||
extras = ["socks"]
|
|
||||||
requires_python = ">=3.8"
|
|
||||||
summary = "HTTP library with thread-safe connection pooling, file post, and more."
|
|
||||||
dependencies = [
|
|
||||||
"pysocks!=1.5.7,<2.0,>=1.5.6",
|
|
||||||
"urllib3==2.2.1",
|
|
||||||
]
|
|
||||||
files = [
|
|
||||||
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
|
|
||||||
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wcmatch"
|
name = "wcmatch"
|
||||||
version = "8.5.1"
|
version = "8.5.1"
|
||||||
@@ -926,14 +685,99 @@ files = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wsproto"
|
name = "websockets"
|
||||||
version = "1.2.0"
|
version = "12.0"
|
||||||
requires_python = ">=3.7.0"
|
requires_python = ">=3.8"
|
||||||
summary = "WebSockets state-machine based protocol implementation"
|
summary = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
|
||||||
dependencies = [
|
|
||||||
"h11<1,>=0.9.0",
|
|
||||||
]
|
|
||||||
files = [
|
files = [
|
||||||
{file = "wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736"},
|
{file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"},
|
||||||
{file = "wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065"},
|
{file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"},
|
||||||
|
{file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"},
|
||||||
|
{file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"},
|
||||||
|
{file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"},
|
||||||
|
{file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"},
|
||||||
|
{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"},
|
||||||
|
{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"},
|
||||||
|
{file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"},
|
||||||
|
{file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"},
|
||||||
|
{file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"},
|
||||||
|
{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"},
|
||||||
|
{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"},
|
||||||
|
{file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"},
|
||||||
|
{file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"},
|
||||||
|
{file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"},
|
||||||
|
{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"},
|
||||||
|
{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"},
|
||||||
|
{file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"},
|
||||||
|
{file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"},
|
||||||
|
{file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"},
|
||||||
|
{file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wrapt"
|
||||||
|
version = "1.16.0"
|
||||||
|
requires_python = ">=3.6"
|
||||||
|
summary = "Module for decorators, wrappers and monkey patching."
|
||||||
|
files = [
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"},
|
||||||
|
{file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"},
|
||||||
|
{file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"},
|
||||||
|
{file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"},
|
||||||
|
{file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"},
|
||||||
|
{file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ from PyInstaller.utils.hooks import collect_data_files
|
|||||||
|
|
||||||
datas = [
|
datas = [
|
||||||
* collect_data_files("kleinanzeigen_bot"), # embeds *.yaml files
|
* collect_data_files("kleinanzeigen_bot"), # embeds *.yaml files
|
||||||
* collect_data_files("selenium_stealth"), # embeds *.js files
|
|
||||||
|
|
||||||
# required to get version info via 'importlib.metadata.version(__package__)'
|
# required to get version info via 'importlib.metadata.version(__package__)'
|
||||||
# but we use https://backend.pdm-project.org/metadata/#writing-dynamic-version-to-file
|
# but we use https://backend.pdm-project.org/metadata/#writing-dynamic-version-to-file
|
||||||
|
|||||||
@@ -32,14 +32,14 @@ classifiers = [ # https://pypi.org/classifiers/
|
|||||||
]
|
]
|
||||||
requires-python = ">=3.10,<3.13" # <3.12 required for pyinstaller
|
requires-python = ">=3.10,<3.13" # <3.12 required for pyinstaller
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"colorama~=0.4",
|
"colorama",
|
||||||
"coloredlogs~=15.0",
|
"coloredlogs",
|
||||||
"overrides~=7.4",
|
"nodriver",
|
||||||
"ruamel.yaml~=0.18",
|
"overrides",
|
||||||
"pywin32==306; sys_platform == 'win32'",
|
"ruamel.yaml",
|
||||||
"selenium~=4.18",
|
"psutil",
|
||||||
"selenium_stealth~=1.0",
|
"pywin32; sys_platform == 'win32'",
|
||||||
"wcmatch~=8.5",
|
"wcmatch",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
@@ -60,25 +60,28 @@ write_template = "__version__ = '{}'\n"
|
|||||||
|
|
||||||
[tool.pdm.dev-dependencies]
|
[tool.pdm.dev-dependencies]
|
||||||
dev = [
|
dev = [
|
||||||
"autopep8~=2.0",
|
"autopep8",
|
||||||
"bandit~=1.7",
|
"bandit",
|
||||||
"toml", # required by bandit
|
"toml", # required by bandit
|
||||||
"tomli", # required by bandit
|
"tomli", # required by bandit
|
||||||
"pydantic~=2.6",
|
|
||||||
"pytest~=8.1",
|
"pytest",
|
||||||
"pyinstaller~=6.4",
|
"flaky", # used by pytest
|
||||||
"psutil",
|
|
||||||
"pylint~=3.1",
|
"pyinstaller",
|
||||||
"mypy~=1.8",
|
|
||||||
|
"pylint",
|
||||||
|
"mypy",
|
||||||
|
"pyright",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.pdm.scripts] # https://pdm-project.org/latest/usage/scripts/
|
[tool.pdm.scripts] # https://pdm-project.org/latest/usage/scripts/
|
||||||
app = "python -m kleinanzeigen_bot"
|
app = "python -m kleinanzeigen_bot"
|
||||||
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
|
compile.cmd = "python -O -m PyInstaller pyinstaller.spec --clean"
|
||||||
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
|
compile.env = {PYTHONHASHSEED = "1", SOURCE_DATE_EPOCH = "0"} # https://pyinstaller.org/en/stable/advanced-topics.html#creating-a-reproducible-build
|
||||||
format = "autopep8 --recursive --in-place kleinanzeigen_bot tests --verbose"
|
format = "autopep8 --recursive --in-place src tests --verbose"
|
||||||
lint = {shell = "pylint -v src tests && autopep8 -v --exit-code --recursive --diff src tests && echo No issues found."}
|
lint = {shell = "pylint -v src tests && autopep8 -v --exit-code --recursive --diff src tests && mypy" }
|
||||||
scan = "bandit -c pyproject.toml -r kleinanzeigen_bot"
|
scan = "bandit -c pyproject.toml -r src"
|
||||||
test = "python -m pytest --capture=tee-sys -v"
|
test = "python -m pytest --capture=tee-sys -v"
|
||||||
utest = "python -m pytest --capture=tee-sys -v -m 'not itest'"
|
utest = "python -m pytest --capture=tee-sys -v -m 'not itest'"
|
||||||
itest = "python -m pytest --capture=tee-sys -v -m 'itest'"
|
itest = "python -m pytest --capture=tee-sys -v -m 'itest'"
|
||||||
@@ -117,6 +120,7 @@ aggressive = 3
|
|||||||
# https://mypy.readthedocs.io/en/stable/config_file.html
|
# https://mypy.readthedocs.io/en/stable/config_file.html
|
||||||
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
|
#mypy_path = "$MYPY_CONFIG_FILE_DIR/tests/stubs"
|
||||||
python_version = "3.10"
|
python_version = "3.10"
|
||||||
|
files = "src,tests"
|
||||||
strict = true
|
strict = true
|
||||||
disallow_untyped_calls = false
|
disallow_untyped_calls = false
|
||||||
disallow_untyped_defs = true
|
disallow_untyped_defs = true
|
||||||
@@ -127,6 +131,18 @@ warn_unused_ignores = true
|
|||||||
verbosity = 0
|
verbosity = 0
|
||||||
|
|
||||||
|
|
||||||
|
#####################
|
||||||
|
# pyright
|
||||||
|
# https://github.com/microsoft/pyright/
|
||||||
|
#####################
|
||||||
|
[tool.pyright]
|
||||||
|
# https://microsoft.github.io/pyright/#/configuration?id=main-configuration-options
|
||||||
|
include = ["src", "tests"]
|
||||||
|
defineConstant = { DEBUG = false }
|
||||||
|
pythonVersion = "3.10"
|
||||||
|
typeCheckingMode = "standard"
|
||||||
|
|
||||||
|
|
||||||
#####################
|
#####################
|
||||||
# pylint
|
# pylint
|
||||||
# https://pypi.org/project/pylint/
|
# https://pypi.org/project/pylint/
|
||||||
@@ -190,6 +206,7 @@ disable= [
|
|||||||
"multiple-imports",
|
"multiple-imports",
|
||||||
"multiple-statements",
|
"multiple-statements",
|
||||||
"no-self-use",
|
"no-self-use",
|
||||||
|
"no-member", # pylint cannot find async methods from super class
|
||||||
"too-few-public-methods"
|
"too-few-public-methods"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -3,24 +3,22 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
import atexit, copy, getopt, importlib.metadata, json, logging, os, re, signal, shutil, sys, textwrap, time, urllib
|
import asyncio, atexit, copy, getopt, importlib.metadata, json, logging, os, re, signal, shutil, sys, textwrap, time
|
||||||
|
import urllib.parse as urllib_parse
|
||||||
|
import urllib.request as urllib_request
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
from typing import Any, Final
|
from typing import Any, Final
|
||||||
|
|
||||||
import certifi, colorama
|
import colorama, nodriver
|
||||||
from overrides import overrides
|
from overrides import overrides
|
||||||
from ruamel.yaml import YAML
|
from ruamel.yaml import YAML
|
||||||
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, TimeoutException, WebDriverException
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
|
||||||
from wcmatch import glob
|
from wcmatch import glob
|
||||||
|
|
||||||
from . import utils, resources, extract
|
from . import utils, resources, extract
|
||||||
from .utils import abspath, apply_defaults, ensure, is_frozen, pause, pluralize, safe_get, parse_datetime
|
from .utils import abspath, ainput, apply_defaults, ensure, is_frozen, pluralize, safe_get, parse_datetime
|
||||||
from .selenium_mixin import SeleniumMixin
|
from .web_scraping_mixin import By, Element, Page, Is, WebScrapingMixin
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
|
# W0406: possibly a bug, see https://github.com/PyCQA/pylint/issues/3933
|
||||||
@@ -32,14 +30,10 @@ LOG.setLevel(logging.INFO)
|
|||||||
colorama.init()
|
colorama.init()
|
||||||
|
|
||||||
|
|
||||||
class KleinanzeigenBot(SeleniumMixin):
|
class KleinanzeigenBot(WebScrapingMixin):
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
|
|
||||||
# workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/207
|
|
||||||
# see https://github.com/pyinstaller/pyinstaller/issues/7229#issuecomment-1309383026
|
|
||||||
os.environ["SSL_CERT_FILE"] = certifi.where()
|
|
||||||
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.root_url = "https://www.kleinanzeigen.de"
|
self.root_url = "https://www.kleinanzeigen.de"
|
||||||
@@ -61,15 +55,14 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
def __del__(self) -> None:
|
def __del__(self) -> None:
|
||||||
if self.file_log:
|
if self.file_log:
|
||||||
LOG_ROOT.removeHandler(self.file_log)
|
LOG_ROOT.removeHandler(self.file_log)
|
||||||
if self.webdriver:
|
self.close_browser_session()
|
||||||
self.webdriver.quit()
|
|
||||||
self.webdriver = None
|
|
||||||
|
|
||||||
def get_version(self) -> str:
|
def get_version(self) -> str:
|
||||||
return __version__
|
return __version__
|
||||||
|
|
||||||
def run(self, args:list[str]) -> None:
|
async def run(self, args:list[str]) -> None:
|
||||||
self.parse_args(args)
|
self.parse_args(args)
|
||||||
|
try:
|
||||||
match self.command:
|
match self.command:
|
||||||
case "help":
|
case "help":
|
||||||
self.show_help()
|
self.show_help()
|
||||||
@@ -91,9 +84,9 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
self.ads_selector = 'due'
|
self.ads_selector = 'due'
|
||||||
|
|
||||||
if ads := self.load_ads():
|
if ads := self.load_ads():
|
||||||
self.create_webdriver_session()
|
await self.create_browser_session()
|
||||||
self.login()
|
await self.login()
|
||||||
self.publish_ads(ads)
|
await self.publish_ads(ads)
|
||||||
else:
|
else:
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
LOG.info("DONE: No new/outdated ads found.")
|
LOG.info("DONE: No new/outdated ads found.")
|
||||||
@@ -102,9 +95,9 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
self.configure_file_logging()
|
self.configure_file_logging()
|
||||||
self.load_config()
|
self.load_config()
|
||||||
if ads := self.load_ads():
|
if ads := self.load_ads():
|
||||||
self.create_webdriver_session()
|
await self.create_browser_session()
|
||||||
self.login()
|
await self.login()
|
||||||
self.delete_ads(ads)
|
await self.delete_ads(ads)
|
||||||
else:
|
else:
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
LOG.info("DONE: No ads to delete found.")
|
LOG.info("DONE: No ads to delete found.")
|
||||||
@@ -115,15 +108,16 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
|
if not (self.ads_selector in {'all', 'new'} or re.compile(r'\d+[,\d+]*').search(self.ads_selector)):
|
||||||
LOG.warning('You provided no ads selector. Defaulting to "new".')
|
LOG.warning('You provided no ads selector. Defaulting to "new".')
|
||||||
self.ads_selector = 'new'
|
self.ads_selector = 'new'
|
||||||
# start session
|
|
||||||
self.load_config()
|
self.load_config()
|
||||||
self.create_webdriver_session()
|
await self.create_browser_session()
|
||||||
self.login()
|
await self.login()
|
||||||
self.start_download_routine() # call correct version of download
|
await self.download_ads()
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
LOG.error("Unknown command: %s", self.command)
|
LOG.error("Unknown command: %s", self.command)
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
finally:
|
||||||
|
self.close_browser_session()
|
||||||
|
|
||||||
def show_help(self) -> None:
|
def show_help(self) -> None:
|
||||||
if is_frozen():
|
if is_frozen():
|
||||||
@@ -200,6 +194,7 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
self.delete_old_ads = False
|
self.delete_old_ads = False
|
||||||
case "-v" | "--verbose":
|
case "-v" | "--verbose":
|
||||||
LOG.setLevel(logging.DEBUG)
|
LOG.setLevel(logging.DEBUG)
|
||||||
|
logging.getLogger("nodriver").setLevel(logging.INFO)
|
||||||
|
|
||||||
match len(arguments):
|
match len(arguments):
|
||||||
case 0:
|
case 0:
|
||||||
@@ -290,6 +285,7 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
ad_cfg["description"] = descr_prefix + (ad_cfg["description"] or "") + descr_suffix
|
ad_cfg["description"] = descr_prefix + (ad_cfg["description"] or "") + descr_suffix
|
||||||
|
ad_cfg["description"] = ad_cfg["description"].replace("@", "(at)")
|
||||||
ensure(len(ad_cfg["description"]) <= 4000, f"Length of ad description including prefix and suffix exceeds 4000 chars. @ [{ad_file}]")
|
ensure(len(ad_cfg["description"]) <= 4000, f"Length of ad description including prefix and suffix exceeds 4000 chars. @ [{ad_file}]")
|
||||||
|
|
||||||
# pylint: disable=cell-var-from-loop
|
# pylint: disable=cell-var-from-loop
|
||||||
@@ -311,6 +307,7 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
ensure(not safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
|
ensure(not safe_get(ad_cfg, "price"), f"-> [price] must not be specified for GIVE_AWAY ad @ [{ad_file}]")
|
||||||
elif ad_cfg["price_type"] == "FIXED":
|
elif ad_cfg["price_type"] == "FIXED":
|
||||||
assert_has_value("price")
|
assert_has_value("price")
|
||||||
|
|
||||||
assert_one_of("shipping_type", {"PICKUP", "SHIPPING", "NOT_APPLICABLE"})
|
assert_one_of("shipping_type", {"PICKUP", "SHIPPING", "NOT_APPLICABLE"})
|
||||||
assert_has_value("contact.name")
|
assert_has_value("contact.name")
|
||||||
assert_has_value("republication_interval")
|
assert_has_value("republication_interval")
|
||||||
@@ -326,9 +323,9 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
|
|
||||||
if ad_cfg["images"]:
|
if ad_cfg["images"]:
|
||||||
images = []
|
images = []
|
||||||
|
ad_dir = os.path.dirname(ad_file)
|
||||||
for image_pattern in ad_cfg["images"]:
|
for image_pattern in ad_cfg["images"]:
|
||||||
pattern_images = set()
|
pattern_images = set()
|
||||||
ad_dir = os.path.dirname(ad_file)
|
|
||||||
for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
|
for image_file in glob.glob(image_pattern, root_dir = ad_dir, flags = glob.GLOBSTAR | glob.BRACE | glob.EXTGLOB):
|
||||||
_, image_file_ext = os.path.splitext(image_file)
|
_, image_file_ext = os.path.splitext(image_file)
|
||||||
ensure(image_file_ext.lower() in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]")
|
ensure(image_file_ext.lower() in {".gif", ".jpg", ".jpeg", ".png"}, f"Unsupported image file type [{image_file}]")
|
||||||
@@ -376,133 +373,122 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
self.browser_config.user_data_dir = abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path)
|
self.browser_config.user_data_dir = abspath(self.config["browser"]["user_data_dir"], relative_to = self.config_file_path)
|
||||||
self.browser_config.profile_name = self.config["browser"]["profile_name"]
|
self.browser_config.profile_name = self.config["browser"]["profile_name"]
|
||||||
|
|
||||||
def login(self) -> None:
|
async def login(self) -> None:
|
||||||
LOG.info("Checking if already logged in")
|
LOG.info("Checking if already logged in...")
|
||||||
self.web_open(f"{self.root_url}")
|
await self.web_open(f"{self.root_url}")
|
||||||
|
|
||||||
if self.is_logged_in():
|
if await self.is_logged_in():
|
||||||
LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"])
|
LOG.info("Already logged in as [%s]. Skipping login.", self.config["login"]["username"])
|
||||||
return
|
return
|
||||||
|
|
||||||
|
LOG.info("Opening login page...")
|
||||||
|
await self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/")
|
||||||
|
|
||||||
|
try:
|
||||||
|
await self.web_find(By.CSS_SELECTOR, "iframe[src*='captcha-delivery.com']", timeout = 2)
|
||||||
|
LOG.warning("############################################")
|
||||||
|
LOG.warning("# Captcha present! Please solve the captcha.")
|
||||||
|
LOG.warning("############################################")
|
||||||
|
await self.web_await(lambda: self.web_find(By.ID, "login-form") is not None, timeout = 5 * 60)
|
||||||
|
except TimeoutError:
|
||||||
|
pass
|
||||||
|
|
||||||
LOG.info("Logging in as [%s]...", self.config["login"]["username"])
|
LOG.info("Logging in as [%s]...", self.config["login"]["username"])
|
||||||
self.web_open(f"{self.root_url}/m-einloggen.html?targetUrl=/")
|
await self.web_input(By.ID, "email", self.config["login"]["username"])
|
||||||
|
await self.web_input(By.ID, "password", self.config["login"]["password"])
|
||||||
# close redesign banner
|
await self.web_click(By.CSS_SELECTOR, "form#login-form button[type='submit']")
|
||||||
try:
|
|
||||||
self.web_click(By.XPATH, '//*[@id="pre-launch-comms-interstitial-frontend"]//button[.//*[text()[contains(.,"nicht mehr anzeigen")]]]')
|
|
||||||
except NoSuchElementException:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# accept privacy banner
|
|
||||||
try:
|
|
||||||
self.web_click(By.ID, "gdpr-banner-accept")
|
|
||||||
except NoSuchElementException:
|
|
||||||
pass
|
|
||||||
|
|
||||||
self.web_input(By.ID, "login-email", self.config["login"]["username"])
|
|
||||||
self.web_input(By.ID, "login-password", self.config["login"]["password"])
|
|
||||||
|
|
||||||
self.handle_captcha_if_present("login-recaptcha", "but DON'T click 'Einloggen'.")
|
|
||||||
|
|
||||||
self.web_click(By.ID, "login-submit")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.web_find(By.ID, "new-device-login", 4)
|
await self.web_find(By.TEXT, "Wir haben dir gerade einen 6-stelligen Code für die Telefonnummer", timeout = 4)
|
||||||
LOG.warning("############################################")
|
LOG.warning("############################################")
|
||||||
LOG.warning("# Device verification message detected. Use the 'Login bestätigen' URL from the mentioned e-mail into the same browser tab.")
|
LOG.warning("# Device verification message detected. Please handle it.")
|
||||||
LOG.warning("############################################")
|
LOG.warning("############################################")
|
||||||
input("Press ENTER when done...")
|
await ainput("Press ENTER when done...")
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def is_logged_in(self) -> bool:
|
|
||||||
try:
|
try:
|
||||||
user_email_elem = self.web_find(By.ID, "user-email")
|
LOG.info("Handling GDPR disclaimer...")
|
||||||
email_text = user_email_elem.text
|
await self.web_find(By.ID, "gdpr-banner-accept", timeout = 10)
|
||||||
if f"angemeldet als: {self.config['login']['username']}" == email_text:
|
await self.web_click(By.ID, "gdpr-banner-cmp-button")
|
||||||
|
await self.web_click(By.CSS_SELECTOR, "#ConsentManagementPage button.Button-secondary", timeout = 10)
|
||||||
|
except TimeoutError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
async def is_logged_in(self) -> bool:
|
||||||
|
try:
|
||||||
|
email = await self.web_text(By.ID, "user-email")
|
||||||
|
if f"angemeldet als: {self.config['login']['username']}" == email:
|
||||||
return True
|
return True
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
return False
|
return False
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def handle_captcha_if_present(self, captcha_element_id:str, msg:str) -> None:
|
async def delete_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
|
||||||
try:
|
|
||||||
self.web_click(By.XPATH, f"//*[@id='{captcha_element_id}']")
|
|
||||||
except NoSuchElementException:
|
|
||||||
return
|
|
||||||
|
|
||||||
LOG.warning("############################################")
|
|
||||||
LOG.warning("# Captcha present! Please solve and close the captcha, %s", msg)
|
|
||||||
LOG.warning("############################################")
|
|
||||||
self.webdriver.switch_to.frame(self.web_find(By.CSS_SELECTOR, f"#{captcha_element_id} iframe"))
|
|
||||||
self.web_await(lambda _: self.webdriver.find_element(By.ID, "recaptcha-anchor").get_attribute("aria-checked") == "true", timeout = 5 * 60)
|
|
||||||
self.webdriver.switch_to.default_content()
|
|
||||||
|
|
||||||
def delete_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
|
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for (ad_file, ad_cfg, _) in ad_cfgs:
|
for (ad_file, ad_cfg, _) in ad_cfgs:
|
||||||
count += 1
|
count += 1
|
||||||
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
|
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
|
||||||
self.delete_ad(ad_cfg)
|
await self.delete_ad(ad_cfg)
|
||||||
pause(2000, 4000)
|
await self.web_sleep()
|
||||||
|
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
LOG.info("DONE: Deleting %s", pluralize("ad", count))
|
LOG.info("DONE: Deleting %s", pluralize("ad", count))
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
|
|
||||||
def delete_ad(self, ad_cfg: dict[str, Any]) -> bool:
|
async def delete_ad(self, ad_cfg: dict[str, Any]) -> bool:
|
||||||
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
|
LOG.info("Deleting ad '%s' if already present...", ad_cfg["title"])
|
||||||
|
|
||||||
self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
await self.web_open(f"{self.root_url}/m-meine-anzeigen.html")
|
||||||
csrf_token_elem = self.web_find(By.XPATH, "//meta[@name='_csrf']")
|
csrf_token_elem = await self.web_find(By.CSS_SELECTOR, "meta[name=_csrf]")
|
||||||
csrf_token = csrf_token_elem.get_attribute("content")
|
csrf_token = csrf_token_elem.attrs["content"]
|
||||||
if csrf_token is None:
|
if csrf_token is None:
|
||||||
raise AssertionError("Expected CSRF Token not found in HTML content!")
|
raise AssertionError("Expected CSRF Token not found in HTML content!")
|
||||||
|
|
||||||
if self.delete_ads_by_title:
|
if self.delete_ads_by_title:
|
||||||
published_ads = json.loads(self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT")["content"])["ads"]
|
published_ads = json.loads((await self.web_request(f"{self.root_url}/m-meine-anzeigen-verwalten.json?sort=DEFAULT"))["content"])["ads"]
|
||||||
|
|
||||||
for published_ad in published_ads:
|
for published_ad in published_ads:
|
||||||
published_ad_id = int(published_ad.get("id", -1))
|
published_ad_id = int(published_ad.get("id", -1))
|
||||||
published_ad_title = published_ad.get("title", "")
|
published_ad_title = published_ad.get("title", "")
|
||||||
if ad_cfg["id"] == published_ad_id or ad_cfg["title"] == published_ad_title:
|
if ad_cfg["id"] == published_ad_id or ad_cfg["title"] == published_ad_title:
|
||||||
LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
|
LOG.info(" -> deleting %s '%s'...", published_ad_id, published_ad_title)
|
||||||
self.web_request(
|
await self.web_request(
|
||||||
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}",
|
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={published_ad_id}",
|
||||||
method = "POST",
|
method = "POST",
|
||||||
headers = {"x-csrf-token": csrf_token}
|
headers = {"x-csrf-token": csrf_token}
|
||||||
)
|
)
|
||||||
elif ad_cfg["id"]:
|
elif ad_cfg["id"]:
|
||||||
self.web_request(
|
await self.web_request(
|
||||||
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg['id']}",
|
url = f"{self.root_url}/m-anzeigen-loeschen.json?ids={ad_cfg['id']}",
|
||||||
method = "POST",
|
method = "POST",
|
||||||
headers = {"x-csrf-token": csrf_token},
|
headers = {"x-csrf-token": csrf_token},
|
||||||
valid_response_codes = [200, 404]
|
valid_response_codes = [200, 404]
|
||||||
)
|
)
|
||||||
|
|
||||||
pause(1500, 3000)
|
await self.web_sleep()
|
||||||
ad_cfg["id"] = None
|
ad_cfg["id"] = None
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def publish_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
|
async def publish_ads(self, ad_cfgs:list[tuple[str, dict[str, Any], dict[str, Any]]]) -> None:
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs:
|
for (ad_file, ad_cfg, ad_cfg_orig) in ad_cfgs:
|
||||||
count += 1
|
count += 1
|
||||||
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
|
LOG.info("Processing %s/%s: '%s' from [%s]...", count, len(ad_cfgs), ad_cfg["title"], ad_file)
|
||||||
self.publish_ad(ad_file, ad_cfg, ad_cfg_orig)
|
await self.publish_ad(ad_file, ad_cfg, ad_cfg_orig)
|
||||||
self.web_await(lambda _: self.webdriver.find_element(By.ID, "checking-done").is_displayed(), timeout = 5 * 60)
|
await self.web_await(lambda: self.web_check(By.ID, "checking-done", Is.DISPLAYED), timeout = 5 * 60)
|
||||||
|
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
|
LOG.info("DONE: (Re-)published %s", pluralize("ad", count))
|
||||||
LOG.info("############################################")
|
LOG.info("############################################")
|
||||||
|
|
||||||
def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any]) -> None:
|
async def publish_ad(self, ad_file:str, ad_cfg: dict[str, Any], ad_cfg_orig: dict[str, Any]) -> None:
|
||||||
self.assert_free_ad_limit_not_reached()
|
await self.assert_free_ad_limit_not_reached()
|
||||||
|
|
||||||
if self.delete_old_ads:
|
if self.delete_old_ads:
|
||||||
self.delete_ad(ad_cfg)
|
await self.delete_ad(ad_cfg)
|
||||||
|
|
||||||
LOG.info("Publishing ad '%s'...", ad_cfg["title"])
|
LOG.info("Publishing ad '%s'...", ad_cfg["title"])
|
||||||
|
|
||||||
@@ -510,45 +496,44 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
LOG.debug(" -> effective ad meta:")
|
LOG.debug(" -> effective ad meta:")
|
||||||
YAML().dump(ad_cfg, sys.stdout)
|
YAML().dump(ad_cfg, sys.stdout)
|
||||||
|
|
||||||
self.web_open(f"{self.root_url}/p-anzeige-aufgeben-schritt2.html")
|
await self.web_open(f"{self.root_url}/p-anzeige-aufgeben-schritt2.html")
|
||||||
|
|
||||||
if ad_cfg["type"] == "WANTED":
|
if ad_cfg["type"] == "WANTED":
|
||||||
self.web_click(By.ID, "adType2")
|
await self.web_click(By.ID, "adType2")
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set title
|
# set title
|
||||||
#############################
|
#############################
|
||||||
self.web_input(By.ID, "postad-title", ad_cfg["title"])
|
await self.web_input(By.ID, "postad-title", ad_cfg["title"])
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set category
|
# set category
|
||||||
#############################
|
#############################
|
||||||
self.__set_category(ad_file, ad_cfg)
|
await self.__set_category(ad_file, ad_cfg)
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set shipping type/options/costs
|
# set shipping type/options/costs
|
||||||
#############################
|
#############################
|
||||||
if ad_cfg["shipping_type"] == "PICKUP":
|
if ad_cfg["shipping_type"] == "PICKUP":
|
||||||
try:
|
try:
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "ShippingPickupSelector")]//label[text()[contains(.,"Nur Abholung")]]/input[@type="radio"]')
|
await self.web_click(By.XPATH,
|
||||||
except NoSuchElementException as ex:
|
'//*[contains(@class, "ShippingPickupSelector")]//label[text()[contains(.,"Nur Abholung")]]/input[@type="radio"]')
|
||||||
|
except TimeoutError as ex:
|
||||||
LOG.debug(ex, exc_info = True)
|
LOG.debug(ex, exc_info = True)
|
||||||
elif ad_cfg["shipping_options"]:
|
elif ad_cfg["shipping_options"]:
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "jsx-2623555103")]')
|
await self.web_click(By.CSS_SELECTOR, '[class*="jsx-2623555103"]')
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "CarrierSelectionModal--Button")]')
|
await self.web_click(By.CSS_SELECTOR, '[class*="CarrierSelectionModal--Button"]')
|
||||||
self.__set_shipping_options(ad_cfg)
|
await self.__set_shipping_options(ad_cfg)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "jsx-2623555103")]')
|
await self.web_click(By.CSS_SELECTOR, '[class*="jsx-2623555103"]')
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "CarrierSelectionModal--Button")]')
|
await self.web_click(By.CSS_SELECTOR, '[class*="CarrierSelectionModal--Button"]')
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "CarrierOption--Main")]')
|
await self.web_click(By.CSS_SELECTOR, '[class*="CarrierOption--Main"]')
|
||||||
if ad_cfg["shipping_costs"]:
|
if ad_cfg["shipping_costs"]:
|
||||||
self.web_input(By.XPATH,
|
await self.web_input(By.CSS_SELECTOR, '.IndividualShippingInput input[type="text"]', str.replace(ad_cfg["shipping_costs"], ".", ",")
|
||||||
'//*[contains(@class, "IndividualShippingInput")]//input[@type="text"]',
|
|
||||||
str.replace(ad_cfg["shipping_costs"], ".", ",")
|
|
||||||
)
|
)
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]')
|
await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]')
|
||||||
except NoSuchElementException as ex:
|
except TimeoutError as ex:
|
||||||
LOG.debug(ex, exc_info = True)
|
LOG.debug(ex, exc_info = True)
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
@@ -557,11 +542,11 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
price_type = ad_cfg["price_type"]
|
price_type = ad_cfg["price_type"]
|
||||||
if price_type != "NOT_APPLICABLE":
|
if price_type != "NOT_APPLICABLE":
|
||||||
try:
|
try:
|
||||||
self.web_select(By.XPATH, "//select[@id='price-type-react' or @id='micro-frontend-price-type' or @id='priceType']", price_type)
|
await self.web_select(By.CSS_SELECTOR, "select#price-type-react, select#micro-frontend-price-type, select#priceType", price_type)
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
if safe_get(ad_cfg, "price"):
|
if safe_get(ad_cfg, "price"):
|
||||||
self.web_input(By.XPATH, "//input[@id='post-ad-frontend-price' or @id='micro-frontend-price' or @id='pstad-price']", ad_cfg["price"])
|
await self.web_input(By.CSS_SELECTOR, "input#post-ad-frontend-price, input#micro-frontend-price, input#pstad-price", ad_cfg["price"])
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set sell_directly
|
# set sell_directly
|
||||||
@@ -570,81 +555,80 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
try:
|
try:
|
||||||
if ad_cfg["shipping_type"] == "SHIPPING":
|
if ad_cfg["shipping_type"] == "SHIPPING":
|
||||||
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
|
if sell_directly and ad_cfg["shipping_options"] and price_type in {"FIXED", "NEGOTIABLE"}:
|
||||||
if not self.webdriver.find_element(By.ID, "radio-buy-now-yes").is_selected():
|
if not await self.web_check(By.ID, "radio-buy-now-yes", Is.SELECTED):
|
||||||
self.web_click(By.XPATH, '//*[contains(@id, "radio-buy-now-yes")]')
|
await self.web_click(By.ID, 'radio-buy-now-yes')
|
||||||
elif not self.webdriver.find_element(By.ID, "radio-buy-now-no").is_selected():
|
elif not await self.web_check(By.ID, "radio-buy-now-no", Is.SELECTED):
|
||||||
self.web_click(By.XPATH, '//*[contains(@id, "radio-buy-now-no")]')
|
await self.web_click(By.ID, 'radio-buy-now-no')
|
||||||
except NoSuchElementException as ex:
|
except TimeoutError as ex:
|
||||||
LOG.debug(ex, exc_info = True)
|
LOG.debug(ex, exc_info = True)
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set description
|
# set description
|
||||||
#############################
|
#############################
|
||||||
self.web_execute("document.querySelector('#pstad-descrptn').value = `" + ad_cfg["description"].replace("`", "'") + "`")
|
await self.web_execute("document.querySelector('#pstad-descrptn').value = `" + ad_cfg["description"].replace("`", "'") + "`")
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set contact zipcode
|
# set contact zipcode
|
||||||
#############################
|
#############################
|
||||||
if ad_cfg["contact"]["zipcode"]:
|
if ad_cfg["contact"]["zipcode"]:
|
||||||
self.web_input(By.ID, "pstad-zip", ad_cfg["contact"]["zipcode"])
|
await self.web_input(By.ID, "pstad-zip", ad_cfg["contact"]["zipcode"])
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set contact street
|
# set contact street
|
||||||
#############################
|
#############################
|
||||||
if ad_cfg["contact"]["street"]:
|
if ad_cfg["contact"]["street"]:
|
||||||
try:
|
try:
|
||||||
if not self.webdriver.find_element(By.ID, "pstad-street").is_enabled():
|
if await self.web_check(By.ID, "pstad-street", Is.DISABLED):
|
||||||
self.webdriver.find_element(By.ID, "addressVisibility").click()
|
await self.web_click(By.ID, "addressVisibility")
|
||||||
pause(2000)
|
await self.web_sleep()
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
# ignore
|
# ignore
|
||||||
pass
|
pass
|
||||||
self.web_input(By.ID, "pstad-street", ad_cfg["contact"]["street"])
|
await self.web_input(By.ID, "pstad-street", ad_cfg["contact"]["street"])
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set contact name
|
# set contact name
|
||||||
#############################
|
#############################
|
||||||
if ad_cfg["contact"]["name"] and not self.webdriver.find_element(By.ID, "postad-contactname").get_attribute("readonly"):
|
if ad_cfg["contact"]["name"] and not await self.web_check(By.ID, "postad-contactname", Is.READONLY):
|
||||||
self.web_input(By.ID, "postad-contactname", ad_cfg["contact"]["name"])
|
await self.web_input(By.ID, "postad-contactname", ad_cfg["contact"]["name"])
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# set contact phone
|
# set contact phone
|
||||||
#############################
|
#############################
|
||||||
if ad_cfg["contact"]["phone"]:
|
if ad_cfg["contact"]["phone"]:
|
||||||
if self.webdriver.find_element(By.ID, "postad-phonenumber").is_displayed():
|
if await self.web_check(By.ID, "postad-phonenumber", Is.DISPLAYED):
|
||||||
try:
|
try:
|
||||||
if not self.webdriver.find_element(By.ID, "postad-phonenumber").is_enabled():
|
if await self.web_check(By.ID, "postad-phonenumber", Is.DISABLED):
|
||||||
self.webdriver.find_element(By.ID, "phoneNumberVisibility").click()
|
await self.web_click(By.ID, "phoneNumberVisibility")
|
||||||
pause(2000)
|
await self.web_sleep()
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
# ignore
|
# ignore
|
||||||
pass
|
pass
|
||||||
self.web_input(By.ID, "postad-phonenumber", ad_cfg["contact"]["phone"])
|
await self.web_input(By.ID, "postad-phonenumber", ad_cfg["contact"]["phone"])
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# upload images
|
# upload images
|
||||||
#############################
|
#############################
|
||||||
self.__upload_images(ad_cfg)
|
await self.__upload_images(ad_cfg)
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# submit
|
# submit
|
||||||
#############################
|
#############################
|
||||||
self.handle_captcha_if_present("postAd-recaptcha", "but DON'T click 'Anzeige aufgeben'.")
|
|
||||||
try:
|
try:
|
||||||
self.web_click(By.ID, "pstad-submit")
|
await self.web_click(By.ID, "pstad-submit")
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
# https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/40
|
# https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/40
|
||||||
self.web_click(By.XPATH, "//fieldset[@id='postad-publish']//*[contains(text(),'Anzeige aufgeben')]")
|
await self.web_click(By.XPATH, "//fieldset[@id='postad-publish']//*[contains(text(),'Anzeige aufgeben')]")
|
||||||
self.web_click(By.ID, "imprint-guidance-submit")
|
await self.web_click(By.ID, "imprint-guidance-submit")
|
||||||
|
|
||||||
self.web_await(EC.url_contains("p-anzeige-aufgeben-bestaetigung.html?adId="), 20)
|
await self.web_await(lambda: "p-anzeige-aufgeben-bestaetigung.html?adId=" in self.page.url, timeout = 20)
|
||||||
|
|
||||||
ad_cfg_orig["updated_on"] = datetime.utcnow().isoformat()
|
ad_cfg_orig["updated_on"] = datetime.utcnow().isoformat()
|
||||||
if not ad_cfg["created_on"] and not ad_cfg["id"]:
|
if not ad_cfg["created_on"] and not ad_cfg["id"]:
|
||||||
ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]
|
ad_cfg_orig["created_on"] = ad_cfg_orig["updated_on"]
|
||||||
|
|
||||||
# extract the ad id from the URL's query parameter
|
# extract the ad id from the URL's query parameter
|
||||||
current_url_query_params = urllib.parse.parse_qs(urllib.parse.urlparse(self.webdriver.current_url).query)
|
current_url_query_params = urllib_parse.parse_qs(urllib_parse.urlparse(self.page.url).query)
|
||||||
ad_id = int(current_url_query_params.get("adId", [])[0])
|
ad_id = int(current_url_query_params.get("adId", [])[0])
|
||||||
ad_cfg_orig["id"] = ad_id
|
ad_cfg_orig["id"] = ad_id
|
||||||
|
|
||||||
@@ -652,24 +636,25 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
|
|
||||||
utils.save_dict(ad_file, ad_cfg_orig)
|
utils.save_dict(ad_file, ad_cfg_orig)
|
||||||
|
|
||||||
def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
|
async def __set_category(self, ad_file:str, ad_cfg: dict[str, Any]) -> None:
|
||||||
# click on something to trigger automatic category detection
|
# click on something to trigger automatic category detection
|
||||||
self.web_click(By.ID, "pstad-descrptn")
|
await self.web_click(By.ID, "pstad-descrptn")
|
||||||
|
|
||||||
try:
|
|
||||||
self.web_find(By.XPATH, "//*[@id='postad-category-path'][text()]")
|
|
||||||
is_category_auto_selected = True
|
|
||||||
except NoSuchElementException:
|
|
||||||
is_category_auto_selected = False
|
is_category_auto_selected = False
|
||||||
|
try:
|
||||||
|
if await self.web_text(By.ID, "postad-category-path"):
|
||||||
|
is_category_auto_selected = True
|
||||||
|
except TimeoutError:
|
||||||
|
pass
|
||||||
|
|
||||||
if ad_cfg["category"]:
|
if ad_cfg["category"]:
|
||||||
utils.pause(2000) # workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/39
|
await self.web_sleep() # workaround for https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/39
|
||||||
self.web_click(By.ID, "pstad-lnk-chngeCtgry")
|
await self.web_click(By.ID, "pstad-lnk-chngeCtgry")
|
||||||
self.web_find(By.ID, "postad-step1-sbmt")
|
await self.web_find(By.ID, "postad-step1-sbmt")
|
||||||
|
|
||||||
category_url = f"{self.root_url}/p-kategorie-aendern.html#?path={ad_cfg['category']}"
|
category_url = f"{self.root_url}/p-kategorie-aendern.html#?path={ad_cfg['category']}"
|
||||||
self.web_open(category_url)
|
await self.web_open(category_url)
|
||||||
self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button")
|
await self.web_click(By.XPATH, "//*[@id='postad-step1-sbmt']/button")
|
||||||
else:
|
else:
|
||||||
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
|
ensure(is_category_auto_selected, f"No category specified in [{ad_file}] and automatic category detection failed")
|
||||||
|
|
||||||
@@ -680,29 +665,27 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
try:
|
try:
|
||||||
# if the <select> element exists but is inside an invisible container, make the container visible
|
# if the <select> element exists but is inside an invisible container, make the container visible
|
||||||
select_container_xpath = f"//div[@class='l-row' and descendant::select[@id='{special_attribute_key}']]"
|
select_container_xpath = f"//div[@class='l-row' and descendant::select[@id='{special_attribute_key}']]"
|
||||||
select_container = self.web_find(By.XPATH, select_container_xpath)
|
if not await self.web_check(By.XPATH, select_container_xpath, Is.DISPLAYED):
|
||||||
if not select_container.is_displayed():
|
await (await self.web_find(By.XPATH, select_container_xpath)).apply("elem => elem.singleNodeValue.style.display = 'block'")
|
||||||
self.web_execute(f"document.evaluate(\"{select_container_xpath}\"," +
|
except TimeoutError:
|
||||||
" document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.style.display = 'block';")
|
|
||||||
except BaseException:
|
|
||||||
pass # nosec
|
pass # nosec
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.web_select(By.XPATH, f"//select[@id='{special_attribute_key}']", special_attribute_value)
|
await self.web_select(By.XPATH, f"//select[@id='{special_attribute_key}']", special_attribute_value)
|
||||||
except WebDriverException:
|
except TimeoutError:
|
||||||
LOG.debug("Attribute field '%s' is not of kind dropdown, trying to input as plain text...", special_attribute_key)
|
LOG.debug("Attribute field '%s' is not of kind dropdown, trying to input as plain text...", special_attribute_key)
|
||||||
try:
|
try:
|
||||||
self.web_input(By.ID, special_attribute_key, special_attribute_value)
|
await self.web_input(By.ID, special_attribute_key, special_attribute_value)
|
||||||
except WebDriverException:
|
except TimeoutError:
|
||||||
LOG.debug("Attribute field '%s' is not of kind plain text, trying to input as radio button...", special_attribute_key)
|
LOG.debug("Attribute field '%s' is not of kind plain text, trying to input as radio button...", special_attribute_key)
|
||||||
try:
|
try:
|
||||||
self.web_click(By.XPATH, f"//*[@id='{special_attribute_key}']/option[@value='{special_attribute_value}']")
|
await self.web_click(By.XPATH, f"//*[@id='{special_attribute_key}']/option[@value='{special_attribute_value}']")
|
||||||
except WebDriverException as ex:
|
except TimeoutError as ex:
|
||||||
LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key)
|
LOG.debug("Attribute field '%s' is not of kind radio button.", special_attribute_key)
|
||||||
raise NoSuchElementException(f"Failed to set special attribute [{special_attribute_key}]") from ex
|
raise TimeoutError(f"Failed to set special attribute [{special_attribute_key}]") from ex
|
||||||
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
|
LOG.debug("Successfully set attribute field [%s] to [%s]...", special_attribute_key, special_attribute_value)
|
||||||
|
|
||||||
def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None:
|
async def __set_shipping_options(self, ad_cfg: dict[str, Any]) -> None:
|
||||||
try:
|
try:
|
||||||
shipping_option_mapping = {
|
shipping_option_mapping = {
|
||||||
"DHL_2": ("Klein", "Paket 2 kg"),
|
"DHL_2": ("Klein", "Paket 2 kg"),
|
||||||
@@ -725,323 +708,98 @@ class KleinanzeigenBot(SeleniumMixin):
|
|||||||
raise ValueError("You can only specify shipping options for one package size!")
|
raise ValueError("You can only specify shipping options for one package size!")
|
||||||
|
|
||||||
shipping_size, = unique_shipping_sizes
|
shipping_size, = unique_shipping_sizes
|
||||||
self.web_click(By.XPATH, f'//*[contains(@class, "SingleSelectionItem--Main")]//input[@type="radio" and @data-testid="{shipping_size}"]')
|
await self.web_click(By.CSS_SELECTOR, f'.SingleSelectionItem--Main input[type=radio][data-testid="{shipping_size}"]')
|
||||||
|
|
||||||
for shipping_package in shipping_packages:
|
for shipping_package in shipping_packages:
|
||||||
self.web_click(
|
await self.web_click(
|
||||||
By.XPATH,
|
By.XPATH,
|
||||||
'//*[contains(@class, "CarrierSelectionModal")]'
|
'//*[contains(@class, "CarrierSelectionModal")]'
|
||||||
'//*[contains(@class, "CarrierOption")]'
|
'//*[contains(@class, "CarrierOption")]'
|
||||||
f'//*[contains(@class, "CarrierOption--Main") and @data-testid="{shipping_package}"]'
|
f'//*[contains(@class, "CarrierOption--Main") and @data-testid="{shipping_package}"]'
|
||||||
)
|
)
|
||||||
|
|
||||||
self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]')
|
await self.web_click(By.XPATH, '//*[contains(@class, "ModalDialog--Actions")]//button[.//*[text()[contains(.,"Fertig")]]]')
|
||||||
except NoSuchElementException as ex:
|
except TimeoutError as ex:
|
||||||
LOG.debug(ex, exc_info = True)
|
LOG.debug(ex, exc_info = True)
|
||||||
|
|
||||||
def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
|
async def __upload_images(self, ad_cfg: dict[str, Any]) -> None:
|
||||||
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
|
LOG.info(" -> found %s", pluralize("image", ad_cfg["images"]))
|
||||||
image_upload = self.web_find(By.XPATH, "//input[@type='file']")
|
image_upload:Element = await self.web_find(By.CSS_SELECTOR, "input[type=file]")
|
||||||
|
|
||||||
def count_uploaded_images() -> int:
|
|
||||||
return len(self.webdriver.find_elements(By.CLASS_NAME, "imagebox-new-thumbnail"))
|
|
||||||
|
|
||||||
for image in ad_cfg["images"]:
|
for image in ad_cfg["images"]:
|
||||||
LOG.info(" -> uploading image [%s]", image)
|
LOG.info(" -> uploading image [%s]", image)
|
||||||
previous_uploaded_images_count = count_uploaded_images()
|
await image_upload.send_file(image)
|
||||||
attempt = 0
|
await self.web_sleep()
|
||||||
while attempt < 3 and previous_uploaded_images_count == count_uploaded_images():
|
|
||||||
image_upload.send_keys(image)
|
|
||||||
start_at = time.time()
|
|
||||||
while previous_uploaded_images_count == count_uploaded_images() and time.time() - start_at < 60:
|
|
||||||
print(".", end = "", flush = True)
|
|
||||||
time.sleep(1)
|
|
||||||
attempt += 1
|
|
||||||
print(flush = True)
|
|
||||||
|
|
||||||
ensure(previous_uploaded_images_count < count_uploaded_images(), f"Couldn't upload image [{image}] within 60 seconds and 3 attempts")
|
async def assert_free_ad_limit_not_reached(self) -> None:
|
||||||
LOG.debug(" => uploaded image within %i seconds", time.time() - start_at)
|
|
||||||
pause(2000)
|
|
||||||
|
|
||||||
def assert_free_ad_limit_not_reached(self) -> None:
|
|
||||||
try:
|
try:
|
||||||
self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header')
|
await self.web_find(By.XPATH, '/html/body/div[1]/form/fieldset[6]/div[1]/header', timeout = 2)
|
||||||
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
|
raise AssertionError(f"Cannot publish more ads. The monthly limit of free ads of account {self.config['login']['username']} is reached.")
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@overrides
|
async def download_ads(self) -> None:
|
||||||
def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
|
|
||||||
start_at = time.time()
|
|
||||||
super().web_open(url, timeout, reload_if_already_open)
|
|
||||||
pause(2000)
|
|
||||||
|
|
||||||
# reload the page until no fullscreen ad is displayed anymore
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
self.web_find(By.XPATH, "/html/body/header[@id='site-header']", 2)
|
|
||||||
return
|
|
||||||
except NoSuchElementException as ex:
|
|
||||||
elapsed = time.time() - start_at
|
|
||||||
if elapsed < timeout:
|
|
||||||
super().web_open(url, timeout - elapsed, True)
|
|
||||||
else:
|
|
||||||
raise TimeoutException("Loading page failed, it still shows fullscreen ad.") from ex
|
|
||||||
|
|
||||||
def navigate_to_ad_page(self, id_:int | None = None, url:str | None = None) -> bool:
|
|
||||||
"""
|
|
||||||
Navigates to an ad page specified with an ad ID; or alternatively by a given URL.
|
|
||||||
|
|
||||||
:param id_: if provided (and no url given), the ID is used to search for the ad to navigate to
|
|
||||||
:param url: if given, this URL is used instead of an id to find the ad page
|
|
||||||
:return: whether the navigation to the ad page was successful
|
|
||||||
"""
|
|
||||||
if not (id_ or url):
|
|
||||||
raise UserWarning('This function needs either the "id_" or "url" parameter given!')
|
|
||||||
if url:
|
|
||||||
self.webdriver.get(url) # navigate to URL directly given
|
|
||||||
else:
|
|
||||||
# enter the ad ID into the search bar
|
|
||||||
self.web_input(By.XPATH, '//*[@id="site-search-query"]', str(id_))
|
|
||||||
# navigate to ad page and wait
|
|
||||||
submit_button = self.webdriver.find_element(By.XPATH, '//*[@id="site-search-submit"]')
|
|
||||||
self.web_await(EC.element_to_be_clickable(submit_button), 15)
|
|
||||||
try:
|
|
||||||
submit_button.click()
|
|
||||||
except ElementClickInterceptedException: # sometimes: special banner might pop up and intercept
|
|
||||||
LOG.warning('Waiting for unexpected element to close...')
|
|
||||||
pause(6000, 10000)
|
|
||||||
submit_button.click()
|
|
||||||
pause(1000, 2000)
|
|
||||||
|
|
||||||
# handle the case that invalid ad ID given
|
|
||||||
if self.webdriver.current_url.endswith('k0'):
|
|
||||||
LOG.error('There is no ad under the given ID.')
|
|
||||||
return False
|
|
||||||
try: # close (warning) popup, if given
|
|
||||||
self.webdriver.find_element(By.CSS_SELECTOR, '#vap-ovrly-secure')
|
|
||||||
LOG.warning('A popup appeared.')
|
|
||||||
close_button = self.webdriver.find_element(By.CLASS_NAME, 'mfp-close')
|
|
||||||
close_button.click()
|
|
||||||
time.sleep(1)
|
|
||||||
except NoSuchElementException:
|
|
||||||
print('(no popup)')
|
|
||||||
return True
|
|
||||||
|
|
||||||
def download_images_from_ad_page(self, directory:str, ad_id:int, logger:logging.Logger) -> list[str]:
|
|
||||||
"""
|
|
||||||
Downloads all images of an ad.
|
|
||||||
|
|
||||||
:param directory: the path of the directory created for this ad
|
|
||||||
:param ad_id: the ID of the ad to download the images from
|
|
||||||
:param logger: an initialized logger
|
|
||||||
:return: the relative paths for all downloaded images
|
|
||||||
"""
|
|
||||||
|
|
||||||
n_images:int
|
|
||||||
img_paths = []
|
|
||||||
try:
|
|
||||||
image_box = self.webdriver.find_element(By.CSS_SELECTOR, '.galleryimage-large')
|
|
||||||
|
|
||||||
# if gallery image box exists, proceed with image fetching
|
|
||||||
n_images = 1
|
|
||||||
|
|
||||||
# determine number of images (1 ... N)
|
|
||||||
next_button:WebElement
|
|
||||||
try: # check if multiple images given
|
|
||||||
# edge case: 'Virtueller Rundgang' div could be found by same CSS class
|
|
||||||
element_candidates = image_box.find_elements(By.CSS_SELECTOR, '.galleryimage--info')
|
|
||||||
image_counter = element_candidates[-1]
|
|
||||||
n_images = int(image_counter.text[2:])
|
|
||||||
logger.info('Found %d images.', n_images)
|
|
||||||
next_button = self.webdriver.find_element(By.CSS_SELECTOR, '.galleryimage--navigation--next')
|
|
||||||
except (NoSuchElementException, IndexError):
|
|
||||||
logger.info('Only one image found.')
|
|
||||||
|
|
||||||
# download all images from box
|
|
||||||
img_element = image_box.find_element(By.XPATH, './/div[1]/img')
|
|
||||||
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
|
|
||||||
|
|
||||||
img_nr = 1
|
|
||||||
dl_counter = 0
|
|
||||||
while img_nr <= n_images: # scrolling + downloading
|
|
||||||
current_img_url = img_element.get_attribute('src') # URL of the image
|
|
||||||
if current_img_url is None:
|
|
||||||
continue
|
|
||||||
file_ending = current_img_url.split('.')[-1].lower()
|
|
||||||
img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
|
|
||||||
if current_img_url.startswith('https'): # verify https (for Bandit linter)
|
|
||||||
urllib.request.urlretrieve(current_img_url, img_path) # nosec B310
|
|
||||||
dl_counter += 1
|
|
||||||
img_paths.append(img_path.split('/')[-1])
|
|
||||||
|
|
||||||
# scroll to next image (if exists)
|
|
||||||
if img_nr < n_images:
|
|
||||||
try:
|
|
||||||
# click next button, wait, and reestablish reference
|
|
||||||
next_button.click()
|
|
||||||
self.web_await(lambda _: EC.staleness_of(img_element))
|
|
||||||
new_div = self.webdriver.find_element(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
|
||||||
img_element = new_div.find_element(By.XPATH, './/img')
|
|
||||||
except NoSuchElementException:
|
|
||||||
logger.error('NEXT button in image gallery somehow missing, abort image fetching.')
|
|
||||||
break
|
|
||||||
img_nr += 1
|
|
||||||
logger.info('Downloaded %d image(s).', dl_counter)
|
|
||||||
|
|
||||||
except NoSuchElementException: # some ads do not require images
|
|
||||||
logger.warning('No image area found. Continue without downloading images.')
|
|
||||||
|
|
||||||
return img_paths
|
|
||||||
|
|
||||||
def extract_ad_page_info(self, directory:str, id_:int) -> dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Extracts all necessary information from an ad´s page.
|
|
||||||
|
|
||||||
:param directory: the path of the ad´s previously created directory
|
|
||||||
:param id_: the ad ID, already extracted by a calling function
|
|
||||||
:return: a dictionary with the keys as given in an ad YAML, and their respective values
|
|
||||||
"""
|
|
||||||
info:dict[str, Any] = {'active': True}
|
|
||||||
|
|
||||||
# extract basic info
|
|
||||||
info['type'] = 'OFFER' if 's-anzeige' in self.webdriver.current_url else 'WANTED'
|
|
||||||
title:str = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-title').text
|
|
||||||
LOG.info('Extracting information from ad with title \"%s\"', title)
|
|
||||||
info['title'] = title
|
|
||||||
descr:str = self.webdriver.find_element(By.XPATH, '//*[@id="viewad-description-text"]').text
|
|
||||||
info['description'] = descr
|
|
||||||
|
|
||||||
extractor = extract.AdExtractor(self.webdriver)
|
|
||||||
|
|
||||||
# extract category
|
|
||||||
info['category'] = extractor.extract_category_from_ad_page()
|
|
||||||
|
|
||||||
# get special attributes
|
|
||||||
info['special_attributes'] = extractor.extract_special_attributes_from_ad_page()
|
|
||||||
|
|
||||||
# process pricing
|
|
||||||
info['price'], info['price_type'] = extractor.extract_pricing_info_from_ad_page()
|
|
||||||
|
|
||||||
# process shipping
|
|
||||||
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = extractor.extract_shipping_info_from_ad_page()
|
|
||||||
info['sell_directly'] = extractor.extract_sell_directly_from_ad_page()
|
|
||||||
|
|
||||||
# fetch images
|
|
||||||
info['images'] = self.download_images_from_ad_page(directory, id_, LOG)
|
|
||||||
|
|
||||||
# process address
|
|
||||||
info['contact'] = extractor.extract_contact_from_ad_page()
|
|
||||||
|
|
||||||
# process meta info
|
|
||||||
info['republication_interval'] = 7 # a default value for downloaded ads
|
|
||||||
info['id'] = id_
|
|
||||||
|
|
||||||
try: # try different locations known for creation date element
|
|
||||||
creation_date = self.webdriver.find_element(By.XPATH, '/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/'
|
|
||||||
'div[1]/span').text
|
|
||||||
except NoSuchElementException:
|
|
||||||
creation_date = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)').text
|
|
||||||
|
|
||||||
# convert creation date to ISO format
|
|
||||||
created_parts = creation_date.split('.')
|
|
||||||
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
|
|
||||||
creation_date = datetime.fromisoformat(creation_date).isoformat()
|
|
||||||
info['created_on'] = creation_date
|
|
||||||
info['updated_on'] = None # will be set later on
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
||||||
def download_ad_page(self, id_:int) -> None:
|
|
||||||
"""
|
|
||||||
Downloads an ad to a specific location, specified by config and ad ID.
|
|
||||||
NOTE: Requires that the driver session currently is on the ad page.
|
|
||||||
|
|
||||||
:param id_: the ad ID
|
|
||||||
"""
|
|
||||||
|
|
||||||
# create sub-directory for ad(s) to download (if necessary):
|
|
||||||
relative_directory = 'downloaded-ads'
|
|
||||||
# make sure configured base directory exists
|
|
||||||
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
|
|
||||||
os.mkdir(relative_directory)
|
|
||||||
LOG.info('Created ads directory at /%s.', relative_directory)
|
|
||||||
|
|
||||||
new_base_dir = os.path.join(relative_directory, f'ad_{id_}')
|
|
||||||
if os.path.exists(new_base_dir):
|
|
||||||
LOG.info('Deleting current folder of ad...')
|
|
||||||
shutil.rmtree(new_base_dir)
|
|
||||||
os.mkdir(new_base_dir)
|
|
||||||
LOG.info('New directory for ad created at %s.', new_base_dir)
|
|
||||||
|
|
||||||
# call extraction function
|
|
||||||
info = self.extract_ad_page_info(new_base_dir, id_)
|
|
||||||
ad_file_path = new_base_dir + '/' + f'ad_{id_}.yaml'
|
|
||||||
utils.save_dict(ad_file_path, info)
|
|
||||||
|
|
||||||
def start_download_routine(self) -> None:
|
|
||||||
"""
|
"""
|
||||||
Determines which download mode was chosen with the arguments, and calls the specified download routine.
|
Determines which download mode was chosen with the arguments, and calls the specified download routine.
|
||||||
This downloads either all, only unsaved (new), or specific ads given by ID.
|
This downloads either all, only unsaved (new), or specific ads given by ID.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
ad_extractor = extract.AdExtractor(self.browser)
|
||||||
|
|
||||||
# use relevant download routine
|
# use relevant download routine
|
||||||
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
if self.ads_selector in {'all', 'new'}: # explore ads overview for these two modes
|
||||||
LOG.info('Scanning your ad overview...')
|
LOG.info('Scanning your ad overview...')
|
||||||
ext = extract.AdExtractor(self.webdriver)
|
own_ad_urls = await ad_extractor.extract_own_ads_urls()
|
||||||
refs = ext.extract_own_ads_references()
|
LOG.info('%d ads were found!', len(own_ad_urls))
|
||||||
LOG.info('%d ads were found!', len(refs))
|
|
||||||
|
|
||||||
if self.ads_selector == 'all': # download all of your adds
|
if self.ads_selector == 'all': # download all of your adds
|
||||||
LOG.info('Start fetch task for all your ads!')
|
LOG.info('Start fetch task for all your ads!')
|
||||||
|
|
||||||
success_count = 0
|
success_count = 0
|
||||||
# call download function for each ad page
|
# call download function for each ad page
|
||||||
for ref in refs:
|
for add_url in own_ad_urls:
|
||||||
ref_ad_id: int = utils.extract_ad_id_from_ad_link(ref)
|
ad_id = ad_extractor.extract_ad_id_from_ad_url(add_url)
|
||||||
if self.navigate_to_ad_page(url = ref):
|
if await ad_extractor.naviagte_to_ad_page(add_url):
|
||||||
self.download_ad_page(ref_ad_id)
|
await ad_extractor.download_ad(ad_id)
|
||||||
success_count += 1
|
success_count += 1
|
||||||
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(refs))
|
LOG.info("%d of %d ads were downloaded from your profile.", success_count, len(own_ad_urls))
|
||||||
|
|
||||||
elif self.ads_selector == 'new': # download only unsaved ads
|
elif self.ads_selector == 'new': # download only unsaved ads
|
||||||
# determine ad IDs from links
|
|
||||||
ref_ad_ids = [utils.extract_ad_id_from_ad_link(r) for r in refs]
|
|
||||||
ref_pairs = list(zip(refs, ref_ad_ids))
|
|
||||||
|
|
||||||
# check which ads already saved
|
# check which ads already saved
|
||||||
saved_ad_ids = []
|
saved_ad_ids = []
|
||||||
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
|
ads = self.load_ads(ignore_inactive = False, check_id = False) # do not skip because of existing IDs
|
||||||
for ad_ in ads:
|
for ad in ads:
|
||||||
ad_id = int(ad_[2]['id'])
|
ad_id = int(ad[2]['id'])
|
||||||
saved_ad_ids.append(ad_id)
|
saved_ad_ids.append(ad_id)
|
||||||
|
|
||||||
|
# determine ad IDs from links
|
||||||
|
ad_id_by_url = {url:ad_extractor.extract_ad_id_from_ad_url(url) for url in own_ad_urls}
|
||||||
|
|
||||||
LOG.info('Start fetch task for your unsaved ads!')
|
LOG.info('Start fetch task for your unsaved ads!')
|
||||||
new_count = 0
|
new_count = 0
|
||||||
for ref_pair in ref_pairs:
|
for ad_url, ad_id in ad_id_by_url.items():
|
||||||
# check if ad with ID already saved
|
# check if ad with ID already saved
|
||||||
id_: int = ref_pair[1]
|
if ad_id in saved_ad_ids:
|
||||||
if id_ in saved_ad_ids:
|
LOG.info('The ad with id %d has already been saved.', ad_id)
|
||||||
LOG.info('The ad with id %d has already been saved.', id_)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self.navigate_to_ad_page(url = ref_pair[0]):
|
if await ad_extractor.naviagte_to_ad_page(ad_url):
|
||||||
self.download_ad_page(id_)
|
await ad_extractor.download_ad(ad_id)
|
||||||
new_count += 1
|
new_count += 1
|
||||||
LOG.info('%d new ad(s) were downloaded from your profile.', new_count)
|
LOG.info('%d new ad(s) were downloaded from your profile.', new_count)
|
||||||
|
|
||||||
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
|
elif re.compile(r'\d+[,\d+]*').search(self.ads_selector): # download ad(s) with specific id(s)
|
||||||
ids = [int(n) for n in self.ads_selector.split(',')]
|
ids = [int(n) for n in self.ads_selector.split(',')]
|
||||||
LOG.info('Start fetch task for the ad(s) with the id(s):')
|
LOG.info('Start fetch task for the ad(s) with the id(s):')
|
||||||
LOG.info(' | '.join([str(id_) for id_ in ids]))
|
LOG.info(' | '.join([str(ad_id) for ad_id in ids]))
|
||||||
|
|
||||||
for id_ in ids: # call download routine for every id
|
for ad_id in ids: # call download routine for every id
|
||||||
exists = self.navigate_to_ad_page(id_)
|
exists = await ad_extractor.naviagte_to_ad_page(ad_id)
|
||||||
if exists:
|
if exists:
|
||||||
self.download_ad_page(id_)
|
await ad_extractor.download_ad(ad_id)
|
||||||
LOG.info('Downloaded ad with id %d', id_)
|
LOG.info('Downloaded ad with id %d', ad_id)
|
||||||
else:
|
else:
|
||||||
LOG.error('The page with the id %d does not exist!', id_)
|
LOG.error('The page with the id %d does not exist!', ad_id)
|
||||||
|
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
@@ -1065,7 +823,9 @@ def main(args:list[str]) -> None:
|
|||||||
sys.excepthook = utils.on_exception
|
sys.excepthook = utils.on_exception
|
||||||
atexit.register(utils.on_exit)
|
atexit.register(utils.on_exit)
|
||||||
|
|
||||||
KleinanzeigenBot().run(args)
|
bot = KleinanzeigenBot()
|
||||||
|
atexit.register(bot.close_browser_session)
|
||||||
|
nodriver.loop().run_until_complete(bot.run(args))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -3,51 +3,291 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
import json
|
import json, logging, os, shutil
|
||||||
from decimal import DecimalException
|
import urllib.request as urllib_request
|
||||||
from typing import Any
|
from datetime import datetime
|
||||||
|
from typing import Any, Final
|
||||||
|
|
||||||
from selenium.common.exceptions import NoSuchElementException
|
from .utils import is_integer, parse_decimal, save_dict
|
||||||
from selenium.webdriver.common.by import By
|
from .web_scraping_mixin import Browser, By, Element, Is, WebScrapingMixin
|
||||||
from selenium.webdriver.remote.webdriver import WebDriver
|
|
||||||
import selenium.webdriver.support.expected_conditions as EC
|
|
||||||
|
|
||||||
from .selenium_mixin import SeleniumMixin
|
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.AdExtractor")
|
||||||
from .utils import parse_decimal, pause
|
|
||||||
|
|
||||||
|
|
||||||
class AdExtractor(SeleniumMixin):
|
class AdExtractor(WebScrapingMixin):
|
||||||
"""
|
"""
|
||||||
Wrapper class for ad extraction that uses an active bot´s web driver to extract specific elements from an ad page.
|
Wrapper class for ad extraction that uses an active bot´s browser session to extract specific elements from an ad page.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, driver:WebDriver):
|
def __init__(self, browser:Browser):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.webdriver = driver
|
self.browser = browser
|
||||||
|
|
||||||
def extract_category_from_ad_page(self) -> str:
|
async def download_ad(self, ad_id:int) -> None:
|
||||||
|
"""
|
||||||
|
Downloads an ad to a specific location, specified by config and ad ID.
|
||||||
|
NOTE: Requires that the driver session currently is on the ad page.
|
||||||
|
|
||||||
|
:param ad_id: the ad ID
|
||||||
|
"""
|
||||||
|
|
||||||
|
# create sub-directory for ad(s) to download (if necessary):
|
||||||
|
relative_directory = 'downloaded-ads'
|
||||||
|
# make sure configured base directory exists
|
||||||
|
if not os.path.exists(relative_directory) or not os.path.isdir(relative_directory):
|
||||||
|
os.mkdir(relative_directory)
|
||||||
|
LOG.info('Created ads directory at ./%s.', relative_directory)
|
||||||
|
|
||||||
|
new_base_dir = os.path.join(relative_directory, f'ad_{ad_id}')
|
||||||
|
if os.path.exists(new_base_dir):
|
||||||
|
LOG.info('Deleting current folder of ad...')
|
||||||
|
shutil.rmtree(new_base_dir)
|
||||||
|
os.mkdir(new_base_dir)
|
||||||
|
LOG.info('New directory for ad created at %s.', new_base_dir)
|
||||||
|
|
||||||
|
# call extraction function
|
||||||
|
info = await self._extract_ad_page_info(new_base_dir, ad_id)
|
||||||
|
ad_file_path = new_base_dir + '/' + f'ad_{ad_id}.yaml'
|
||||||
|
save_dict(ad_file_path, info)
|
||||||
|
|
||||||
|
async def _download_images_from_ad_page(self, directory:str, ad_id:int) -> list[str]:
|
||||||
|
"""
|
||||||
|
Downloads all images of an ad.
|
||||||
|
|
||||||
|
:param directory: the path of the directory created for this ad
|
||||||
|
:param ad_id: the ID of the ad to download the images from
|
||||||
|
:return: the relative paths for all downloaded images
|
||||||
|
"""
|
||||||
|
|
||||||
|
n_images:int
|
||||||
|
img_paths = []
|
||||||
|
try:
|
||||||
|
# download all images from box
|
||||||
|
image_box = await self.web_find(By.CLASS_NAME, 'galleryimage-large')
|
||||||
|
|
||||||
|
n_images = len(await self.web_find_all(By.CSS_SELECTOR, '.galleryimage-element[data-ix]', parent = image_box))
|
||||||
|
LOG.info('Found %d images.', n_images)
|
||||||
|
|
||||||
|
img_element:Element = await self.web_find(By.CSS_SELECTOR, 'div:nth-child(1) > img', parent = image_box)
|
||||||
|
img_fn_prefix = 'ad_' + str(ad_id) + '__img'
|
||||||
|
|
||||||
|
img_nr = 1
|
||||||
|
dl_counter = 0
|
||||||
|
while img_nr <= n_images: # scrolling + downloading
|
||||||
|
current_img_url = img_element.attrs['src'] # URL of the image
|
||||||
|
if current_img_url is None:
|
||||||
|
continue
|
||||||
|
file_ending = current_img_url.split('.')[-1].lower()
|
||||||
|
img_path = directory + '/' + img_fn_prefix + str(img_nr) + '.' + file_ending
|
||||||
|
if current_img_url.startswith('https'): # verify https (for Bandit linter)
|
||||||
|
urllib_request.urlretrieve(current_img_url, img_path) # nosec B310
|
||||||
|
dl_counter += 1
|
||||||
|
img_paths.append(img_path.split('/')[-1])
|
||||||
|
|
||||||
|
# navigate to next image (if exists)
|
||||||
|
if img_nr < n_images:
|
||||||
|
try:
|
||||||
|
# click next button, wait, and re-establish reference
|
||||||
|
await (await self.web_find(By.CLASS_NAME, 'galleryimage--navigation--next')).click()
|
||||||
|
new_div = await self.web_find(By.CSS_SELECTOR, f'div.galleryimage-element:nth-child({img_nr + 1})')
|
||||||
|
img_element = await self.web_find(By.TAG_NAME, 'img', parent = new_div)
|
||||||
|
except TimeoutError:
|
||||||
|
LOG.error('NEXT button in image gallery somehow missing, abort image fetching.')
|
||||||
|
break
|
||||||
|
img_nr += 1
|
||||||
|
LOG.info('Downloaded %d image(s).', dl_counter)
|
||||||
|
|
||||||
|
except TimeoutError: # some ads do not require images
|
||||||
|
LOG.warning('No image area found. Continue without downloading images.')
|
||||||
|
|
||||||
|
return img_paths
|
||||||
|
|
||||||
|
def extract_ad_id_from_ad_url(self, url: str) -> int:
|
||||||
|
"""
|
||||||
|
Extracts the ID of an ad, given by its reference link.
|
||||||
|
|
||||||
|
:param url: the URL to the ad page
|
||||||
|
:return: the ad ID, a (ten-digit) integer number
|
||||||
|
"""
|
||||||
|
num_part = url.split('/')[-1] # suffix
|
||||||
|
id_part = num_part.split('-')[0]
|
||||||
|
|
||||||
|
try:
|
||||||
|
return int(id_part)
|
||||||
|
except ValueError:
|
||||||
|
LOG.warning('The ad ID could not be extracted from the given URL %s', url)
|
||||||
|
return -1
|
||||||
|
|
||||||
|
async def extract_own_ads_urls(self) -> list[str]:
|
||||||
|
"""
|
||||||
|
Extracts the references to all own ads.
|
||||||
|
|
||||||
|
:return: the links to your ad pages
|
||||||
|
"""
|
||||||
|
# navigate to "your ads" page
|
||||||
|
await self.web_open('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
|
||||||
|
await self.web_sleep(2000, 3000)
|
||||||
|
|
||||||
|
# collect ad references:
|
||||||
|
pagination_section = await self.web_find(By.CSS_SELECTOR, 'section:nth-of-type(4)',
|
||||||
|
parent = await self.web_find(By.CSS_SELECTOR, '.l-splitpage'))
|
||||||
|
|
||||||
|
# scroll down to load dynamically
|
||||||
|
await self.web_scroll_page_down()
|
||||||
|
await self.web_sleep(2000, 3000)
|
||||||
|
|
||||||
|
# detect multi-page
|
||||||
|
try:
|
||||||
|
pagination = await self.web_find(By.CSS_SELECTOR, 'div > div:nth-of-type(2) > div:nth-of-type(2) > div',
|
||||||
|
parent = pagination_section)
|
||||||
|
except TimeoutError: # 0 ads - no pagination area
|
||||||
|
LOG.warning('There are currently no ads on your profile!')
|
||||||
|
return []
|
||||||
|
|
||||||
|
n_buttons = len(await self.web_find_all(By.CSS_SELECTOR, 'button',
|
||||||
|
parent = await self.web_find(By.CSS_SELECTOR, 'div:nth-of-type(1)', parent = pagination)))
|
||||||
|
if n_buttons > 1:
|
||||||
|
multi_page = True
|
||||||
|
LOG.info('It seems like you have many ads!')
|
||||||
|
else:
|
||||||
|
multi_page = False
|
||||||
|
LOG.info('It seems like all your ads fit on one overview page.')
|
||||||
|
|
||||||
|
refs:list[str] = []
|
||||||
|
while True: # loop reference extraction until no more forward page
|
||||||
|
# extract references
|
||||||
|
list_items = await self.web_find_all(By.CLASS_NAME, 'cardbox',
|
||||||
|
parent = await self.web_find(By.ID, 'my-manageads-adlist'))
|
||||||
|
refs += [
|
||||||
|
(await self.web_find(By.CSS_SELECTOR, 'article > section > section:nth-of-type(2) > h2 > div > a', parent = li)).attrs['href']
|
||||||
|
for li in list_items
|
||||||
|
]
|
||||||
|
|
||||||
|
if not multi_page: # only one iteration for single-page overview
|
||||||
|
break
|
||||||
|
# check if last page
|
||||||
|
nav_button:Element = (await self.web_find_all(By.CSS_SELECTOR, 'button.jsx-2828608826'))[-1]
|
||||||
|
if nav_button.attrs['title'] != 'Nächste':
|
||||||
|
LOG.info('Last ad overview page explored.')
|
||||||
|
break
|
||||||
|
# navigate to next overview page
|
||||||
|
await nav_button.click()
|
||||||
|
await self.web_sleep(2000, 3000)
|
||||||
|
await self.web_scroll_page_down()
|
||||||
|
|
||||||
|
return refs
|
||||||
|
|
||||||
|
async def naviagte_to_ad_page(self, id_or_url:int | str) -> bool:
|
||||||
|
"""
|
||||||
|
Navigates to an ad page specified with an ad ID; or alternatively by a given URL.
|
||||||
|
:return: whether the navigation to the ad page was successful
|
||||||
|
"""
|
||||||
|
if is_integer(id_or_url):
|
||||||
|
# enter the ad ID into the search bar
|
||||||
|
await self.web_input(By.ID, "site-search-query", id_or_url)
|
||||||
|
# navigate to ad page and wait
|
||||||
|
await self.web_check(By.ID, 'site-search-submit', Is.CLICKABLE)
|
||||||
|
submit_button = await self.web_find(By.ID, 'site-search-submit')
|
||||||
|
await submit_button.click()
|
||||||
|
else:
|
||||||
|
await self.web_open(str(id_or_url)) # navigate to URL directly given
|
||||||
|
await self.web_sleep()
|
||||||
|
|
||||||
|
# handle the case that invalid ad ID given
|
||||||
|
if self.page.url.endswith('k0'):
|
||||||
|
LOG.error('There is no ad under the given ID.')
|
||||||
|
return False
|
||||||
|
|
||||||
|
# close (warning) popup, if given
|
||||||
|
try:
|
||||||
|
await self.web_find(By.ID, 'vap-ovrly-secure')
|
||||||
|
LOG.warning('A popup appeared.')
|
||||||
|
await self.web_click(By.CLASS_NAME, 'mfp-close')
|
||||||
|
await self.web_sleep()
|
||||||
|
except TimeoutError:
|
||||||
|
pass
|
||||||
|
return True
|
||||||
|
|
||||||
|
async def _extract_ad_page_info(self, directory:str, ad_id:int) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Extracts all necessary information from an ad´s page.
|
||||||
|
|
||||||
|
:param directory: the path of the ad´s previously created directory
|
||||||
|
:param ad_id: the ad ID, already extracted by a calling function
|
||||||
|
:return: a dictionary with the keys as given in an ad YAML, and their respective values
|
||||||
|
"""
|
||||||
|
info:dict[str, Any] = {'active': True}
|
||||||
|
|
||||||
|
# extract basic info
|
||||||
|
info['type'] = 'OFFER' if 's-anzeige' in self.page.url else 'WANTED'
|
||||||
|
title:str = await self.web_text(By.ID, 'viewad-title')
|
||||||
|
LOG.info('Extracting information from ad with title \"%s\"', title)
|
||||||
|
info['title'] = title
|
||||||
|
|
||||||
|
descr:str = await self.web_text(By.ID, 'viewad-description-text')
|
||||||
|
info['description'] = descr
|
||||||
|
|
||||||
|
# extract category
|
||||||
|
info['category'] = await self._extract_category_from_ad_page()
|
||||||
|
|
||||||
|
# get special attributes
|
||||||
|
info['special_attributes'] = await self._extract_special_attributes_from_ad_page()
|
||||||
|
|
||||||
|
# process pricing
|
||||||
|
info['price'], info['price_type'] = await self._extract_pricing_info_from_ad_page()
|
||||||
|
|
||||||
|
# process shipping
|
||||||
|
info['shipping_type'], info['shipping_costs'], info['shipping_options'] = await self._extract_shipping_info_from_ad_page()
|
||||||
|
info['sell_directly'] = await self._extract_sell_directly_from_ad_page()
|
||||||
|
|
||||||
|
# fetch images
|
||||||
|
info['images'] = await self._download_images_from_ad_page(directory, ad_id)
|
||||||
|
|
||||||
|
# process address
|
||||||
|
info['contact'] = await self._extract_contact_from_ad_page()
|
||||||
|
|
||||||
|
# process meta info
|
||||||
|
info['republication_interval'] = 7 # a default value for downloaded ads
|
||||||
|
info['id'] = ad_id
|
||||||
|
|
||||||
|
try: # try different locations known for creation date element
|
||||||
|
creation_date = await self.web_text(By.XPATH,
|
||||||
|
'/html/body/div[1]/div[2]/div/section[2]/section/section/article/div[3]/div[2]/div[2]/div[1]/span')
|
||||||
|
except TimeoutError:
|
||||||
|
creation_date = await self.web_text(By.CSS_SELECTOR, '#viewad-extra-info > div:nth-child(1) > span:nth-child(2)')
|
||||||
|
|
||||||
|
# convert creation date to ISO format
|
||||||
|
created_parts = creation_date.split('.')
|
||||||
|
creation_date = created_parts[2] + '-' + created_parts[1] + '-' + created_parts[0] + ' 00:00:00'
|
||||||
|
creation_date = datetime.fromisoformat(creation_date).isoformat()
|
||||||
|
info['created_on'] = creation_date
|
||||||
|
info['updated_on'] = None # will be set later on
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
async def _extract_category_from_ad_page(self) -> str:
|
||||||
"""
|
"""
|
||||||
Extracts a category of an ad in numerical form.
|
Extracts a category of an ad in numerical form.
|
||||||
Assumes that the web driver currently shows an ad page.
|
Assumes that the web driver currently shows an ad page.
|
||||||
|
|
||||||
:return: a category string of form abc/def, where a-f are digits
|
:return: a category string of form abc/def, where a-f are digits
|
||||||
"""
|
"""
|
||||||
category_line = self.webdriver.find_element(By.XPATH, '//*[@id="vap-brdcrmb"]')
|
category_line = await self.web_find(By.ID, 'vap-brdcrmb')
|
||||||
category_first_part = category_line.find_element(By.XPATH, './/a[2]')
|
category_first_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(2)', parent = category_line)
|
||||||
category_second_part = category_line.find_element(By.XPATH, './/a[3]')
|
category_second_part = await self.web_find(By.CSS_SELECTOR, 'a:nth-of-type(3)', parent = category_line)
|
||||||
cat_num_first = category_first_part.get_attribute('href').split('/')[-1][1:]
|
cat_num_first = category_first_part.attrs['href'].split('/')[-1][1:]
|
||||||
cat_num_second = category_second_part.get_attribute('href').split('/')[-1][1:]
|
cat_num_second = category_second_part.attrs['href'].split('/')[-1][1:]
|
||||||
category:str = cat_num_first + '/' + cat_num_second
|
category:str = cat_num_first + '/' + cat_num_second
|
||||||
|
|
||||||
return category
|
return category
|
||||||
|
|
||||||
def extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
|
async def _extract_special_attributes_from_ad_page(self) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Extracts the special attributes from an ad page.
|
Extracts the special attributes from an ad page.
|
||||||
|
|
||||||
:return: a dictionary (possibly empty) where the keys are the attribute names, mapped to their values
|
:return: a dictionary (possibly empty) where the keys are the attribute names, mapped to their values
|
||||||
"""
|
"""
|
||||||
belen_conf = self.webdriver.execute_script("return window.BelenConf")
|
belen_conf = await self.web_execute("window.BelenConf")
|
||||||
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
|
special_attributes_str = belen_conf["universalAnalyticsOpts"]["dimensions"]["dimension108"]
|
||||||
special_attributes = json.loads(special_attributes_str)
|
special_attributes = json.loads(special_attributes_str)
|
||||||
if not isinstance(special_attributes, dict):
|
if not isinstance(special_attributes, dict):
|
||||||
@@ -58,36 +298,32 @@ class AdExtractor(SeleniumMixin):
|
|||||||
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
|
special_attributes = {k: v for k, v in special_attributes.items() if not k.endswith('.versand_s')}
|
||||||
return special_attributes
|
return special_attributes
|
||||||
|
|
||||||
def extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
|
async def _extract_pricing_info_from_ad_page(self) -> tuple[float | None, str]:
|
||||||
"""
|
"""
|
||||||
Extracts the pricing information (price and pricing type) from an ad page.
|
Extracts the pricing information (price and pricing type) from an ad page.
|
||||||
|
|
||||||
:return: the price of the offer (optional); and the pricing type
|
:return: the price of the offer (optional); and the pricing type
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
price_str:str = self.webdriver.find_element(By.CLASS_NAME, 'boxedarticle--price').text
|
price_str:str = await self.web_text(By.ID, 'viewad-price')
|
||||||
price_type:str
|
price:int | None = None
|
||||||
price:float | None = -1
|
|
||||||
match price_str.split()[-1]:
|
match price_str.split()[-1]:
|
||||||
case '€':
|
case '€':
|
||||||
price_type = 'FIXED'
|
price_type = 'FIXED'
|
||||||
price = float(parse_decimal(price_str.split()[0].replace('.', '')))
|
price = int(price_str.split()[0])
|
||||||
case 'VB': # can be either 'X € VB', or just 'VB'
|
case 'VB':
|
||||||
price_type = 'NEGOTIABLE'
|
price_type = 'NEGOTIABLE'
|
||||||
try:
|
if not price_str == "VB": # can be either 'X € VB', or just 'VB'
|
||||||
price = float(parse_decimal(price_str.split()[0].replace('.', '')))
|
price = int(price_str.split()[0])
|
||||||
except DecimalException:
|
|
||||||
price = None
|
|
||||||
case 'verschenken':
|
case 'verschenken':
|
||||||
price_type = 'GIVE_AWAY'
|
price_type = 'GIVE_AWAY'
|
||||||
price = None
|
|
||||||
case _:
|
case _:
|
||||||
price_type = 'NOT_APPLICABLE'
|
price_type = 'NOT_APPLICABLE'
|
||||||
return price, price_type
|
return price, price_type
|
||||||
except NoSuchElementException: # no 'commercial' ad, has no pricing box etc.
|
except TimeoutError: # no 'commercial' ad, has no pricing box etc.
|
||||||
return None, 'NOT_APPLICABLE'
|
return None, 'NOT_APPLICABLE'
|
||||||
|
|
||||||
def extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
|
async def _extract_shipping_info_from_ad_page(self) -> tuple[str, float | None, list[str] | None]:
|
||||||
"""
|
"""
|
||||||
Extracts shipping information from an ad page.
|
Extracts shipping information from an ad page.
|
||||||
|
|
||||||
@@ -95,8 +331,7 @@ class AdExtractor(SeleniumMixin):
|
|||||||
"""
|
"""
|
||||||
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None
|
ship_type, ship_costs, shipping_options = 'NOT_APPLICABLE', None, None
|
||||||
try:
|
try:
|
||||||
shipping_text = self.webdriver.find_element(By.CSS_SELECTOR, '.boxedarticle--details--shipping') \
|
shipping_text = await self.web_text(By.ID, 'boxedarticle--details--shipping')
|
||||||
.text.strip()
|
|
||||||
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
|
# e.g. '+ Versand ab 5,49 €' OR 'Nur Abholung'
|
||||||
if shipping_text == 'Nur Abholung':
|
if shipping_text == 'Nur Abholung':
|
||||||
ship_type = 'PICKUP'
|
ship_type = 'PICKUP'
|
||||||
@@ -124,115 +359,58 @@ class AdExtractor(SeleniumMixin):
|
|||||||
if shipping_price in shipping_text:
|
if shipping_price in shipping_text:
|
||||||
shipping_options = [shipping_option]
|
shipping_options = [shipping_option]
|
||||||
break
|
break
|
||||||
except NoSuchElementException: # no pricing box -> no shipping given
|
except TimeoutError: # no pricing box -> no shipping given
|
||||||
ship_type = 'NOT_APPLICABLE'
|
ship_type = 'NOT_APPLICABLE'
|
||||||
|
|
||||||
return ship_type, ship_costs, shipping_options
|
return ship_type, ship_costs, shipping_options
|
||||||
|
|
||||||
def extract_sell_directly_from_ad_page(self) -> bool | None:
|
async def _extract_sell_directly_from_ad_page(self) -> bool | None:
|
||||||
"""
|
"""
|
||||||
Extracts the sell directly option from an ad page.
|
Extracts the sell directly option from an ad page.
|
||||||
|
|
||||||
:return: a boolean indicating whether the sell directly option is active (optional)
|
:return: a boolean indicating whether the sell directly option is active (optional)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
buy_now_is_active = self.webdriver.find_element(By.ID, 'j-buy-now').text == "Direkt kaufen"
|
buy_now_is_active:bool = (await self.web_text(By.ID, 'j-buy-now')) == "Direkt kaufen"
|
||||||
return buy_now_is_active
|
return buy_now_is_active
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
|
async def _extract_contact_from_ad_page(self) -> dict[str, (str | None)]:
|
||||||
"""
|
"""
|
||||||
Processes the address part involving street (optional), zip code + city, and phone number (optional).
|
Processes the address part involving street (optional), zip code + city, and phone number (optional).
|
||||||
|
|
||||||
:return: a dictionary containing the address parts with their corresponding values
|
:return: a dictionary containing the address parts with their corresponding values
|
||||||
"""
|
"""
|
||||||
contact:dict[str, (str | None)] = {}
|
contact:dict[str, (str | None)] = {}
|
||||||
address_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-locality')
|
address_text = await self.web_text(By.ID, 'viewad-locality')
|
||||||
address_text = address_element.text.strip()
|
|
||||||
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
|
# format: e.g. (Beispiel Allee 42,) 12345 Bundesland - Stadt
|
||||||
try:
|
try:
|
||||||
street_element = self.webdriver.find_element(By.XPATH, '//*[@id="street-address"]')
|
street = (await self.web_text(By.ID, 'street-address'))[:-1] # trailing comma
|
||||||
street = street_element.text[:-2] # trailing comma and whitespace
|
|
||||||
contact['street'] = street
|
contact['street'] = street
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
print('No street given in the contact.')
|
LOG.info('No street given in the contact.')
|
||||||
# construct remaining address
|
# construct remaining address
|
||||||
address_halves = address_text.split(' - ')
|
address_halves = address_text.split(' - ')
|
||||||
address_left_parts = address_halves[0].split(' ') # zip code and region/city
|
address_left_parts = address_halves[0].split(' ') # zip code and region/city
|
||||||
contact['zipcode'] = address_left_parts[0]
|
contact['zipcode'] = address_left_parts[0]
|
||||||
|
|
||||||
contact_person_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-contact')
|
contact_person_element:Element = await self.web_find(By.ID, 'viewad-contact')
|
||||||
name_element = contact_person_element.find_element(By.CLASS_NAME, 'iconlist-text')
|
name_element = await self.web_find(By.CLASS_NAME, 'iconlist-text', parent = contact_person_element)
|
||||||
try:
|
try:
|
||||||
name = name_element.find_element(By.TAG_NAME, 'a').text
|
name = await self.web_text(By.TAG_NAME, 'a', parent = name_element)
|
||||||
except NoSuchElementException: # edge case: name without link
|
except TimeoutError: # edge case: name without link
|
||||||
name = name_element.find_element(By.TAG_NAME, 'span').text
|
name = await self.web_text(By.TAG_NAME, 'span', parent = name_element)
|
||||||
contact['name'] = name
|
contact['name'] = name
|
||||||
|
|
||||||
if 'street' not in contact:
|
if 'street' not in contact:
|
||||||
contact['street'] = None
|
contact['street'] = None
|
||||||
try: # phone number is unusual for non-professional sellers today
|
try: # phone number is unusual for non-professional sellers today
|
||||||
phone_element = self.webdriver.find_element(By.CSS_SELECTOR, '#viewad-contact-phone')
|
phone_element = await self.web_find(By.ID, 'viewad-contact-phone')
|
||||||
phone_number = phone_element.find_element(By.TAG_NAME, 'a').text
|
phone_number = await self.web_text(By.TAG_NAME, 'a', parent = phone_element)
|
||||||
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0')
|
contact['phone'] = ''.join(phone_number.replace('-', ' ').split(' ')).replace('+49(0)', '0')
|
||||||
except NoSuchElementException:
|
except TimeoutError:
|
||||||
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users)
|
contact['phone'] = None # phone seems to be a deprecated feature (for non-professional users)
|
||||||
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
|
# also see 'https://themen.kleinanzeigen.de/hilfe/deine-anzeigen/Telefon/
|
||||||
|
|
||||||
return contact
|
return contact
|
||||||
|
|
||||||
def extract_own_ads_references(self) -> list[str]:
|
|
||||||
"""
|
|
||||||
Extracts the references to all own ads.
|
|
||||||
|
|
||||||
:return: the links to your ad pages
|
|
||||||
"""
|
|
||||||
# navigate to your ads page
|
|
||||||
self.webdriver.get('https://www.kleinanzeigen.de/m-meine-anzeigen.html')
|
|
||||||
self.web_await(EC.url_contains('meine-anzeigen'), 15)
|
|
||||||
pause(2000, 3000)
|
|
||||||
|
|
||||||
# collect ad references:
|
|
||||||
|
|
||||||
pagination_section = self.webdriver.find_element(By.CSS_SELECTOR, '.l-splitpage')\
|
|
||||||
.find_element(By.XPATH, './/section[4]')
|
|
||||||
# scroll down to load dynamically
|
|
||||||
self.web_scroll_page_down()
|
|
||||||
pause(2000, 3000)
|
|
||||||
# detect multi-page
|
|
||||||
try:
|
|
||||||
pagination = pagination_section.find_element(By.XPATH, './/div/div[2]/div[2]/div') # Pagination
|
|
||||||
except NoSuchElementException: # 0 ads - no pagination area
|
|
||||||
print('There currently seem to be no ads on your profile!')
|
|
||||||
return []
|
|
||||||
|
|
||||||
n_buttons = len(pagination.find_element(By.XPATH, './/div[1]').find_elements(By.TAG_NAME, 'button'))
|
|
||||||
multi_page:bool
|
|
||||||
if n_buttons > 1:
|
|
||||||
multi_page = True
|
|
||||||
print('It seems like you have many ads!')
|
|
||||||
else:
|
|
||||||
multi_page = False
|
|
||||||
print('It seems like all your ads fit on one overview page.')
|
|
||||||
|
|
||||||
refs:list[str] = []
|
|
||||||
while True: # loop reference extraction until no more forward page
|
|
||||||
# extract references
|
|
||||||
list_section = self.webdriver.find_element(By.XPATH, '//*[@id="my-manageads-adlist"]')
|
|
||||||
list_items = list_section.find_elements(By.CLASS_NAME, 'cardbox')
|
|
||||||
refs += [li.find_element(By.XPATH, 'article/section/section[2]/h2/div/a').get_attribute('href') for li in list_items]
|
|
||||||
|
|
||||||
if not multi_page: # only one iteration for single-page overview
|
|
||||||
break
|
|
||||||
# check if last page
|
|
||||||
nav_button = self.webdriver.find_elements(By.CSS_SELECTOR, 'button.jsx-2828608826')[-1]
|
|
||||||
if nav_button.get_attribute('title') != 'Nächste':
|
|
||||||
print('Last ad overview page explored.')
|
|
||||||
break
|
|
||||||
# navigate to next overview page
|
|
||||||
nav_button.click()
|
|
||||||
pause(2000, 3000)
|
|
||||||
self.web_scroll_page_down()
|
|
||||||
|
|
||||||
return refs
|
|
||||||
|
|||||||
@@ -27,12 +27,7 @@ categories: []
|
|||||||
# browser configuration
|
# browser configuration
|
||||||
browser:
|
browser:
|
||||||
# https://peter.sh/experiments/chromium-command-line-switches/
|
# https://peter.sh/experiments/chromium-command-line-switches/
|
||||||
arguments:
|
arguments: []
|
||||||
# https://stackoverflow.com/a/50725918/5116073
|
|
||||||
- --disable-dev-shm-usage
|
|
||||||
- --no-sandbox
|
|
||||||
# --headless
|
|
||||||
# --start-maximized
|
|
||||||
binary_location: # path to custom browser executable, if not specified will be looked up on PATH
|
binary_location: # path to custom browser executable, if not specified will be looked up on PATH
|
||||||
extensions: [] # a list of .crx extension files to be loaded
|
extensions: [] # a list of .crx extension files to be loaded
|
||||||
use_private_window: true
|
use_private_window: true
|
||||||
|
|||||||
@@ -1,322 +0,0 @@
|
|||||||
"""
|
|
||||||
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|
||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
||||||
"""
|
|
||||||
import logging, os, platform, shutil, time
|
|
||||||
from collections.abc import Callable, Iterable
|
|
||||||
from typing import Any, Final, TypeVar
|
|
||||||
|
|
||||||
from selenium import webdriver
|
|
||||||
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
from selenium.webdriver.chromium.options import ChromiumOptions
|
|
||||||
from selenium.webdriver.chromium.webdriver import ChromiumDriver
|
|
||||||
from selenium.webdriver.remote.webdriver import WebDriver
|
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
|
||||||
from selenium.webdriver.support.ui import Select, WebDriverWait
|
|
||||||
import selenium_stealth
|
|
||||||
from .utils import ensure, pause, T
|
|
||||||
|
|
||||||
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.selenium_mixin")
|
|
||||||
|
|
||||||
|
|
||||||
class BrowserConfig:
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.arguments:Iterable[str] = []
|
|
||||||
self.binary_location:str | None = None
|
|
||||||
self.extensions:Iterable[str] = []
|
|
||||||
self.use_private_window:bool = True
|
|
||||||
self.user_data_dir:str = ""
|
|
||||||
self.profile_name:str = ""
|
|
||||||
|
|
||||||
|
|
||||||
CHROMIUM_OPTIONS = TypeVar('CHROMIUM_OPTIONS', bound = ChromiumOptions) # pylint: disable=invalid-name
|
|
||||||
|
|
||||||
|
|
||||||
class SeleniumMixin:
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
os.environ["SE_AVOID_STATS"] = "true" # see https://www.selenium.dev/documentation/selenium_manager/
|
|
||||||
self.browser_config:Final[BrowserConfig] = BrowserConfig()
|
|
||||||
self.webdriver:WebDriver = None
|
|
||||||
|
|
||||||
def _init_browser_options(self, browser_options:CHROMIUM_OPTIONS) -> CHROMIUM_OPTIONS:
|
|
||||||
if self.browser_config.use_private_window:
|
|
||||||
if isinstance(browser_options, webdriver.EdgeOptions):
|
|
||||||
browser_options.add_argument("-inprivate")
|
|
||||||
else:
|
|
||||||
browser_options.add_argument("--incognito")
|
|
||||||
|
|
||||||
if self.browser_config.user_data_dir:
|
|
||||||
LOG.info(" -> Browser User Data Dir: %s", self.browser_config.user_data_dir)
|
|
||||||
browser_options.add_argument(f"--user-data-dir={self.browser_config.user_data_dir}")
|
|
||||||
|
|
||||||
if self.browser_config.profile_name:
|
|
||||||
LOG.info(" -> Browser Profile Name: %s", self.browser_config.profile_name)
|
|
||||||
browser_options.add_argument(f"--profile-directory={self.browser_config.profile_name}")
|
|
||||||
|
|
||||||
browser_options.add_argument("--disable-crash-reporter")
|
|
||||||
browser_options.add_argument("--no-first-run")
|
|
||||||
browser_options.add_argument("--no-service-autorun")
|
|
||||||
for chrome_option in self.browser_config.arguments:
|
|
||||||
LOG.info(" -> Custom chrome argument: %s", chrome_option)
|
|
||||||
browser_options.add_argument(chrome_option)
|
|
||||||
LOG.debug("Effective browser arguments: %s", browser_options.arguments)
|
|
||||||
|
|
||||||
for crx_extension in self.browser_config.extensions:
|
|
||||||
ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
|
|
||||||
browser_options.add_extension(crx_extension)
|
|
||||||
LOG.debug("Effective browser extensions: %s", browser_options.extensions)
|
|
||||||
|
|
||||||
browser_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
|
||||||
browser_options.add_experimental_option("useAutomationExtension", False)
|
|
||||||
browser_options.add_experimental_option("prefs", {
|
|
||||||
"credentials_enable_service": False,
|
|
||||||
"profile.password_manager_enabled": False,
|
|
||||||
"profile.default_content_setting_values.notifications": 2, # 1 = allow, 2 = block browser notifications
|
|
||||||
"devtools.preferences.currentDockState": "\"bottom\""
|
|
||||||
})
|
|
||||||
|
|
||||||
if not LOG.isEnabledFor(logging.DEBUG):
|
|
||||||
browser_options.add_argument("--log-level=3") # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
|
|
||||||
|
|
||||||
LOG.debug("Effective experimental options: %s", browser_options.experimental_options)
|
|
||||||
|
|
||||||
if self.browser_config.binary_location:
|
|
||||||
browser_options.binary_location = self.browser_config.binary_location
|
|
||||||
LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)
|
|
||||||
return browser_options
|
|
||||||
|
|
||||||
def create_webdriver_session(self) -> None:
|
|
||||||
LOG.info("Creating WebDriver session...")
|
|
||||||
|
|
||||||
if self.browser_config.binary_location:
|
|
||||||
ensure(os.path.exists(self.browser_config.binary_location), f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
|
|
||||||
else:
|
|
||||||
self.browser_config.binary_location = self.get_compatible_browser()
|
|
||||||
|
|
||||||
if "edge" in self.browser_config.binary_location.lower():
|
|
||||||
os.environ["MSEDGEDRIVER_TELEMETRY_OPTOUT"] = "1" # https://docs.microsoft.com/en-us/microsoft-edge/privacy-whitepaper/#microsoft-edge-driver
|
|
||||||
browser_options = self._init_browser_options(webdriver.EdgeOptions())
|
|
||||||
browser_options.binary_location = self.browser_config.binary_location
|
|
||||||
self.webdriver = webdriver.Edge(options = browser_options)
|
|
||||||
else:
|
|
||||||
browser_options = self._init_browser_options(webdriver.ChromeOptions())
|
|
||||||
browser_options.binary_location = self.browser_config.binary_location
|
|
||||||
self.webdriver = webdriver.Chrome(options = browser_options)
|
|
||||||
|
|
||||||
LOG.info(" -> Chrome driver: %s", self.webdriver.service.path)
|
|
||||||
|
|
||||||
# workaround to support Edge, see https://github.com/diprajpatra/selenium-stealth/pull/25
|
|
||||||
selenium_stealth.Driver = ChromiumDriver
|
|
||||||
|
|
||||||
selenium_stealth.stealth(self.webdriver, # https://github.com/diprajpatra/selenium-stealth#args
|
|
||||||
languages = ("de-DE", "de", "en-US", "en"),
|
|
||||||
platform = "Win32",
|
|
||||||
fix_hairline = True,
|
|
||||||
)
|
|
||||||
|
|
||||||
LOG.info("New WebDriver session is: %s %s", self.webdriver.session_id, self.webdriver.command_executor._url) # pylint: disable=protected-access
|
|
||||||
|
|
||||||
def get_compatible_browser(self) -> str | None:
|
|
||||||
match platform.system():
|
|
||||||
case "Linux":
|
|
||||||
browser_paths = [
|
|
||||||
shutil.which("chromium"),
|
|
||||||
shutil.which("chromium-browser"),
|
|
||||||
shutil.which("google-chrome"),
|
|
||||||
shutil.which("microsoft-edge")
|
|
||||||
]
|
|
||||||
|
|
||||||
case "Darwin":
|
|
||||||
browser_paths = [
|
|
||||||
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
|
||||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
|
||||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
|
||||||
]
|
|
||||||
|
|
||||||
case "Windows":
|
|
||||||
browser_paths = [
|
|
||||||
os.environ.get("ProgramFiles", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
|
|
||||||
os.environ.get("ProgramFiles(x86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
|
|
||||||
|
|
||||||
os.environ["ProgramFiles"] + r'\Chromium\Application\chrome.exe',
|
|
||||||
os.environ["ProgramFiles(x86)"] + r'\Chromium\Application\chrome.exe',
|
|
||||||
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
|
|
||||||
|
|
||||||
os.environ["ProgramFiles"] + r'\Chrome\Application\chrome.exe',
|
|
||||||
os.environ["ProgramFiles(x86)"] + r'\Chrome\Application\chrome.exe',
|
|
||||||
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
|
|
||||||
|
|
||||||
shutil.which("msedge.exe"),
|
|
||||||
shutil.which("chromium.exe"),
|
|
||||||
shutil.which("chrome.exe")
|
|
||||||
]
|
|
||||||
|
|
||||||
case _ as os_name:
|
|
||||||
LOG.warning("Installed browser for OS [%s] could not be detected", os_name)
|
|
||||||
return None
|
|
||||||
|
|
||||||
for browser_path in browser_paths:
|
|
||||||
if browser_path and os.path.isfile(browser_path):
|
|
||||||
return browser_path
|
|
||||||
|
|
||||||
raise AssertionError("Installed browser could not be detected")
|
|
||||||
|
|
||||||
def web_await(self, condition: Callable[[WebDriver], T], timeout:float = 5, exception_on_timeout: Callable[[], Exception] | None = None) -> T:
|
|
||||||
"""
|
|
||||||
Blocks/waits until the given condition is met.
|
|
||||||
|
|
||||||
:param timeout: timeout in seconds
|
|
||||||
:raises TimeoutException: if element could not be found within time
|
|
||||||
"""
|
|
||||||
max_attempts = 2
|
|
||||||
for attempt in range(max_attempts + 1)[1:]:
|
|
||||||
try:
|
|
||||||
return WebDriverWait(self.webdriver, timeout).until(condition) # type: ignore[no-any-return]
|
|
||||||
except TimeoutException as ex:
|
|
||||||
if exception_on_timeout:
|
|
||||||
raise exception_on_timeout() from ex
|
|
||||||
raise ex
|
|
||||||
except WebDriverException as ex:
|
|
||||||
# temporary workaround for:
|
|
||||||
# - https://groups.google.com/g/chromedriver-users/c/Z_CaHJTJnLw
|
|
||||||
# - https://bugs.chromium.org/p/chromedriver/issues/detail?id=4048
|
|
||||||
if ex.msg == "target frame detached" and attempt < max_attempts:
|
|
||||||
LOG.warning(ex)
|
|
||||||
else:
|
|
||||||
raise ex
|
|
||||||
|
|
||||||
raise AssertionError("Should never be reached.")
|
|
||||||
|
|
||||||
def web_click(self, selector_type:By, selector_value:str, timeout:float = 5) -> WebElement:
|
|
||||||
"""
|
|
||||||
:param timeout: timeout in seconds
|
|
||||||
:raises NoSuchElementException: if element could not be found within time
|
|
||||||
"""
|
|
||||||
elem = self.web_await(
|
|
||||||
EC.element_to_be_clickable((selector_type, selector_value)),
|
|
||||||
timeout,
|
|
||||||
lambda: NoSuchElementException(f"Element {selector_type}:{selector_value} not found or not clickable")
|
|
||||||
)
|
|
||||||
elem.click()
|
|
||||||
pause()
|
|
||||||
return elem
|
|
||||||
|
|
||||||
def web_execute(self, javascript:str) -> Any:
|
|
||||||
"""
|
|
||||||
Executes the given JavaScript code in the context of the current page.
|
|
||||||
|
|
||||||
:return: The command's JSON response
|
|
||||||
"""
|
|
||||||
return self.webdriver.execute_script(javascript)
|
|
||||||
|
|
||||||
def web_find(self, selector_type:By, selector_value:str, timeout:float = 5) -> WebElement:
|
|
||||||
"""
|
|
||||||
Locates an HTML element.
|
|
||||||
|
|
||||||
:param timeout: timeout in seconds
|
|
||||||
:raises NoSuchElementException: if element could not be found within time
|
|
||||||
"""
|
|
||||||
return self.web_await(
|
|
||||||
EC.presence_of_element_located((selector_type, selector_value)),
|
|
||||||
timeout,
|
|
||||||
lambda: NoSuchElementException(f"Element {selector_type}='{selector_value}' not found")
|
|
||||||
)
|
|
||||||
|
|
||||||
def web_input(self, selector_type:By, selector_value:str, text:str, timeout:float = 5) -> WebElement:
|
|
||||||
"""
|
|
||||||
Enters text into an HTML input field.
|
|
||||||
|
|
||||||
:param timeout: timeout in seconds
|
|
||||||
:raises NoSuchElementException: if element could not be found within time
|
|
||||||
"""
|
|
||||||
input_field = self.web_find(selector_type, selector_value, timeout)
|
|
||||||
input_field.clear()
|
|
||||||
input_field.send_keys(text)
|
|
||||||
pause()
|
|
||||||
return input_field
|
|
||||||
|
|
||||||
def web_open(self, url:str, timeout:float = 15, reload_if_already_open:bool = False) -> None:
|
|
||||||
"""
|
|
||||||
:param url: url to open in browser
|
|
||||||
:param timeout: timespan in seconds within the page needs to be loaded
|
|
||||||
:param reload_if_already_open: if False does nothing if the URL is already open in the browser
|
|
||||||
:raises TimeoutException: if page did not open within given timespan
|
|
||||||
"""
|
|
||||||
LOG.debug(" -> Opening [%s]...", url)
|
|
||||||
if not reload_if_already_open and url == self.webdriver.current_url:
|
|
||||||
LOG.debug(" => skipping, [%s] is already open", url)
|
|
||||||
return
|
|
||||||
self.webdriver.get(url)
|
|
||||||
WebDriverWait(self.webdriver, timeout).until(lambda _: self.web_execute("return document.readyState") == "complete")
|
|
||||||
|
|
||||||
# pylint: disable=dangerous-default-value
|
|
||||||
def web_request(self, url:str, method:str = "GET", valid_response_codes:Iterable[int] = [200], headers:dict[str, str] | None = None) -> dict[str, Any]:
|
|
||||||
method = method.upper()
|
|
||||||
LOG.debug(" -> HTTP %s [%s]...", method, url)
|
|
||||||
response:dict[str, Any] = self.webdriver.execute_async_script(f"""
|
|
||||||
var callback = arguments[arguments.length - 1];
|
|
||||||
fetch("{url}", {{
|
|
||||||
method: "{method}",
|
|
||||||
redirect: "follow",
|
|
||||||
headers: {headers or {}}
|
|
||||||
}})
|
|
||||||
.then(response => response.text().then(responseText => {{
|
|
||||||
headers = {{}};
|
|
||||||
response.headers.forEach((v, k) => headers[k] = v);
|
|
||||||
callback({{
|
|
||||||
"statusCode": response.status,
|
|
||||||
"statusMessage": response.statusText,
|
|
||||||
"headers": headers,
|
|
||||||
"content": responseText
|
|
||||||
}})
|
|
||||||
}}))
|
|
||||||
""")
|
|
||||||
ensure(
|
|
||||||
response["statusCode"] in valid_response_codes,
|
|
||||||
f'Invalid response "{response["statusCode"]} response["statusMessage"]" received for HTTP {method} to {url}'
|
|
||||||
)
|
|
||||||
return response
|
|
||||||
# pylint: enable=dangerous-default-value
|
|
||||||
|
|
||||||
def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
|
|
||||||
"""
|
|
||||||
Smoothly scrolls the current web page down.
|
|
||||||
|
|
||||||
:param scroll_length: the length of a single scroll iteration, determines smoothness of scrolling, lower is smoother
|
|
||||||
:param scroll_speed: the speed of scrolling, higher is faster
|
|
||||||
:param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
|
|
||||||
"""
|
|
||||||
current_y_pos = 0
|
|
||||||
bottom_y_pos: int = self.webdriver.execute_script('return document.body.scrollHeight;') # get bottom position by JS
|
|
||||||
while current_y_pos < bottom_y_pos: # scroll in steps until bottom reached
|
|
||||||
current_y_pos += scroll_length
|
|
||||||
self.webdriver.execute_script(f'window.scrollTo(0, {current_y_pos});') # scroll one step
|
|
||||||
time.sleep(scroll_length / scroll_speed)
|
|
||||||
|
|
||||||
if scroll_back_top: # scroll back to top in same style
|
|
||||||
while current_y_pos > 0:
|
|
||||||
current_y_pos -= scroll_length
|
|
||||||
self.webdriver.execute_script(f'window.scrollTo(0, {current_y_pos});')
|
|
||||||
time.sleep(scroll_length / scroll_speed / 2) # double speed
|
|
||||||
|
|
||||||
def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:float = 5) -> WebElement:
|
|
||||||
"""
|
|
||||||
Selects an <option/> of a <select/> HTML element.
|
|
||||||
|
|
||||||
:param timeout: timeout in seconds
|
|
||||||
:raises NoSuchElementException: if element could not be found within time
|
|
||||||
:raises UnexpectedTagNameException: if element is not a <select> element
|
|
||||||
"""
|
|
||||||
elem = self.web_await(
|
|
||||||
EC.element_to_be_clickable((selector_type, selector_value)),
|
|
||||||
timeout,
|
|
||||||
lambda: NoSuchElementException(f"Element {selector_type}='{selector_value}' not found or not clickable")
|
|
||||||
)
|
|
||||||
Select(elem).select_by_value(selected_value)
|
|
||||||
pause()
|
|
||||||
return elem
|
|
||||||
@@ -3,7 +3,7 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
import copy, decimal, json, logging, os, re, secrets, sys, traceback, time
|
import asyncio, copy, decimal, json, logging, os, re, sys, traceback, time
|
||||||
from importlib.resources import read_text as get_resource_as_string
|
from importlib.resources import read_text as get_resource_as_string
|
||||||
from collections.abc import Callable, Sized
|
from collections.abc import Callable, Sized
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -68,6 +68,18 @@ def is_frozen() -> bool:
|
|||||||
return getattr(sys, "frozen", False)
|
return getattr(sys, "frozen", False)
|
||||||
|
|
||||||
|
|
||||||
|
def is_integer(obj:Any) -> bool:
|
||||||
|
try:
|
||||||
|
int(obj)
|
||||||
|
return True
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
async def ainput(prompt: str) -> str:
    """Non-blocking replacement for input(): runs it in a worker thread."""
    padded_prompt = f'{prompt} '
    return await asyncio.to_thread(input, padded_prompt)
|
|
||||||
|
|
||||||
def apply_defaults(
|
def apply_defaults(
|
||||||
target:dict[Any, Any],
|
target:dict[Any, Any],
|
||||||
defaults:dict[Any, Any],
|
defaults:dict[Any, Any],
|
||||||
@@ -119,7 +131,7 @@ def configure_console_logging() -> None:
|
|||||||
stdout_log = logging.StreamHandler(sys.stderr)
|
stdout_log = logging.StreamHandler(sys.stderr)
|
||||||
stdout_log.setLevel(logging.DEBUG)
|
stdout_log.setLevel(logging.DEBUG)
|
||||||
stdout_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s"))
|
stdout_log.setFormatter(coloredlogs.ColoredFormatter("[%(levelname)s] %(message)s"))
|
||||||
stdout_log.addFilter(type("", (logging.Filter,), {
|
stdout_log.addFilter(type("", (logging.Filter,), { # pyright: ignore
|
||||||
"filter": lambda rec: rec.levelno <= logging.INFO
|
"filter": lambda rec: rec.levelno <= logging.INFO
|
||||||
}))
|
}))
|
||||||
LOG_ROOT.addHandler(stdout_log)
|
LOG_ROOT.addHandler(stdout_log)
|
||||||
@@ -151,12 +163,6 @@ def on_sigint(_sig:int, _frame:FrameType | None) -> None:
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
def pause(min_ms:int = 200, max_ms:int = 2000) -> None:
    """
    Sleeps for a random duration between min_ms and max_ms milliseconds.

    :param min_ms: lower bound of the pause in milliseconds
    :param max_ms: upper bound of the pause in milliseconds
    """
    if max_ms <= min_ms:
        # BUGFIX: the old `max_ms <= min_ms and min_ms or ...` one-liner fell through
        # to secrets.randbelow() with a non-positive argument when min_ms == 0
        # (falsy) and max_ms <= 0, which raises ValueError
        duration = min_ms
    else:
        duration = min_ms + secrets.randbelow(max_ms - min_ms)
    LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
    time.sleep(duration / 1000)
|
|
||||||
|
|
||||||
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
|
def pluralize(noun:str, count:int | Sized, prefix_with_count:bool = True) -> str:
|
||||||
"""
|
"""
|
||||||
>>> pluralize("field", 1)
|
>>> pluralize("field", 1)
|
||||||
@@ -272,20 +278,3 @@ def parse_datetime(date:datetime | str | None) -> datetime | None:
|
|||||||
if isinstance(date, datetime):
|
if isinstance(date, datetime):
|
||||||
return date
|
return date
|
||||||
return datetime.fromisoformat(date)
|
return datetime.fromisoformat(date)
|
||||||
|
|
||||||
|
|
||||||
def extract_ad_id_from_ad_link(url: str) -> int:
    """
    Extracts the ID of an ad, given by its reference link.

    :param url: the URL to the ad page
    :return: the ad ID, a (ten-digit) integer number, or -1 if it could not be extracted
    """
    # the ID is the leading number of the URL's last path segment,
    # e.g. .../some-title/1234567890-123-456
    last_path_segment = url.rpartition('/')[2]
    candidate = last_path_segment.partition('-')[0]
    try:
        return int(candidate)
    except ValueError:
        print('The ad ID could not be extracted from the given ad reference!')
        return -1
|
|
||||||
|
|||||||
532
src/kleinanzeigen_bot/web_scraping_mixin.py
Normal file
532
src/kleinanzeigen_bot/web_scraping_mixin.py
Normal file
@@ -0,0 +1,532 @@
|
|||||||
|
"""
|
||||||
|
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
"""
|
||||||
|
import asyncio, enum, inspect, json, logging, os, platform, secrets, shutil, time
|
||||||
|
from collections.abc import Callable, Coroutine, Iterable
|
||||||
|
from typing import cast, Any, Final
|
||||||
|
|
||||||
|
try:
|
||||||
|
from typing import Never # type: ignore[attr-defined,unused-ignore] # mypy
|
||||||
|
except ImportError:
|
||||||
|
from typing import NoReturn as Never # Python <3.11
|
||||||
|
|
||||||
|
import nodriver, psutil
|
||||||
|
from nodriver.core.browser import Browser
|
||||||
|
from nodriver.core.config import Config
|
||||||
|
from nodriver.core.element import Element
|
||||||
|
from nodriver.core.tab import Tab as Page
|
||||||
|
|
||||||
|
from .utils import ensure, T
|
||||||
|
|
||||||
|
|
||||||
|
# BUGFIX: logger was still named "selenium_mixin" although this module is
# web_scraping_mixin (selenium was replaced by nodriver)
LOG:Final[logging.Logger] = logging.getLogger("kleinanzeigen_bot.web_scraping_mixin")
|
||||||
|
|
||||||
|
|
||||||
|
# names exported on `from ... import *`
__all__ = [
    "Browser", "BrowserConfig", "By", "Element", "Page", "Is",
    "WebScrapingMixin",
]
|
||||||
|
|
||||||
|
|
||||||
|
class By(enum.Enum):
    """Supported strategies for locating HTML elements."""
    ID = enum.auto()
    CLASS_NAME = enum.auto()
    CSS_SELECTOR = enum.auto()
    TAG_NAME = enum.auto()
    TEXT = enum.auto()
    XPATH = enum.auto()
|
||||||
|
|
||||||
|
|
||||||
|
class Is(enum.Enum):
    """Element states that can be queried via web_check()."""
    CLICKABLE = enum.auto()
    DISPLAYED = enum.auto()
    DISABLED = enum.auto()
    READONLY = enum.auto()
    SELECTED = enum.auto()
|
||||||
|
|
||||||
|
|
||||||
|
class BrowserConfig:
    """Launch options for the nodriver-controlled browser."""

    def __init__(self) -> None:
        # extra command line arguments passed to the browser binary
        self.arguments:Iterable[str] = []
        # path to the browser executable; auto-detected when None
        self.binary_location:str | None = None
        # paths of .crx extension files to load
        self.extensions:Iterable[str] = []
        # open an incognito/in-private window by default
        self.use_private_window:bool = True
        self.user_data_dir:str = ""
        self.profile_name:str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class WebScrapingMixin:
    """Adds nodriver-based web scraping capabilities to a class."""

    def __init__(self) -> None:
        self.browser_config:Final[BrowserConfig] = BrowserConfig()
        # both are late-initialized by create_browser_session / web_open
        self.browser:Browser = None  # pyright: ignore
        self.page:Page = None  # pyright: ignore
|
||||||
|
|
||||||
|
async def create_browser_session(self) -> None:
    """
    Launches a new browser instance via nodriver using the settings in self.browser_config.

    :raises AssertionError: if a configured browser binary or extension file does not exist
    """
    LOG.info("Creating Browser session...")

    if self.browser_config.binary_location:
        ensure(os.path.exists(self.browser_config.binary_location),
            f"Specified browser binary [{self.browser_config.binary_location}] does not exist.")
    else:
        self.browser_config.binary_location = self.get_compatible_browser()
    LOG.info(" -> Chrome binary location: %s", self.browser_config.binary_location)

    if self.browser_config.user_data_dir:
        LOG.info(" -> Browser user data dir: %s", self.browser_config.user_data_dir)

    cfg = Config(
        headless = False,
        browser_executable_path = self.browser_config.binary_location,
        browser_args = self._assemble_browser_args(),
        user_data_dir = self.browser_config.user_data_dir
    )
    # effective browser arguments are already logged by nodriver

    self._write_initial_chrome_prefs(
        os.path.join(cfg.user_data_dir, self.browser_config.profile_name or "Default"))

    # load extensions
    for crx_extension in self.browser_config.extensions:
        LOG.info(" -> Adding extension: [%s]", crx_extension)
        ensure(os.path.exists(crx_extension), f"Configured extension-file [{crx_extension}] does not exist.")
        cfg.add_extension(crx_extension)

    self.browser = await nodriver.start(cfg)
    LOG.info("New Browser session is %s", self.browser.websocket_url)

def _assemble_browser_args(self) -> list[str]:
    """Builds the chromium/edge command line arguments derived from self.browser_config."""
    # default_browser_args: @ https://github.com/ultrafunkamsterdam/nodriver/blob/main/nodriver/core/config.py
    # https://peter.sh/experiments/chromium-command-line-switches/
    # https://github.com/GoogleChrome/chrome-launcher/blob/main/docs/chrome-flags-for-tools.md
    browser_args = [
        # "--disable-dev-shm-usage", # https://stackoverflow.com/a/50725918/5116073
        "--disable-crash-reporter",
        "--disable-domain-reliability",
        "--disable-sync",
        "--no-experiments",

        "--disable-features=MediaRouter",
        "--use-mock-keychain",

        "--test-type",  # https://stackoverflow.com/a/36746675/5116073
        # block common tracking endpoints, see
        # https://chromium.googlesource.com/chromium/src/+/master/net/dns/README.md#request-remapping
        '--host-resolver-rules="MAP connect.facebook.net 127.0.0.1, MAP securepubads.g.doubleclick.net 127.0.0.1, MAP www.googletagmanager.com 127.0.0.1"'
    ]

    is_edge = "edge" in self.browser_config.binary_location.lower()
    if is_edge:
        # NOTE(review): leftover from the selenium era (msedgedriver is no longer
        # used with nodriver) - harmless, kept for safety
        os.environ["MSEDGEDRIVER_TELEMETRY_OPTOUT"] = "1"

    if self.browser_config.use_private_window:
        browser_args.append("-inprivate" if is_edge else "--incognito")

    if self.browser_config.profile_name:
        LOG.info(" -> Browser profile name: %s", self.browser_config.profile_name)
        browser_args.append(f"--profile-directory={self.browser_config.profile_name}")

    for browser_arg in self.browser_config.arguments:
        LOG.info(" -> Custom Chrome argument: %s", browser_arg)
        browser_args.append(browser_arg)

    if not LOG.isEnabledFor(logging.DEBUG):
        browser_args.append("--log-level=3")  # INFO: 0, WARNING: 1, ERROR: 2, FATAL: 3
    return browser_args

def _write_initial_chrome_prefs(self, profile_dir:str) -> None:
    """Creates a Preferences file with privacy-friendly defaults unless one already exists."""
    os.makedirs(profile_dir, exist_ok = True)
    prefs_file = os.path.join(profile_dir, "Preferences")
    if os.path.exists(prefs_file):
        return
    LOG.info("-> Setting chrome prefs [%s]...", prefs_file)
    with open(prefs_file, "w", encoding = 'UTF-8') as fd:
        json.dump({
            "credentials_enable_service": False,
            "enable_do_not_track": True,
            "google": {
                "services": {
                    "consented_to_sync": False
                }
            },
            "profile": {
                "default_content_setting_values": {
                    "popups": 0,
                    "notifications": 2  # 1 = allow, 2 = block browser notifications
                },
                "password_manager_enabled": False
            },
            "signin": {
                "allowed": False
            },
            "translate_site_blacklist": [
                "www.kleinanzeigen.de"
            ],
            "devtools": {
                "preferences": {
                    "currentDockState": '"bottom"'
                }
            }
        }, fd)
|
||||||
|
|
||||||
|
def close_browser_session(self) -> None:
    """Stops the browser and kills any child processes it left behind."""
    if not self.browser:
        return
    LOG.debug("Closing Browser session...")
    self.page = None  # pyright: ignore
    main_proc = psutil.Process(self.browser._process_pid)  # pylint: disable=protected-access
    child_procs:list[psutil.Process] = main_proc.children()
    self.browser.stop()
    # terminate orphaned browser processes
    for child in child_procs:
        if child.is_running():
            child.kill()
    self.browser = None  # pyright: ignore
|
||||||
|
|
||||||
|
def get_compatible_browser(self) -> str:
|
||||||
|
match platform.system():
|
||||||
|
case "Linux":
|
||||||
|
browser_paths = [
|
||||||
|
shutil.which("chromium"),
|
||||||
|
shutil.which("chromium-browser"),
|
||||||
|
shutil.which("google-chrome"),
|
||||||
|
shutil.which("microsoft-edge")
|
||||||
|
]
|
||||||
|
|
||||||
|
case "Darwin":
|
||||||
|
browser_paths = [
|
||||||
|
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
||||||
|
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||||
|
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
||||||
|
]
|
||||||
|
|
||||||
|
case "Windows":
|
||||||
|
browser_paths = [
|
||||||
|
os.environ.get("ProgramFiles", "C:\\Program Files") + r'\Microsoft\Edge\Application\msedge.exe',
|
||||||
|
os.environ.get("ProgramFiles(x86)", "C:\\Program Files (x86)") + r'\Microsoft\Edge\Application\msedge.exe',
|
||||||
|
|
||||||
|
os.environ["ProgramFiles"] + r'\Chromium\Application\chrome.exe',
|
||||||
|
os.environ["ProgramFiles(x86)"] + r'\Chromium\Application\chrome.exe',
|
||||||
|
os.environ["LOCALAPPDATA"] + r'\Chromium\Application\chrome.exe',
|
||||||
|
|
||||||
|
os.environ["ProgramFiles"] + r'\Chrome\Application\chrome.exe',
|
||||||
|
os.environ["ProgramFiles(x86)"] + r'\Chrome\Application\chrome.exe',
|
||||||
|
os.environ["LOCALAPPDATA"] + r'\Chrome\Application\chrome.exe',
|
||||||
|
|
||||||
|
shutil.which("msedge.exe"),
|
||||||
|
shutil.which("chromium.exe"),
|
||||||
|
shutil.which("chrome.exe")
|
||||||
|
]
|
||||||
|
|
||||||
|
case _ as os_name:
|
||||||
|
raise AssertionError(f"Installed browser for OS [{os_name}] could not be detected")
|
||||||
|
|
||||||
|
for browser_path in browser_paths:
|
||||||
|
if browser_path and os.path.isfile(browser_path):
|
||||||
|
return browser_path
|
||||||
|
|
||||||
|
raise AssertionError("Installed browser could not be detected")
|
||||||
|
|
||||||
|
async def web_await(self, condition: Callable[[], T | Never | Coroutine[Any,Any,T | Never]], *,
        timeout:int | float = 5, timeout_error_message: str = "") -> T:
    """
    Blocks/waits until `condition` returns a truthy value and returns that value.

    :param timeout: timeout in seconds
    :raises TimeoutError: if the condition was not met within the given time
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while True:
        await self.page  # lets nodriver process pending page events
        last_error:Exception | None = None
        try:
            outcome = condition()
            value:T = (await outcome) if inspect.isawaitable(outcome) else outcome
            if value:
                return value
        except Exception as caught:  # pylint: disable=broad-except
            # remember the error; it is re-raised only if the timeout expires
            last_error = caught
        if loop.time() > deadline:
            if last_error:
                raise last_error
            raise TimeoutError(timeout_error_message or f"Condition not met within {timeout} seconds")
        await self.page.sleep(0.5)
|
||||||
|
|
||||||
|
async def web_check(self, selector_type:By, selector_value:str, attr:Is, *, timeout:int | float = 5) -> bool:
|
||||||
|
"""
|
||||||
|
Locates an HTML element and returns a state.
|
||||||
|
|
||||||
|
:param timeout: timeout in seconds
|
||||||
|
:raises TimeoutError: if element could not be found within time
|
||||||
|
"""
|
||||||
|
|
||||||
|
def is_disabled(elem:Element) -> bool:
|
||||||
|
return elem.attrs.get("disabled") is not None
|
||||||
|
|
||||||
|
async def is_displayed(elem:Element) -> bool:
|
||||||
|
return cast(bool, await elem.apply("""
|
||||||
|
function (element) {
|
||||||
|
var style = window.getComputedStyle(element);
|
||||||
|
return style.display !== 'none'
|
||||||
|
&& style.visibility !== 'hidden'
|
||||||
|
&& style.opacity !== '0'
|
||||||
|
&& element.offsetWidth > 0
|
||||||
|
&& element.offsetHeight > 0
|
||||||
|
}
|
||||||
|
"""))
|
||||||
|
elem:Element = await self.web_find(selector_type, selector_value, timeout = timeout)
|
||||||
|
|
||||||
|
match attr:
|
||||||
|
case Is.CLICKABLE:
|
||||||
|
return not is_disabled(elem) or await is_displayed(elem)
|
||||||
|
case Is.DISPLAYED:
|
||||||
|
return await is_displayed(elem)
|
||||||
|
case Is.DISABLED:
|
||||||
|
return is_disabled(elem)
|
||||||
|
case Is.READONLY:
|
||||||
|
return elem.attrs.get("readonly") is not None
|
||||||
|
case Is.SELECTED:
|
||||||
|
return cast(bool, await elem.apply("""
|
||||||
|
function (element) {
|
||||||
|
if (element.tagName.toLowerCase() === 'input') {
|
||||||
|
if (element.type === 'checkbox' || element.type === 'radio') {
|
||||||
|
return element.checked
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
"""))
|
||||||
|
raise AssertionError(f"Unsupported attribute: {attr}")
|
||||||
|
|
||||||
|
async def web_click(self, selector_type:By, selector_value:str, *, timeout:int | float = 5) -> Element:
    """
    Locates an HTML element and clicks it, followed by a randomized pause.

    :param timeout: timeout in seconds
    :raises TimeoutError: if element could not be found within time
    """
    target = await self.web_find(selector_type, selector_value, timeout = timeout)
    await target.click()
    await self.web_sleep()
    return target
|
||||||
|
|
||||||
|
async def web_execute(self, javascript:str) -> Any:
    """
    Executes the given JavaScript code in the context of the current page.

    :return: the JavaScript's return value
    """
    result = await self.page.evaluate(javascript, True)
    return result
|
||||||
|
|
||||||
|
async def web_find(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> Element:
|
||||||
|
"""
|
||||||
|
Locates an HTML element by the given selector type and value.
|
||||||
|
|
||||||
|
:param timeout: timeout in seconds
|
||||||
|
:raises TimeoutError: if element could not be found within time
|
||||||
|
"""
|
||||||
|
match selector_type:
|
||||||
|
case By.ID:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector(f"#{selector_value}", parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML element found with ID '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.CLASS_NAME:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector(f".{selector_value}", parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML element found with ID '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.TAG_NAME:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector(selector_value, parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML element found of tag <{selector_value}> within {timeout} seconds.")
|
||||||
|
case By.CSS_SELECTOR:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector(selector_value, parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML element found using CSS selector '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.TEXT:
|
||||||
|
if parent:
|
||||||
|
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.find_element_by_text(selector_value, True),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML element found containing text '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.XPATH:
|
||||||
|
if parent:
|
||||||
|
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.find_element_by_text(selector_value, True),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML element found using XPath '{selector_value}' within {timeout} seconds.")
|
||||||
|
|
||||||
|
raise AssertionError(f"Unsupported selector type: {selector_type}")
|
||||||
|
|
||||||
|
async def web_find_all(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> list[Element]:
|
||||||
|
"""
|
||||||
|
Locates an HTML element by ID.
|
||||||
|
|
||||||
|
:param timeout: timeout in seconds
|
||||||
|
:raises TimeoutError: if element could not be found within time
|
||||||
|
"""
|
||||||
|
match selector_type:
|
||||||
|
case By.CLASS_NAME:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector_all(f".{selector_value}", parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML elements found with CSS class '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.CSS_SELECTOR:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector_all(selector_value, parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML elements found using CSS selector '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.TAG_NAME:
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.query_selector_all(selector_value, parent),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML elements found of tag <{selector_value}> within {timeout} seconds.")
|
||||||
|
case By.TEXT:
|
||||||
|
if parent:
|
||||||
|
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.find_elements_by_text(selector_value),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML elements found containing text '{selector_value}' within {timeout} seconds.")
|
||||||
|
case By.XPATH:
|
||||||
|
if parent:
|
||||||
|
raise AssertionError(f"Specifying a parent element currently not supported with selector type: {selector_type}")
|
||||||
|
return await self.web_await(
|
||||||
|
lambda: self.page.find_elements_by_text(selector_value),
|
||||||
|
timeout = timeout,
|
||||||
|
timeout_error_message = f"No HTML elements found using XPath '{selector_value}' within {timeout} seconds.")
|
||||||
|
|
||||||
|
raise AssertionError(f"Unsupported selector type: {selector_type}")
|
||||||
|
|
||||||
|
async def web_input(self, selector_type:By, selector_value:str, text:str | int, *, timeout:int | float = 5) -> Element:
    """
    Clears an HTML input field and types the given text into it.

    :param timeout: timeout in seconds
    :raises TimeoutError: if element could not be found within time
    """
    field = await self.web_find(selector_type, selector_value, timeout = timeout)
    await field.clear_input()
    await field.send_keys(str(text))
    await self.web_sleep()
    return field
|
||||||
|
|
||||||
|
async def web_open(self, url:str, *, timeout:int | float = 15, reload_if_already_open:bool = False) -> None:
    """
    :param url: url to open in browser
    :param timeout: timespan in seconds within which the page needs to be loaded
    :param reload_if_already_open: if False does nothing if the URL is already open in the browser
    :raises TimeoutError: if page did not open within given timespan
    """
    # BUGFIX: default timeout was 15000 although web_await measures seconds
    # (15000 s ~ 4 hours - clearly a leftover millisecond value); also the
    # docstring referenced a nonexistent TimeoutException instead of TimeoutError
    LOG.debug(" -> Opening [%s]...", url)
    if not reload_if_already_open and self.page and url == self.page.url:
        LOG.debug(" => skipping, [%s] is already open", url)
        return
    self.page = await self.browser.get(url, False, False)
    await self.web_await(lambda: self.web_execute("document.readyState == 'complete'"), timeout = timeout,
        timeout_error_message = f"Page did not finish loading within {timeout} seconds.")
|
||||||
|
|
||||||
|
async def web_text(self, selector_type:By, selector_value:str, *, parent:Element = None, timeout:int | float = 5) -> str:
    """Returns the visible (rendered) text of the matched element."""
    elem = await self.web_find(selector_type, selector_value, parent = parent, timeout = timeout)
    # selecting the node and reading the selection yields only the text a user
    # actually sees (unlike textContent, which would include hidden nodes)
    visible_text = await elem.apply("""
        function (elem) {
            let sel = window.getSelection()
            sel.removeAllRanges()
            let range = document.createRange()
            range.selectNode(elem)
            sel.addRange(range)
            let visibleText = sel.toString().trim()
            sel.removeAllRanges()
            return visibleText
        }
    """)
    return str(visible_text)
|
||||||
|
|
||||||
|
async def web_sleep(self, min_ms:int = 1000, max_ms:int = 2500) -> None:
    """Pauses for a random duration between min_ms and max_ms milliseconds (anti-bot jitter)."""
    if max_ms <= min_ms:
        # BUGFIX: the old `and/or` one-liner called secrets.randbelow() with a
        # non-positive argument when min_ms == 0 (falsy) and max_ms <= 0 (ValueError)
        duration = min_ms
    else:
        duration = min_ms + secrets.randbelow(max_ms - min_ms)
    LOG.log(logging.INFO if duration > 1500 else logging.DEBUG, " ... pausing for %d ms ...", duration)
    await self.page.sleep(duration / 1000)
|
||||||
|
|
||||||
|
async def web_request(self, url:str, method:str = "GET", valid_response_codes:int | Iterable[int] = 200,
        headers:dict[str, str] | None = None) -> dict[str, Any]:
    """
    Performs an HTTP request in the page context using the browser's fetch API.

    :return: dict with keys statusCode, statusMessage, headers and content
    :raises AssertionError: if the response status code is not in valid_response_codes
    """
    method = method.upper()
    LOG.debug(" -> HTTP %s [%s]...", method, url)
    # BUGFIX: headers were interpolated via Python's dict repr; json.dumps produces
    # a guaranteed-valid JS object literal
    response = cast(dict[str, Any], await self.page.evaluate(f"""
        fetch("{url}", {{
            method: "{method}",
            redirect: "follow",
            headers: {json.dumps(headers or {})}
        }})
        .then(response => response.text().then(responseText => {{
            headers = {{}};
            response.headers.forEach((v, k) => headers[k] = v);
            return {{
                statusCode: response.status,
                statusMessage: response.statusText,
                headers: headers,
                content: responseText
            }}
        }}))
    """, await_promise = True))
    if isinstance(valid_response_codes, int):
        valid_response_codes = [valid_response_codes]
    ensure(
        response["statusCode"] in valid_response_codes,
        # BUGFIX: statusMessage appeared literally in the message (missing {} braces)
        f'Invalid response "{response["statusCode"]} {response["statusMessage"]}" received for HTTP {method} to {url}'
    )
    return response
# pylint: enable=dangerous-default-value
|
||||||
|
|
||||||
|
async def web_scroll_page_down(self, scroll_length: int = 10, scroll_speed: int = 10000, scroll_back_top: bool = False) -> None:
    """
    Smoothly scrolls the current web page down.

    :param scroll_length: the length of a single scroll iteration, determines smoothness of scrolling, lower is smoother
    :param scroll_speed: the speed of scrolling, higher is faster
    :param scroll_back_top: whether to scroll the page back to the top after scrolling to the bottom
    """
    current_y_pos = 0
    bottom_y_pos: int = await self.web_execute('document.body.scrollHeight')  # get bottom position
    while current_y_pos < bottom_y_pos:  # scroll in steps until bottom reached
        current_y_pos += scroll_length
        await self.web_execute(f'window.scrollTo(0, {current_y_pos})')  # scroll one step
        # BUGFIX: time.sleep() blocked the whole event loop inside this coroutine
        await asyncio.sleep(scroll_length / scroll_speed)

    if scroll_back_top:  # scroll back to top in same style
        while current_y_pos > 0:
            current_y_pos -= scroll_length
            await self.web_execute(f'window.scrollTo(0, {current_y_pos})')
            await asyncio.sleep(scroll_length / scroll_speed / 2)  # double speed
|
||||||
|
|
||||||
|
async def web_select(self, selector_type:By, selector_value:str, selected_value:Any, timeout:int | float = 5) -> Element:
    """
    Selects an <option/> of a <select/> HTML element.

    :param timeout: timeout in seconds
    :raises TimeoutError: if element could not be found within time
    """
    await self.web_await(
        lambda: self.web_check(selector_type, selector_value, Is.CLICKABLE), timeout = timeout,
        timeout_error_message = f"No clickable HTML element with selector: {selector_type}='{selector_value}' found"
    )
    elem = await self.web_find(selector_type, selector_value)
    # BUGFIX: the previous script used `break` after selecting and then fell through
    # to the unconditional throw, i.e. it raised even when the option WAS found;
    # `return` now short-circuits, and a change event notifies JS frameworks
    await elem.apply(f"""
        function (element) {{
            for (let i = 0; i < element.options.length; i++) {{
                if (element.options[i].value == "{selected_value}") {{
                    element.selectedIndex = i;
                    element.dispatchEvent(new Event('change', {{bubbles: true}}));
                    return;
                }}
            }}
            throw new Error("Option with value {selected_value} not found.");
        }}
    """)
    await self.web_sleep()
    return elem
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
"""
|
|
||||||
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|
||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
|
||||||
"""
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from kleinanzeigen_bot.selenium_mixin import SeleniumMixin
|
|
||||||
from kleinanzeigen_bot import utils
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.itest
def test_webdriver_auto_init():
    """Integration test: auto-detects a browser and spins a webdriver session up and down."""
    mixin = SeleniumMixin()
    mixin.browser_config.arguments = ["--no-sandbox"]

    detected_browser = mixin.get_compatible_browser()
    utils.ensure(detected_browser is not None, "Browser not auto-detected")

    mixin.webdriver = None
    mixin.create_webdriver_session()
    mixin.webdriver.quit()
|
|
||||||
@@ -3,12 +3,11 @@ SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
|||||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
"""
|
"""
|
||||||
import os, sys, time
|
|
||||||
import pytest
|
import pytest
|
||||||
from kleinanzeigen_bot import utils
|
from kleinanzeigen_bot import utils
|
||||||
|
|
||||||
|
|
||||||
def test_ensure():
|
def test_ensure() -> None:
|
||||||
utils.ensure(True, "TRUE")
|
utils.ensure(True, "TRUE")
|
||||||
utils.ensure("Some Value", "TRUE")
|
utils.ensure("Some Value", "TRUE")
|
||||||
utils.ensure(123, "TRUE")
|
utils.ensure(123, "TRUE")
|
||||||
@@ -29,13 +28,3 @@ def test_ensure():
|
|||||||
|
|
||||||
with pytest.raises(AssertionError):
|
with pytest.raises(AssertionError):
|
||||||
utils.ensure(lambda: False, "FALSE", timeout = 2)
|
utils.ensure(lambda: False, "FALSE", timeout = 2)
|
||||||
|
|
||||||
|
|
||||||
def test_pause():
    """pause(100, 100) must sleep ~100 ms (with extra slack for slow macOS CI runners)."""
    started = time.time()
    utils.pause(100, 100)
    elapsed_ms = 1000 * (time.time() - started)
    # macOS GitHub runners have notoriously imprecise sleep timing
    if sys.platform == "darwin" and os.getenv("GITHUB_ACTIONS", "true") == "true":
        assert 99 < elapsed_ms < 300
    else:
        assert 99 < elapsed_ms < 120
|
|
||||||
|
|||||||
41
tests/test_web_scraping_mixin.py
Normal file
41
tests/test_web_scraping_mixin.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
"""
|
||||||
|
SPDX-FileCopyrightText: © Sebastian Thomschke and contributors
|
||||||
|
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
SPDX-ArtifactOfProjectHomePage: https://github.com/Second-Hand-Friends/kleinanzeigen-bot/
|
||||||
|
"""
|
||||||
|
import logging, os, time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import nodriver, pytest
|
||||||
|
from flaky import flaky
|
||||||
|
|
||||||
|
from kleinanzeigen_bot.web_scraping_mixin import WebScrapingMixin
|
||||||
|
from kleinanzeigen_bot.utils import ensure
|
||||||
|
|
||||||
|
if os.environ.get("CI"):
|
||||||
|
logging.getLogger("kleinanzeigen_bot").setLevel(logging.DEBUG)
|
||||||
|
logging.getLogger("nodriver").setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
def delay_rerun(*args:Any) -> bool:  # pylint: disable=unused-argument
    """flaky rerun_filter: waits 5 seconds and then always allows the rerun."""
    time.sleep(5)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
async def atest_init() -> None:
    """Creates and tears down a real browser session using the auto-detected browser."""
    scraper = WebScrapingMixin()

    detected_browser = scraper.get_compatible_browser()
    ensure(detected_browser is not None, "Browser not auto-detected")

    scraper.close_browser_session()  # must be a no-op before any session exists
    try:
        await scraper.create_browser_session()
    finally:
        scraper.close_browser_session()
|
||||||
|
|
||||||
|
|
||||||
|
@flaky(max_runs = 3, min_passes = 1, rerun_filter = delay_rerun)  # type: ignore[misc] # mypy
@pytest.mark.itest
def test_init() -> None:
    """Runs the async browser-session smoke test on nodriver's event loop."""
    nodriver.loop().run_until_complete(atest_init())
|
||||||
Reference in New Issue
Block a user