Mirror of https://github.com/Second-Hand-Friends/kleinanzeigen-bot.git, synced 2026-03-12 10:31:50 +01:00.
fix: ruff PLC0207 missing-maxsplit-arg
This commit is contained in:
Committed by Sebastian Thomschke (parent 67805e633f, commit 3978d85cb4).
@@ -112,13 +112,13 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
:param url: the URL to the ad page
|
:param url: the URL to the ad page
|
||||||
:return: the ad ID, a (ten-digit) integer number
|
:return: the ad ID, a (ten-digit) integer number
|
||||||
"""
|
"""
|
||||||
num_part = url.split("/")[-1] # suffix
|
num_part = url.rsplit("/", maxsplit = 1)[-1] # suffix
|
||||||
id_part = num_part.split("-")[0]
|
id_part = num_part.split("-", maxsplit = 1)[0]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
path = url.split("?", 1)[0] # Remove query string if present
|
path = url.split("?", maxsplit = 1)[0] # Remove query string if present
|
||||||
last_segment = path.rstrip("/").split("/")[-1] # Get last path component
|
last_segment = path.rstrip("/").rsplit("/", maxsplit = 1)[-1] # Get last path component
|
||||||
id_part = last_segment.split("-")[0] # Extract part before first hyphen
|
id_part = last_segment.split("-", maxsplit = 1)[0] # Extract part before first hyphen
|
||||||
return int(id_part)
|
return int(id_part)
|
||||||
except (IndexError, ValueError) as ex:
|
except (IndexError, ValueError) as ex:
|
||||||
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
|
LOG.warning("Failed to extract ad ID from URL '%s': %s", url, ex)
|
||||||
@@ -340,8 +340,8 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
category_line = await self.web_find(By.ID, "vap-brdcrmb")
|
category_line = await self.web_find(By.ID, "vap-brdcrmb")
|
||||||
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
category_first_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(2)", parent = category_line)
|
||||||
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
category_second_part = await self.web_find(By.CSS_SELECTOR, "a:nth-of-type(3)", parent = category_line)
|
||||||
cat_num_first = category_first_part.attrs["href"].split("/")[-1][1:]
|
cat_num_first = category_first_part.attrs["href"].rsplit("/", maxsplit = 1)[-1][1:]
|
||||||
cat_num_second = category_second_part.attrs["href"].split("/")[-1][1:]
|
cat_num_second = category_second_part.attrs["href"].rsplit("/", maxsplit = 1)[-1][1:]
|
||||||
category:str = cat_num_first + "/" + cat_num_second
|
category:str = cat_num_first + "/" + cat_num_second
|
||||||
|
|
||||||
return category
|
return category
|
||||||
@@ -371,15 +371,15 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
try:
|
try:
|
||||||
price_str:str = await self.web_text(By.ID, "viewad-price")
|
price_str:str = await self.web_text(By.ID, "viewad-price")
|
||||||
price:int | None = None
|
price:int | None = None
|
||||||
match price_str.split()[-1]:
|
match price_str.rsplit(maxsplit = 1)[-1]:
|
||||||
case "€":
|
case "€":
|
||||||
price_type = "FIXED"
|
price_type = "FIXED"
|
||||||
# replace('.', '') is to remove the thousands separator before parsing as int
|
# replace('.', '') is to remove the thousands separator before parsing as int
|
||||||
price = int(price_str.replace(".", "").split()[0])
|
price = int(price_str.replace(".", "").split(maxsplit = 1)[0])
|
||||||
case "VB":
|
case "VB":
|
||||||
price_type = "NEGOTIABLE"
|
price_type = "NEGOTIABLE"
|
||||||
if price_str != "VB": # can be either 'X € VB', or just 'VB'
|
if price_str != "VB": # can be either 'X € VB', or just 'VB'
|
||||||
price = int(price_str.replace(".", "").split()[0])
|
price = int(price_str.replace(".", "").split(maxsplit = 1)[0])
|
||||||
case "verschenken":
|
case "verschenken":
|
||||||
price_type = "GIVE_AWAY"
|
price_type = "GIVE_AWAY"
|
||||||
case _:
|
case _:
|
||||||
@@ -490,7 +490,7 @@ class AdExtractor(WebScrapingMixin):
|
|||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
LOG.info("No street given in the contact.")
|
LOG.info("No street given in the contact.")
|
||||||
|
|
||||||
(zipcode, location) = address_text.split(" ", 1)
|
(zipcode, location) = address_text.split(" ", maxsplit = 1)
|
||||||
contact["zipcode"] = zipcode # e.g. 19372
|
contact["zipcode"] = zipcode # e.g. 19372
|
||||||
contact["location"] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
|
contact["location"] = location # e.g. Mecklenburg-Vorpommern - Steinbeck
|
||||||
|
|
||||||
|
|||||||
@@ -87,9 +87,9 @@ class WebScrapingMixin:
|
|||||||
remote_port = 0
|
remote_port = 0
|
||||||
for arg in self.browser_config.arguments:
|
for arg in self.browser_config.arguments:
|
||||||
if arg.startswith("--remote-debugging-host="):
|
if arg.startswith("--remote-debugging-host="):
|
||||||
remote_host = arg.split("=", 2)[1]
|
remote_host = arg.split("=", maxsplit = 1)[1]
|
||||||
if arg.startswith("--remote-debugging-port="):
|
if arg.startswith("--remote-debugging-port="):
|
||||||
remote_port = int(arg.split("=", 2)[1])
|
remote_port = int(arg.split("=", maxsplit = 1)[1])
|
||||||
|
|
||||||
if remote_port > 0:
|
if remote_port > 0:
|
||||||
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
|
LOG.info("Using existing browser process at %s:%s", remote_host, remote_port)
|
||||||
|
|||||||
Reference in New Issue
Block a user