From 2c93b79e95342e99c5bc81451e9473d327107304 Mon Sep 17 00:00:00 2001 From: Anders Jess Pedersen Date: Thu, 30 Nov 2023 10:38:49 +0100 Subject: [PATCH] fix: retry in case we get errors when dynamically getting soup --- src/tts_text/utils.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/tts_text/utils.py b/src/tts_text/utils.py index 309ded4..08c71e9 100644 --- a/src/tts_text/utils.py +++ b/src/tts_text/utils.py @@ -157,19 +157,21 @@ def get_soup( if not (retries is None or retries >= 0): raise ValueError("Number of retries must be non-negative.") + html: str = "" if dynamic: options = Options() options.add_argument("--headless") driver = webdriver.Chrome(options=options) - try: - driver.get(url=url) - html = driver.page_source - except TimeoutException: - logger.warning(f"Timed out while getting soup from {url}.") - html = "" - except WebDriverException: - logger.warning(f"Could not get soup from {url}.") - html = "" + while not html: + try: + driver.get(url=url) + html = driver.page_source + except TimeoutException: + logger.warning(f"Timed out while getting soup from {url}.") + html = "" + except WebDriverException: + logger.warning(f"Could not get soup from {url}.") + html = "" else: response = rq.get(url=url) @@ -190,6 +192,9 @@ def get_soup( html = response.text + if not html: + return BeautifulSoup("") + soup: BeautifulSoup = BeautifulSoup("") if retries is None: while not soup.contents: