Skip to content

Commit

Permalink
fix: retry in case we get errors when dynamically getting soup
Browse files Browse the repository at this point in the history
  • Loading branch information
AJDERS committed Nov 30, 2023
1 parent 7458702 commit 2c93b79
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions src/tts_text/utils.py
Original file line number Diff line number Diff line change
@@ -157,19 +157,21 @@ def get_soup(
 if not (retries is None or retries >= 0):
 raise ValueError("Number of retries must be non-negative.")

+html: str = ""
 if dynamic:
 options = Options()
 options.add_argument("--headless")
 driver = webdriver.Chrome(options=options)
-try:
-driver.get(url=url)
-html = driver.page_source
-except TimeoutException:
-logger.warning(f"Timed out while getting soup from {url}.")
-html = ""
-except WebDriverException:
-logger.warning(f"Could not get soup from {url}.")
-html = ""
+while not html:
+try:
+driver.get(url=url)
+html = driver.page_source
+except TimeoutException:
+logger.warning(f"Timed out while getting soup from {url}.")
+html = ""
+except WebDriverException:
+logger.warning(f"Could not get soup from {url}.")
+html = ""
 else:
 response = rq.get(url=url)

@@ -190,6 +192,9 @@ def get_soup(

 html = response.text

+if not html:
+return BeautifulSoup("")
+
 soup: BeautifulSoup = BeautifulSoup("")
 if retries is None:
 while not soup.contents:
Expand Down

0 comments on commit 2c93b79

Please sign in to comment.