Skip to content

Commit

Permalink
Retry a few times if Genius returns an invalid response
Browse files Browse the repository at this point in the history
  • Loading branch information
ritiek committed May 18, 2020
1 parent 85c12a9 commit 64d54d7
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ The release dates mentioned follow the format `DD-MM-YYYY`.

## [Unreleased]

## [2.0.3] (Hotfix Release) - 18-05-2020
### Fixed
- Genius would sometimes return invalid lyrics. Retry a few times in such a case.

## [2.0.2] (Hotfix Release) - 18-05-2020
### Fixed
- Skipping tracks with `-m` would crash.
Expand Down
29 changes: 22 additions & 7 deletions spotdl/lyrics/providers/genius.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,12 @@ def _fetch_url_page(self, url, timeout=None):
else:
return response.read()

def _get_lyrics_text(self, html):
def _get_lyrics_text(self, paragraph):
"""
Extracts and returns the lyric content from the provided paragraph element.
"""
soup = BeautifulSoup(html, "html.parser")
lyrics_paragraph = soup.find("p")
if lyrics_paragraph:
return lyrics_paragraph.get_text()
if paragraph:
return paragraph.get_text()
else:
raise LyricsNotFoundError(
"The lyrics for this track are yet to be released on Genius."
Expand Down Expand Up @@ -125,12 +123,29 @@ def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
lyric_url = self.guess_lyric_url_from_artist_and_track(artist, track)
return self.from_url(lyric_url, linesep, timeout)

def from_url(self, url, linesep="\n", timeout=None, retries=5):
    """
    Returns the lyric string for the given URL.

    ``retries`` is the number of additional fetch attempts made when
    Genius answers with an invalid page. It is declared after
    ``timeout`` so that callers passing ``timeout`` as the third
    positional argument keep working.

    Raises LyricsNotFoundError if the response is still invalid once
    no retries are left.
    """
    logger.debug('Fetching lyric text from "{}".'.format(url))
    lyric_html_page = self._fetch_url_page(url, timeout=timeout)
    soup = BeautifulSoup(lyric_html_page, "html.parser")
    paragraph = soup.find("p")
    # If <p> has a class (like <p class="bla">), then we got an invalid
    # response. Retry in such a case.
    # A page with no <p> at all makes `find` return None; that is not
    # treated as an invalid response here, and the None is handed to
    # `_get_lyrics_text` below instead of being dereferenced.
    invalid_response = (
        paragraph is not None and paragraph.get("class") is not None
    )
    to_retry = retries > 0 and invalid_response
    if to_retry:
        logger.debug(
            "Retrying since Genius returned invalid response for search "
            "results. Retries left: {retries}.".format(retries=retries)
        )
        return self.from_url(
            url, linesep=linesep, timeout=timeout, retries=retries - 1
        )

    if invalid_response:
        raise LyricsNotFoundError(
            'Genius returned invalid response for the search URL "{}".'.format(url)
        )
    lyrics = self._get_lyrics_text(paragraph)
    return lyrics.replace("\n", linesep)

5 changes: 2 additions & 3 deletions spotdl/metadata/providers/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def generate_search_url(self, query):
quoted_query = urllib.request.quote(query)
return self.base_search_url.format(quoted_query)

def _fetch_response_html(self, url):
    """
    Fetches the page at the given URL and returns it parsed as a
    BeautifulSoup object.
    """
    # Close the HTTP response as soon as its body has been read rather
    # than leaving it open until the object is garbage collected.
    with urllib.request.urlopen(url) as response:
        markup = response.read()
    return BeautifulSoup(markup, "html.parser")
Expand Down Expand Up @@ -119,12 +119,11 @@ def search(self, query, limit=10, retries=5):
videos = self._fetch_search_results(html, limit=limit)
to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html)
if to_retry:
retries -= 1
logger.debug(
"Retrying since YouTube returned invalid response for search "
"results. Retries left: {retries}.".format(retries=retries)
)
return self.search(query, limit=limit, retries=retries)
return self.search(query, limit=limit, retries=retries-1)
return YouTubeVideos(videos)


Expand Down
2 changes: 1 addition & 1 deletion spotdl/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "2.0.2"
__version__ = "2.0.3"

0 comments on commit 64d54d7

Please sign in to comment.