diff --git a/CHANGES.md b/CHANGES.md
index eba3e3e48..443062b8e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -8,6 +8,10 @@ The release dates mentioned follow the format `DD-MM-YYYY`.
 
 ## [Unreleased]
 
+## [2.0.3] (Hotfix Release) - 18-05-2020
+### Fixed
+- Genius would sometimes return invalid lyrics. Retry a few times in such a case.
+
 ## [2.0.2] (Hotfix Release) - 18-05-2020
 ### Fixed
 - Skipping tracks with `-m` would crash.
diff --git a/spotdl/lyrics/providers/genius.py b/spotdl/lyrics/providers/genius.py
index 0971ca3ff..c7b88cd0b 100644
--- a/spotdl/lyrics/providers/genius.py
+++ b/spotdl/lyrics/providers/genius.py
@@ -48,14 +48,12 @@ def _fetch_url_page(self, url, timeout=None):
         else:
             return response.read()
 
-    def _get_lyrics_text(self, html):
+    def _get_lyrics_text(self, paragraph):
         """
-        Extracts and returns the lyric content from the provided HTML.
+        Extracts and returns the lyric content from the provided paragraph tag.
         """
-        soup = BeautifulSoup(html, "html.parser")
-        lyrics_paragraph = soup.find("p")
-        if lyrics_paragraph:
-            return lyrics_paragraph.get_text()
+        if paragraph:
+            return paragraph.get_text()
         else:
             raise LyricsNotFoundError(
                 "The lyrics for this track are yet to be released on Genius."
@@ -125,12 +123,32 @@ def from_artist_and_track(self, artist, track, linesep="\n", timeout=None):
         lyric_url = self.guess_lyric_url_from_artist_and_track(artist, track)
         return self.from_url(lyric_url, linesep, timeout)
 
-    def from_url(self, url, linesep="\n", timeout=None):
+    def from_url(self, url, linesep="\n", timeout=None, retries=5):
         """
         Returns the lyric string for the given URL.
+
+        Retries up to `retries` times when Genius returns an invalid page.
         """
         logger.debug('Fetching lyric text from "{}".'.format(url))
         lyric_html_page = self._fetch_url_page(url, timeout=timeout)
-        lyrics = self._get_lyrics_text(lyric_html_page)
+        soup = BeautifulSoup(lyric_html_page, "html.parser")
+        paragraph = soup.find("p")
+        # If <p> has a class (like <p class="...">), then we got an invalid
+        # response. Retry in such a case. A missing <p> is not invalid here:
+        # `_get_lyrics_text` raises LyricsNotFoundError for it below.
+        invalid_response = paragraph is not None and paragraph.get("class") is not None
+        to_retry = retries > 0 and invalid_response
+        if to_retry:
+            logger.debug(
+                "Retrying since Genius returned invalid response for search "
+                "results. Retries left: {retries}.".format(retries=retries)
+            )
+            return self.from_url(url, linesep=linesep, retries=retries-1, timeout=timeout)
+
+        if invalid_response:
+            raise LyricsNotFoundError(
+                'Genius returned invalid response for the search URL "{}".'.format(url)
+            )
+        lyrics = self._get_lyrics_text(paragraph)
         return lyrics.replace("\n", linesep)
 
diff --git a/spotdl/metadata/providers/youtube.py b/spotdl/metadata/providers/youtube.py
index 5a4bb70e0..af673dea5 100644
--- a/spotdl/metadata/providers/youtube.py
+++ b/spotdl/metadata/providers/youtube.py
@@ -53,7 +53,7 @@ def generate_search_url(self, query):
         quoted_query = urllib.request.quote(query)
         return self.base_search_url.format(quoted_query)
 
-    def _fetch_response_html(self, url, retries=5):
+    def _fetch_response_html(self, url):
         response = urllib.request.urlopen(url)
         soup = BeautifulSoup(response.read(), "html.parser")
         return soup
@@ -119,12 +119,11 @@ def search(self, query, limit=10, retries=5):
         videos = self._fetch_search_results(html, limit=limit)
         to_retry = retries > 0 and self._is_server_side_invalid_response(videos, html)
         if to_retry:
-            retries -= 1
             logger.debug(
                 "Retrying since YouTube returned invalid response for search "
                 "results. Retries left: {retries}.".format(retries=retries)
             )
-            return self.search(query, limit=limit, retries=retries)
+            return self.search(query, limit=limit, retries=retries-1)
 
         return YouTubeVideos(videos)
 
diff --git a/spotdl/version.py b/spotdl/version.py
index 47d77ba6c..a33f3fe0d 100644
--- a/spotdl/version.py
+++ b/spotdl/version.py
@@ -1,2 +1,2 @@
-__version__ = "2.0.2"
+__version__ = "2.0.3"
 