From 0292091b238e338b69cc43f7365506fd252f925e Mon Sep 17 00:00:00 2001
From: Marcel van der Veldt <m.vanderveldt@outlook.com>
Date: Thu, 17 Oct 2024 12:43:10 +0200
Subject: [PATCH 1/3] Fix encoding detection of HLS m3u playlists

---
 music_assistant/server/helpers/audio.py | 12 +++++++-----
 music_assistant/server/helpers/util.py  |  9 +++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/music_assistant/server/helpers/audio.py b/music_assistant/server/helpers/audio.py
index eda1fbb49..30a05ecb3 100644
--- a/music_assistant/server/helpers/audio.py
+++ b/music_assistant/server/helpers/audio.py
@@ -46,7 +46,7 @@
 from .process import AsyncProcess, check_output, communicate
 from .tags import parse_tags
 from .throttle_retry import BYPASS_THROTTLER
-from .util import TimedAsyncGenerator, create_tempfile
+from .util import TimedAsyncGenerator, create_tempfile, detect_charset
 
 if TYPE_CHECKING:
     from music_assistant.common.models.player_queue import QueueItem
@@ -603,8 +603,9 @@ async def get_hls_radio_stream(
             substream_url, headers=HTTP_HEADERS, timeout=timeout
         ) as resp:
             resp.raise_for_status()
-            charset = resp.charset or "utf-8"
-            substream_m3u_data = await resp.text(charset)
+            raw_data = await resp.read()
+            encoding = resp.charset or await detect_charset(raw_data)
+            substream_m3u_data = raw_data.decode(encoding)
         # get chunk-parts from the substream
         hls_chunks = parse_m3u(substream_m3u_data)
         chunk_seconds = 0
@@ -679,8 +680,9 @@ async def get_hls_substream(
         url, allow_redirects=True, headers=HTTP_HEADERS, timeout=timeout
     ) as resp:
         resp.raise_for_status()
-        charset = resp.charset or "utf-8"
-        master_m3u_data = await resp.text(charset)
+        raw_data = await resp.read()
+        encoding = resp.charset or await detect_charset(raw_data)
+        master_m3u_data = raw_data.decode(encoding)
     substreams = parse_m3u(master_m3u_data)
     if any(x for x in substreams if x.length and not x.key):
         # this is already a substream!
diff --git a/music_assistant/server/helpers/util.py b/music_assistant/server/helpers/util.py
index 95896f6d9..6cdde467b 100644
--- a/music_assistant/server/helpers/util.py
+++ b/music_assistant/server/helpers/util.py
@@ -19,6 +19,7 @@
 from types import TracebackType
 from typing import TYPE_CHECKING, Any, ParamSpec, Self, TypeVar
 
+import cchardet as chardet
 import ifaddr
 import memory_tempfile
 from zeroconf import IPVersion
@@ -181,6 +182,14 @@ async def close_async_generator(agen: AsyncGenerator[Any, None]) -> None:
     await agen.aclose()
 
 
+async def detect_charset(data: bytes, fallback="utf-8") -> str:
+    """Detect charset of raw data."""
+    try:
+        return (await asyncio.to_thread(chardet.detect, data))["encoding"]
+    except (ImportError, AttributeError):
+        return fallback
+
+
 class TaskManager:
     """
     Helper class to run many tasks at once.

From 20cc3da09073ea209b6507f7357f2c768b692725 Mon Sep 17 00:00:00 2001
From: Marcel van der Veldt <m.vanderveldt@outlook.com>
Date: Thu, 17 Oct 2024 12:54:59 +0200
Subject: [PATCH 2/3] Tweak encoding detection: require confidence > 0.75, else use fallback

---
 music_assistant/server/helpers/util.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/music_assistant/server/helpers/util.py b/music_assistant/server/helpers/util.py
index 6cdde467b..34ef1610d 100644
--- a/music_assistant/server/helpers/util.py
+++ b/music_assistant/server/helpers/util.py
@@ -185,9 +185,12 @@ async def close_async_generator(agen: AsyncGenerator[Any, None]) -> None:
 async def detect_charset(data: bytes, fallback="utf-8") -> str:
     """Detect charset of raw data."""
     try:
-        return (await asyncio.to_thread(chardet.detect, data))["encoding"]
-    except (ImportError, AttributeError):
-        return fallback
+        detected = await asyncio.to_thread(chardet.detect, data)
+        if detected and detected["encoding"] and detected["confidence"] > 0.75:
+            return detected["encoding"]
+    except Exception as err:
+        LOGGER.debug("Failed to detect charset: %s", err)
+    return fallback
 
 
 class TaskManager:

From 04c063a474fc7ffef6047afab33f05d33da1a86a Mon Sep 17 00:00:00 2001
From: Marcel van der Veldt <m.vanderveldt@outlook.com>
Date: Thu, 17 Oct 2024 15:58:29 +0200
Subject: [PATCH 3/3] Always detect charset for m3u playlists

---
 music_assistant/server/helpers/audio.py     | 6 ++++--
 music_assistant/server/helpers/playlists.py | 9 ++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/music_assistant/server/helpers/audio.py b/music_assistant/server/helpers/audio.py
index 30a05ecb3..cce84fafc 100644
--- a/music_assistant/server/helpers/audio.py
+++ b/music_assistant/server/helpers/audio.py
@@ -604,7 +604,8 @@ async def get_hls_radio_stream(
         ) as resp:
             resp.raise_for_status()
             raw_data = await resp.read()
-            encoding = resp.charset or await detect_charset(raw_data)
+            # NOTE: using resp.charset is not reliable, we need to detect it ourselves
+            encoding = await detect_charset(raw_data)
             substream_m3u_data = raw_data.decode(encoding)
         # get chunk-parts from the substream
         hls_chunks = parse_m3u(substream_m3u_data)
@@ -681,7 +682,8 @@ async def get_hls_substream(
     ) as resp:
         resp.raise_for_status()
         raw_data = await resp.read()
-        encoding = resp.charset or await detect_charset(raw_data)
+        # NOTE: using resp.charset is not reliable, we need to detect it ourselves
+        encoding = await detect_charset(raw_data)
         master_m3u_data = raw_data.decode(encoding)
     substreams = parse_m3u(master_m3u_data)
     if any(x for x in substreams if x.length and not x.key):
diff --git a/music_assistant/server/helpers/playlists.py b/music_assistant/server/helpers/playlists.py
index 8986de7fe..a4b14acde 100644
--- a/music_assistant/server/helpers/playlists.py
+++ b/music_assistant/server/helpers/playlists.py
@@ -11,6 +11,7 @@
 from aiohttp import client_exceptions
 
 from music_assistant.common.models.errors import InvalidDataError
+from music_assistant.server.helpers.util import detect_charset
 
 if TYPE_CHECKING:
     from music_assistant.server import MusicAssistant
@@ -146,10 +147,12 @@ async def fetch_playlist(mass: MusicAssistant, url: str) -> list[PlaylistItem]:
     """Parse an online m3u or pls playlist."""
     try:
         async with mass.http_session.get(url, allow_redirects=True, timeout=5) as resp:
-            charset = resp.charset or "utf-8"
             try:
-                playlist_data = (await resp.content.read(64 * 1024)).decode(charset)
-            except ValueError as err:
+                raw_data = await resp.content.read(64 * 1024)
+                # NOTE: using resp.charset is not reliable, we need to detect it ourselves
+                encoding = await detect_charset(raw_data)
+                playlist_data = raw_data.decode(encoding, errors="replace")
+            except (ValueError, LookupError) as err:  # detected codec may be unknown to Python
                 msg = f"Could not decode playlist {url}"
                 raise InvalidDataError(msg) from err
     except TimeoutError as err: