Skip to content

Commit

Permalink
Fix "stream did not contain valid UTF-8" using String::from_utf8_lossy (#380)
Browse files Browse the repository at this point in the history

Co-authored-by: Kenneth Loeffler <[email protected]>
  • Loading branch information
krakow10 and kennethloeffler authored Sep 26, 2024
1 parent 4530e2b commit 0f2edcf
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
1 change: 1 addition & 0 deletions rbx_binary/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Unreleased
* Added the ability to specify what type of compression to use for serializing. This takes the form of `Serializer::compression_type`. ([#446])
* Added support for ZSTD compressed files ([#446])
* Non-UTF-8 `Instance.Name` and `*Script.Source` properties are now implicitly converted to UTF-8 lossily when decoding. Previously, decoding such properties returned an error. ([#380])

[#446]: https://github.com/rojo-rbx/rbx-dom/pull/446

Expand Down
34 changes: 31 additions & 3 deletions rbx_binary/src/deserializer/state.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::{
borrow::Cow,
collections::{HashMap, HashSet, VecDeque},
convert::TryInto,
io::Read,
Expand Down Expand Up @@ -374,7 +375,20 @@ impl<'db, R: Read> DeserializerState<'db, R> {

for referent in &type_info.referents {
let instance = self.instances_by_ref.get_mut(referent).unwrap();
let value = chunk.read_string()?;
let binary_string = chunk.read_binary_string()?;
let value = match std::str::from_utf8(&binary_string) {
Ok(value) => Cow::Borrowed(value),
Err(_) => {
log::warn!(
"Performing lossy string conversion on property {}.{} because it did not contain UTF-8.
This may cause unexpected or broken behavior in your final results if you rely on this property being non UTF-8.",
type_info.type_name,
prop_name
);

String::from_utf8_lossy(binary_string.as_ref())
}
};
instance.builder.set_name(value);
}

Expand All @@ -399,8 +413,22 @@ impl<'db, R: Read> DeserializerState<'db, R> {
VariantType::String => {
for referent in &type_info.referents {
let instance = self.instances_by_ref.get_mut(referent).unwrap();
let value = chunk.read_string()?;
add_property(instance, &property, value.into());
let binary_string = chunk.read_binary_string()?;
let value = match std::str::from_utf8(&binary_string) {
Ok(value) => Cow::Borrowed(value),
Err(_) => {
log::warn!(
"Performing lossy string conversion on property {}.{} because it did not contain UTF-8.
This may cause unexpected or broken behavior in your final results if you rely on this property being non UTF-8.",
type_info.type_name,
property.name
);

String::from_utf8_lossy(&binary_string)
}
};

add_property(instance, &property, value.as_ref().into());
}
}
VariantType::Content => {
Expand Down

0 comments on commit 0f2edcf

Please sign in to comment.