Skip to content

Commit

Permalink
Fix error handling. Add test.
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewliebenow committed Oct 22, 2024
1 parent f697649 commit 0f3bc3f
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 70 deletions.
2 changes: 1 addition & 1 deletion src/uu/echo/src/echo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ fn execute(
arguments_after_options: ValuesRef<'_, OsString>,
) -> UResult<()> {
for (i, input) in arguments_after_options.enumerate() {
let bytes = uucore::format::bytes_from_os_str(input)?;
let bytes = uucore::format::try_get_bytes_from_os_str(input)?;

if i > 0 {
stdout_lock.write_all(b" ")?;
Expand Down
6 changes: 4 additions & 2 deletions src/uu/printf/src/printf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use std::ffi::OsString;
use std::io::stdout;
use std::ops::ControlFlow;
use uucore::error::{UResult, UUsageError};
use uucore::format::{bytes_from_os_str, parse_spec_and_escape, FormatArgument, FormatItem};
use uucore::format::{
parse_spec_and_escape, try_get_bytes_from_os_str, FormatArgument, FormatItem,
};
use uucore::{format_usage, help_about, help_section, help_usage};

const VERSION: &str = "version";
Expand All @@ -33,7 +35,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
.get_one::<OsString>(options::FORMAT)
.ok_or_else(|| UUsageError::new(1, "missing operand"))?;

let format_bytes = bytes_from_os_str(format)?;
let format_bytes = try_get_bytes_from_os_str(format)?;

let values = match matches.get_many::<OsString>(options::ARGUMENT) {
Some(os_string) => os_string
Expand Down
128 changes: 70 additions & 58 deletions src/uucore/src/lib/features/format/argument.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,19 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use super::FormatError;
use crate::{
error::{set_exit_code, UResult, USimpleError},
error::{set_exit_code, UError},
features::format::num_parser::{ParseError, ParsedNumber},
quoting_style::{escape_name, Quotes, QuotingStyle},
show, show_error, show_warning,
show_error, show_warning,
};
use os_display::Quotable;
use std::ffi::{OsStr, OsString};
use std::{
error::Error,
ffi::{OsStr, OsString},
fmt::Display,
};

/// An argument for formatting
///
Expand All @@ -31,70 +36,70 @@ pub enum FormatArgument {
}

pub trait ArgumentIter<'a>: Iterator<Item = &'a FormatArgument> {
fn get_char(&mut self) -> u8;
fn get_i64(&mut self) -> i64;
fn get_u64(&mut self) -> u64;
fn get_f64(&mut self) -> f64;
fn get_char(&mut self) -> Result<u8, FormatError>;
fn get_i64(&mut self) -> Result<i64, FormatError>;
fn get_u64(&mut self) -> Result<u64, FormatError>;
fn get_f64(&mut self) -> Result<f64, FormatError>;
fn get_str(&mut self) -> &'a OsStr;
}

impl<'a, T: Iterator<Item = &'a FormatArgument>> ArgumentIter<'a> for T {
fn get_char(&mut self) -> u8 {
fn get_char(&mut self) -> Result<u8, FormatError> {
let Some(next) = self.next() else {
return b'\0';
return Ok(b'\0');
};
match next {
FormatArgument::Char(c) => *c as u8,
FormatArgument::Unparsed(os) => match bytes_from_os_str(os).unwrap().first() {
Some(&byte) => byte,
None => b'\0',
FormatArgument::Char(c) => Ok(*c as u8),
FormatArgument::Unparsed(os) => match try_get_bytes_from_os_str(os)?.first() {
Some(&byte) => Ok(byte),
None => Ok(b'\0'),
},
_ => b'\0',
_ => Ok(b'\0'),
}
}

fn get_u64(&mut self) -> u64 {
fn get_u64(&mut self) -> Result<u64, FormatError> {
let Some(next) = self.next() else {
return 0;
return Ok(0);
};
match next {
FormatArgument::UnsignedInt(n) => *n,
FormatArgument::UnsignedInt(n) => Ok(*n),
FormatArgument::Unparsed(os) => {
let str = get_str_or_exit_with_error(os);
let str = try_get_str_from_os_str(os)?;

extract_value(ParsedNumber::parse_u64(str), str)
Ok(extract_value(ParsedNumber::parse_u64(str), str))
}
_ => 0,
_ => Ok(0),
}
}

fn get_i64(&mut self) -> i64 {
fn get_i64(&mut self) -> Result<i64, FormatError> {
let Some(next) = self.next() else {
return 0;
return Ok(0);
};
match next {
FormatArgument::SignedInt(n) => *n,
FormatArgument::SignedInt(n) => Ok(*n),
FormatArgument::Unparsed(os) => {
let str = get_str_or_exit_with_error(os);
let str = try_get_str_from_os_str(os)?;

extract_value(ParsedNumber::parse_i64(str), str)
Ok(extract_value(ParsedNumber::parse_i64(str), str))
}
_ => 0,
_ => Ok(0),
}
}

fn get_f64(&mut self) -> f64 {
fn get_f64(&mut self) -> Result<f64, FormatError> {
let Some(next) = self.next() else {
return 0.0;
return Ok(0.0);
};
match next {
FormatArgument::Float(n) => *n,
FormatArgument::Float(n) => Ok(*n),
FormatArgument::Unparsed(os) => {
let str = get_str_or_exit_with_error(os);
let str = try_get_str_from_os_str(os)?;

extract_value(ParsedNumber::parse_f64(str), str)
Ok(extract_value(ParsedNumber::parse_f64(str), str))
}
_ => 0.0,
_ => Ok(0.0),
}
}

Expand Down Expand Up @@ -135,14 +140,41 @@ fn extract_value<T: Default>(p: Result<T, ParseError<'_, T>>, input: &str) -> T
} else {
show_error!("{}: value not completely converted", input.quote());
}

v
}
}
}
}
}

pub fn bytes_from_os_str(input: &OsStr) -> UResult<&[u8]> {
#[derive(Debug)]
pub struct NonUtf8OsStr(pub String);

impl Display for NonUtf8OsStr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"invalid (non-UTF-8) string like {} encountered",
self.0.quote(),
))
}
}

impl Error for NonUtf8OsStr {}
impl UError for NonUtf8OsStr {}

pub fn try_get_str_from_os_str(os_str: &OsStr) -> Result<&str, NonUtf8OsStr> {
match os_str.to_str() {
Some(st) => Ok(st),
None => {
let cow = os_str.to_string_lossy();

Err(NonUtf8OsStr(cow.to_string()))
}
}
}

pub fn try_get_bytes_from_os_str(input: &OsStr) -> Result<&[u8], NonUtf8OsStr> {
let result = {
#[cfg(target_family = "unix")]
{
Expand All @@ -153,38 +185,18 @@ pub fn bytes_from_os_str(input: &OsStr) -> UResult<&[u8]> {

#[cfg(not(target_family = "unix"))]
{
use crate::error::USimpleError;

// TODO
// Verify that this works correctly on these platforms
match input.to_str().map(|st| st.as_bytes()) {
Some(sl) => Ok(sl),
None => Err(USimpleError::new(
1,
"non-UTF-8 string encountered when not allowed",
)),
None => {
let cow = input.to_string_lossy();

Err(NonUtf8OsStr(cow.to_string()))
}
}
}
};

result
}

fn get_str_or_exit_with_error(os_str: &OsStr) -> &str {
match os_str.to_str() {
Some(st) => st,
None => {
let cow = os_str.to_string_lossy();

let quoted = cow.quote();

let error = format!(
"argument like {quoted} is not a valid UTF-8 string, and could not be parsed as an integer",
);

show!(USimpleError::new(1, error.clone()));

panic!("{error}");
}
}
}
15 changes: 15 additions & 0 deletions src/uucore/src/lib/features/format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub mod num_parser;
mod spec;

pub use argument::*;
use os_display::Quotable;
use spec::Spec;
use std::{
error::Error,
Expand All @@ -63,6 +64,7 @@ pub enum FormatError {
NeedAtLeastOneSpec(Vec<u8>),
WrongSpecType,
InvalidPrecision(String),
InvalidEncoding(NonUtf8OsStr),
}

impl Error for FormatError {}
Expand All @@ -74,6 +76,12 @@ impl From<std::io::Error> for FormatError {
}
}

impl From<NonUtf8OsStr> for FormatError {
fn from(value: NonUtf8OsStr) -> FormatError {
FormatError::InvalidEncoding(value)
}
}

impl Display for FormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand All @@ -98,6 +106,13 @@ impl Display for FormatError {
Self::IoError(_) => write!(f, "io error"),
Self::NoMoreArguments => write!(f, "no more arguments"),
Self::InvalidArgument(_) => write!(f, "invalid argument"),
Self::InvalidEncoding(no) => {
write!(
f,
"invalid (non-UTF-8) argument like {} encountered",
no.0.quote()
)
}
}
}
}
Expand Down
17 changes: 8 additions & 9 deletions src/uucore/src/lib/features/format/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
// spell-checker:ignore (vars) intmax ptrdiff padlen

use super::{
bytes_from_os_str,
num_format::{
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
UnsignedIntVariant,
},
parse_escape_only, ArgumentIter, FormatChar, FormatError,
parse_escape_only, try_get_bytes_from_os_str, ArgumentIter, FormatChar, FormatError,
};
use crate::quoting_style::{escape_name, QuotingStyle};
use std::{io::Write, ops::ControlFlow};
Expand Down Expand Up @@ -315,7 +314,7 @@ impl Spec {
match self {
Self::Char { width, align_left } => {
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
write_padded(writer, &[args.get_char()], width, *align_left)
write_padded(writer, &[args.get_char()?], width, *align_left)
}
Self::String {
width,
Expand All @@ -334,7 +333,7 @@ impl Spec {

let os_str = args.get_str();

let bytes = bytes_from_os_str(os_str).unwrap();
let bytes = try_get_bytes_from_os_str(os_str).unwrap();

let truncated = match precision {
Some(p) if p < os_str.len() => &bytes[..p],
Expand All @@ -346,7 +345,7 @@ impl Spec {
Self::EscapedString => {
let os_str = args.get_str();

let bytes = bytes_from_os_str(os_str).unwrap();
let bytes = try_get_bytes_from_os_str(os_str).unwrap();

let mut parsed = Vec::<u8>::new();

Expand Down Expand Up @@ -386,7 +385,7 @@ impl Spec {
} => {
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0);
let i = args.get_i64();
let i = args.get_i64()?;

if precision as u64 > i32::MAX as u64 {
return Err(FormatError::InvalidPrecision(precision.to_string()));
Expand All @@ -409,7 +408,7 @@ impl Spec {
} => {
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(0);
let i = args.get_u64();
let i = args.get_u64()?;

if precision as u64 > i32::MAX as u64 {
return Err(FormatError::InvalidPrecision(precision.to_string()));
Expand All @@ -435,7 +434,7 @@ impl Spec {
} => {
let width = resolve_asterisk(*width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args)?.unwrap_or(6);
let f = args.get_f64();
let f = args.get_f64()?;

if precision as u64 > i32::MAX as u64 {
return Err(FormatError::InvalidPrecision(precision.to_string()));
Expand Down Expand Up @@ -463,7 +462,7 @@ fn resolve_asterisk<'a>(
) -> Result<Option<usize>, FormatError> {
Ok(match option {
None => None,
Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()).ok().unwrap_or(0)),
Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()?).ok().unwrap_or(0)),
Some(CanAsterisk::Fixed(w)) => Some(w),
})
}
Expand Down
7 changes: 7 additions & 0 deletions tests/by-util/test_printf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -941,4 +941,11 @@ fn non_utf_8_input() {
.arg(os_str)
.succeeds()
.stdout_only_bytes(INPUT_AND_OUTPUT);

new_ucmd!()
.arg("%d")
.arg(os_str)
.fails()
.stderr_only("printf: invalid (non-UTF-8) argument like 'Swer an rehte g�ete wendet s�n gem�ete, dem volget s�lde und �re.' encountered
");
}

0 comments on commit 0f3bc3f

Please sign in to comment.