Skip to content

Add inherent constructors on str #131118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
#![feature(fn_traits)]
#![feature(formatting_options)]
#![feature(hasher_prefixfree_extras)]
#![feature(inherent_str_constructors)]
#![feature(inplace_iteration)]
#![feature(iter_advance_by)]
#![feature(iter_next_chunk)]
Expand Down
10 changes: 6 additions & 4 deletions library/alloc/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use core::iter::FusedIterator;
use core::mem::MaybeUninit;
#[stable(feature = "encode_utf16", since = "1.8.0")]
pub use core::str::EncodeUtf16;
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::ParseBoolError;
#[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
pub use core::str::SplitAsciiWhitespace;
#[stable(feature = "split_inclusive", since = "1.51.0")]
Expand All @@ -22,7 +24,7 @@ pub use core::str::SplitWhitespace;
pub use core::str::pattern;
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher, Utf8Pattern};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{Bytes, CharIndices, Chars, from_utf8, from_utf8_mut};
pub use core::str::{Bytes, CharIndices, Chars};
#[stable(feature = "str_escape", since = "1.34.0")]
pub use core::str::{EscapeDebug, EscapeDefault, EscapeUnicode};
#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -35,8 +37,6 @@ pub use core::str::{MatchIndices, RMatchIndices};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{Matches, RMatches};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{ParseBoolError, from_utf8_unchecked, from_utf8_unchecked_mut};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{RSplit, Split};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{RSplitN, SplitN};
Expand All @@ -46,6 +46,8 @@ pub use core::str::{RSplitTerminator, SplitTerminator};
pub use core::str::{Utf8Chunk, Utf8Chunks};
#[unstable(feature = "str_from_raw_parts", issue = "119206")]
pub use core::str::{from_raw_parts, from_raw_parts_mut};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::str::{from_utf8, from_utf8_mut, from_utf8_unchecked, from_utf8_unchecked_mut};
use core::unicode::conversions;
use core::{mem, ptr};

Expand Down Expand Up @@ -698,7 +700,7 @@ pub fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {

// SAFETY: we know this is a valid char boundary
// since we only skipped over leading ascii bytes
let rest = core::str::from_utf8_unchecked(slice);
let rest = str::from_utf8_unchecked(slice);

(ascii_string, rest)
}
Expand Down
4 changes: 2 additions & 2 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ use crate::alloc::Allocator;
use crate::borrow::{Cow, ToOwned};
use crate::boxed::Box;
use crate::collections::TryReserveError;
use crate::str::{self, CharIndices, Chars, Utf8Error, from_utf8_unchecked_mut};
use crate::str::{CharIndices, Chars, Utf8Error};
#[cfg(not(no_global_oom_handling))]
use crate::str::{FromStr, from_boxed_utf8_unchecked};
use crate::vec::{self, Vec};
Expand Down Expand Up @@ -2110,7 +2110,7 @@ impl String {
#[inline]
pub fn leak<'a>(self) -> &'a mut str {
let slice = self.vec.leak();
unsafe { from_utf8_unchecked_mut(slice) }
unsafe { str::from_utf8_unchecked_mut(slice) }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a lot of places in this PR that change from std::str::* to the new inherent methods. Is there a reason for this? It would be much cleaner to add the inherent methods now, but not replace any existing uses until a follow-up PR (and once the feature is closer to stable).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I tried putting a future-deprecation warning on the old functions, which meant I had already cleaned up all usages.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't do that in the same PR. Add the unstable API first; we can consider encouraging the newer APIs somehow once this is closer to stabilization, but certainly not while it is unstable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So that's basically undoing the whole PR.

}
}

Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3508,7 +3508,7 @@ impl Default for Arc<str> {
#[inline]
fn default() -> Self {
let arc: Arc<[u8]> = Default::default();
debug_assert!(core::str::from_utf8(&*arc).is_ok());
debug_assert!(str::from_utf8(&*arc).is_ok());
let (ptr, alloc) = Arc::into_inner_with_allocator(arc);
unsafe { Arc::from_ptr_in(ptr.as_ptr() as *mut ArcInner<str>, alloc) }
}
Expand Down
3 changes: 1 addition & 2 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
use super::*;
use crate::panic::const_panic;
use crate::slice;
use crate::str::from_utf8_unchecked_mut;
use crate::unicode::printable::is_printable;
use crate::unicode::{self, conversions};

Expand Down Expand Up @@ -701,7 +700,7 @@ impl char {
#[inline]
pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
// SAFETY: `char` is not a surrogate, so this is valid UTF-8.
unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
unsafe { str::from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
}

/// Encodes this character as UTF-16 into the provided `u16` buffer,
Expand Down
4 changes: 2 additions & 2 deletions library/core/src/ffi/c_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::iter::FusedIterator;
use crate::marker::PhantomData;
use crate::ptr::NonNull;
use crate::slice::memchr;
use crate::{fmt, ops, slice, str};
use crate::{fmt, ops, slice};

// FIXME: because this is doc(inline)d, we *have* to use intra-doc links because the actual link
// depends on where the item is being documented. however, since this is libcore, we can't
Expand Down Expand Up @@ -651,7 +651,7 @@ impl CStr {
/// ```
#[stable(feature = "cstr_to_str", since = "1.4.0")]
#[rustc_const_stable(feature = "const_cstr_methods", since = "1.72.0")]
pub const fn to_str(&self) -> Result<&str, str::Utf8Error> {
pub const fn to_str(&self) -> Result<&str, crate::str::Utf8Error> {
// N.B., when `CStr` is changed to perform the length check in `.to_bytes()`
// instead of in `from_ptr()`, it may be worth considering if this should
// be rewritten to do the UTF-8 check inline with the length calculation
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::char::EscapeDebugExtArgs;
use crate::marker::PhantomData;
use crate::num::fmt as numfmt;
use crate::ops::Deref;
use crate::{iter, mem, result, str};
use crate::{iter, mem, result};

mod builders;
#[cfg(not(no_fp_fmt_parse))]
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/fmt/num.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::mem::MaybeUninit;
use crate::num::fmt as numfmt;
use crate::ops::{Div, Rem, Sub};
use crate::{fmt, ptr, slice, str};
use crate::{fmt, ptr, slice};

#[doc(hidden)]
trait DisplayInt:
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/net/display_buffer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::fmt;
use crate::mem::MaybeUninit;
use crate::{fmt, str};

/// Used for slow path in `Display` implementations when alignment is required.
pub(super) struct DisplayBuffer<const SIZE: usize> {
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/slice/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ impl<'a> fmt::Display for EscapeAscii<'a> {
// SAFETY: prefix length was derived by counting bytes in the same slice, so it's in-bounds
let (prefix, remainder) = unsafe { bytes.split_at_unchecked(prefix) };
// SAFETY: prefix is a valid utf8 sequence, as it's a subset of ASCII
let prefix = unsafe { crate::str::from_utf8_unchecked(prefix) };
let prefix = unsafe { str::from_utf8_unchecked(prefix) };

f.write_str(prefix)?; // the fast part

Expand Down
37 changes: 9 additions & 28 deletions library/core/src/str/converts.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
//! Ways to create a `str` from bytes slice.

use super::Utf8Error;
use super::validations::run_utf8_validation;
use crate::{mem, ptr};
use crate::ptr;

/// Converts a slice of bytes to a string slice.
///
Expand Down Expand Up @@ -85,14 +84,7 @@ use crate::{mem, ptr};
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
#[rustc_diagnostic_item = "str_from_utf8"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't move the existing diagnostic items. This feature will be unstable for a while, we don't want to break everything that relies on these in the meantime.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

restored

pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// FIXME(const-hack): This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Ok(_) => {
// SAFETY: validation succeeded.
Ok(unsafe { from_utf8_unchecked(v) })
}
Err(err) => Err(err),
}
str::from_utf8(v)
}

/// Converts a mutable slice of bytes to a mutable string slice.
Expand Down Expand Up @@ -129,14 +121,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
#[rustc_diagnostic_item = "str_from_utf8_mut"]
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// FIXME(const-hack): This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Ok(_) => {
// SAFETY: validation succeeded.
Ok(unsafe { from_utf8_unchecked_mut(v) })
}
Err(err) => Err(err),
}
str::from_utf8_mut(v)
}

/// Converts a slice of bytes to a string slice without checking
Expand Down Expand Up @@ -168,11 +153,10 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked"]
#[rustc_diagnostic_item = "from_utf8_unchecked"]
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
// Also relies on `&str` and `&[u8]` having the same layout.
unsafe { mem::transmute(v) }
// SAFETY: same requirements
unsafe { str::from_utf8_unchecked(v) }
}

/// Converts a slice of bytes to a string slice without checking
Expand All @@ -196,13 +180,10 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
#[must_use]
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked_mut", since = "1.83.0")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"]
#[rustc_diagnostic_item = "from_utf8_unchecked_mut"]
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
// SAFETY: the caller must guarantee that the bytes `v`
// are valid UTF-8, thus the cast to `*mut str` is safe.
// Also, the pointer dereference is safe because that pointer
// comes from a reference which is guaranteed to be valid for writes.
unsafe { &mut *(v as *mut [u8] as *mut str) }
// SAFETY: same requirements
unsafe { str::from_utf8_unchecked_mut(v) }
}

/// Creates a `&str` from a pointer and a length.
Expand Down
6 changes: 3 additions & 3 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
use super::validations::{next_code_point, next_code_point_reverse};
use super::{
BytesIsNotEmpty, CharEscapeDebugContinue, CharEscapeDefault, CharEscapeUnicode,
IsAsciiWhitespace, IsNotEmpty, IsWhitespace, LinesMap, UnsafeBytesToStr, from_utf8_unchecked,
IsAsciiWhitespace, IsNotEmpty, IsWhitespace, LinesMap, UnsafeBytesToStr,
};
use crate::fmt::{self, Write};
use crate::iter::{
Expand Down Expand Up @@ -158,7 +158,7 @@ impl<'a> Chars<'a> {
#[inline]
pub fn as_str(&self) -> &'a str {
// SAFETY: `Chars` is only made from a str, which guarantees the iter is valid UTF-8.
unsafe { from_utf8_unchecked(self.iter.as_slice()) }
unsafe { str::from_utf8_unchecked(self.iter.as_slice()) }
}
}

Expand Down Expand Up @@ -1413,7 +1413,7 @@ impl<'a> SplitAsciiWhitespace<'a> {
}

// SAFETY: Slice is created from str.
Some(unsafe { crate::str::from_utf8_unchecked(&self.inner.iter.iter.v) })
Some(unsafe { str::from_utf8_unchecked(&self.inner.iter.iter.v) })
}
}

Expand Down
3 changes: 1 addition & 2 deletions library/core/src/str/lossy.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use super::from_utf8_unchecked;
use super::validations::utf8_char_width;
use crate::fmt;
use crate::fmt::{Formatter, Write};
Expand Down Expand Up @@ -281,7 +280,7 @@ impl<'a> Iterator for Utf8Chunks<'a> {

Some(Utf8Chunk {
// SAFETY: All bytes up to `valid_up_to` are valid UTF-8.
valid: unsafe { from_utf8_unchecked(valid) },
valid: unsafe { str::from_utf8_unchecked(valid) },
invalid,
})
}
Expand Down
Loading
Loading