Skip to content

Commit b39e6f1

Browse files
committed
Auto merge of #94079 - petrochenkov:cstr, r=joshtriplett
library: Move `CStr` to libcore, and `CString` to liballoc

Closes rust-lang/rust#46736

Interesting points:
- Stability:
  - To make `CStr(ing)` from libcore/liballoc unusable without enabling features I had to make these structures unstable, and reexport them from libstd using stable type aliases instead of `pub use` reexports. (Because stability of `use` items is not checked.)
- Relying on target ABI in libcore is ok:
  - rust-lang/rust#94079 (comment)
- `trait CStrExt` (UPDATE: used only in `cfg(bootstrap)` mode, otherwise lang items are used instead)
  - rust-lang/rust#94079 (comment)
- `strlen`
  - rust-lang/rust#94079 (comment)

Otherwise it's just a code move + some minor hackery usual for liballoc in `cfg(test)` mode.
2 parents ac7235e + 51893cd commit b39e6f1

File tree

21 files changed

+908
-681
lines changed

21 files changed

+908
-681
lines changed

std/src/ffi/c_str.rs renamed to alloc/src/ffi/c_str.rs

Lines changed: 132 additions & 611 deletions
Large diffs are not rendered by default.

std/src/ffi/c_str/tests.rs renamed to alloc/src/ffi/c_str/tests.rs

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
use super::*;
2-
use crate::borrow::Cow::{Borrowed, Owned};
3-
use crate::collections::hash_map::DefaultHasher;
4-
use crate::hash::{Hash, Hasher};
5-
use crate::os::raw::c_char;
62
use crate::rc::Rc;
73
use crate::sync::Arc;
4+
use core::assert_matches::assert_matches;
5+
use core::ffi::FromBytesUntilNulError;
6+
use core::hash::{Hash, Hasher};
7+
8+
#[allow(deprecated)]
9+
use core::hash::SipHasher13 as DefaultHasher;
810

911
#[test]
1012
fn c_to_rust() {
@@ -47,22 +49,6 @@ fn borrowed() {
4749
}
4850
}
4951

50-
#[test]
51-
fn to_str() {
52-
let data = b"123\xE2\x80\xA6\0";
53-
let ptr = data.as_ptr() as *const c_char;
54-
unsafe {
55-
assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…"));
56-
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…"));
57-
}
58-
let data = b"123\xE2\0";
59-
let ptr = data.as_ptr() as *const c_char;
60-
unsafe {
61-
assert!(CStr::from_ptr(ptr).to_str().is_err());
62-
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}")));
63-
}
64-
}
65-
6652
#[test]
6753
fn to_owned() {
6854
let data = b"123\0";
@@ -78,9 +64,11 @@ fn equal_hash() {
7864
let ptr = data.as_ptr() as *const c_char;
7965
let cstr: &'static CStr = unsafe { CStr::from_ptr(ptr) };
8066

67+
#[allow(deprecated)]
8168
let mut s = DefaultHasher::new();
8269
cstr.hash(&mut s);
8370
let cstr_hash = s.finish();
71+
#[allow(deprecated)]
8472
let mut s = DefaultHasher::new();
8573
CString::new(&data[..data.len() - 1]).unwrap().hash(&mut s);
8674
let cstring_hash = s.finish();
@@ -122,11 +110,11 @@ fn cstr_from_bytes_until_nul() {
122110
// Test an empty slice. This should fail because it
123111
// does not contain a nul byte.
124112
let b = b"";
125-
assert_eq!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError(())));
113+
assert_matches!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError { .. }));
126114

127115
// Test a non-empty slice, that does not contain a nul byte.
128116
let b = b"hello";
129-
assert_eq!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError(())));
117+
assert_matches!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError { .. }));
130118

131119
// Test an empty nul-terminated string
132120
let b = b"\0";

alloc/src/ffi/mod.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
//! Utilities related to FFI bindings.
2+
//!
3+
//! This module provides utilities to handle data across non-Rust
4+
//! interfaces, like other programming languages and the underlying
5+
//! operating system. It is mainly of use for FFI (Foreign Function
6+
//! Interface) bindings and code that needs to exchange C-like strings
7+
//! with other languages.
8+
//!
9+
//! # Overview
10+
//!
11+
//! Rust represents owned strings with the [`String`] type, and
12+
//! borrowed slices of strings with the [`str`] primitive. Both are
13+
//! always in UTF-8 encoding, and may contain nul bytes in the middle,
14+
//! i.e., if you look at the bytes that make up the string, there may
15+
//! be a `\0` among them. Both `String` and `str` store their length
16+
//! explicitly; there are no nul terminators at the end of strings
17+
//! like in C.
18+
//!
19+
//! C strings are different from Rust strings:
20+
//!
21+
//! * **Encodings** - Rust strings are UTF-8, but C strings may use
22+
//! other encodings. If you are using a string from C, you should
23+
//! check its encoding explicitly, rather than just assuming that it
24+
//! is UTF-8 like you can do in Rust.
25+
//!
26+
//! * **Character size** - C strings may use `char` or `wchar_t`-sized
27+
//! characters; please **note** that C's `char` is different from Rust's.
28+
//! The C standard leaves the actual sizes of those types open to
29+
//! interpretation, but defines different APIs for strings made up of
30+
//! each character type. Rust strings are always UTF-8, so different
31+
//! Unicode characters will be encoded in a variable number of bytes
32+
//! each. The Rust type [`char`] represents a '[Unicode scalar
33+
//! value]', which is similar to, but not the same as, a '[Unicode
34+
//! code point]'.
35+
//!
36+
//! * **Nul terminators and implicit string lengths** - Often, C
37+
//! strings are nul-terminated, i.e., they have a `\0` character at the
38+
//! end. The length of a string buffer is not stored, but has to be
39+
//! calculated; to compute the length of a string, C code must
40+
//! manually call a function like `strlen()` for `char`-based strings,
41+
//! or `wcslen()` for `wchar_t`-based ones. Those functions return
42+
//! the number of characters in the string excluding the nul
43+
//! terminator, so the buffer length is really `len+1` characters.
44+
//! Rust strings don't have a nul terminator; their length is always
45+
//! stored and does not need to be calculated. While in Rust
46+
//! accessing a string's length is an *O*(1) operation (because the
47+
//! length is stored), in C it is an *O*(*n*) operation because the
48+
//! length needs to be computed by scanning the string for the nul
49+
//! terminator.
50+
//!
51+
//! * **Internal nul characters** - When C strings have a nul
52+
//! terminator character, this usually means that they cannot have nul
53+
//! characters in the middle — a nul character would essentially
54+
//! truncate the string. Rust strings *can* have nul characters in
55+
//! the middle, because nul does not have to mark the end of the
56+
//! string in Rust.
57+
//!
58+
//! # Representations of non-Rust strings
59+
//!
60+
//! [`CString`] and [`CStr`] are useful when you need to transfer
61+
//! UTF-8 strings to and from languages with a C ABI, like Python.
62+
//!
63+
//! * **From Rust to C:** [`CString`] represents an owned, C-friendly
64+
//! string: it is nul-terminated, and has no internal nul characters.
65+
//! Rust code can create a [`CString`] out of a normal string (provided
66+
//! that the string doesn't have nul characters in the middle), and
67+
//! then use a variety of methods to obtain a raw <code>\*mut [u8]</code> that can
68+
//! then be passed as an argument to functions which use the C
69+
//! conventions for strings.
70+
//!
71+
//! * **From C to Rust:** [`CStr`] represents a borrowed C string; it
72+
//! is what you would use to wrap a raw <code>\*const [u8]</code> that you got from
73+
//! a C function. A [`CStr`] is guaranteed to be a nul-terminated array
74+
//! of bytes. Once you have a [`CStr`], you can convert it to a Rust
75+
//! <code>&[str]</code> if it's valid UTF-8, or lossily convert it by adding
76+
//! replacement characters.
77+
//!
78+
//! [`String`]: crate::string::String
79+
//! [`CStr`]: core::ffi::CStr
80+
81+
#![unstable(feature = "alloc_ffi", issue = "94079")]
82+
83+
#[cfg(bootstrap)]
84+
#[unstable(feature = "cstr_internals", issue = "none")]
85+
pub use self::c_str::CStrExt;
86+
#[unstable(feature = "alloc_c_string", issue = "94079")]
87+
pub use self::c_str::FromVecWithNulError;
88+
#[unstable(feature = "alloc_c_string", issue = "94079")]
89+
pub use self::c_str::{CString, IntoStringError, NulError};
90+
91+
mod c_str;

alloc/src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,13 @@
8686
#![allow(explicit_outlives_requirements)]
8787
//
8888
// Library features:
89+
#![cfg_attr(not(no_global_oom_handling), feature(alloc_c_string))]
8990
#![feature(alloc_layout_extra)]
9091
#![feature(allocator_api)]
9192
#![feature(array_chunks)]
9293
#![feature(array_methods)]
9394
#![feature(array_windows)]
95+
#![feature(assert_matches)]
9496
#![feature(async_iterator)]
9597
#![feature(coerce_unsized)]
9698
#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
@@ -104,9 +106,12 @@
104106
#![feature(const_maybe_uninit_write)]
105107
#![feature(const_maybe_uninit_as_mut_ptr)]
106108
#![feature(const_refs_to_cell)]
109+
#![feature(core_c_str)]
107110
#![feature(core_intrinsics)]
111+
#![feature(core_ffi_c)]
108112
#![feature(const_eval_select)]
109113
#![feature(const_pin)]
114+
#![feature(cstr_from_bytes_until_nul)]
110115
#![feature(dispatch_from_dyn)]
111116
#![feature(exact_size_is_empty)]
112117
#![feature(extend_one)]
@@ -152,6 +157,7 @@
152157
#![feature(exclusive_range_pattern)]
153158
#![feature(fundamental)]
154159
#![cfg_attr(not(test), feature(generator_trait))]
160+
#![feature(hashmap_internals)]
155161
#![feature(lang_items)]
156162
#![feature(let_else)]
157163
#![feature(min_specialization)]
@@ -160,6 +166,7 @@
160166
#![feature(nll)] // Not necessary, but here to test the `nll` feature.
161167
#![feature(rustc_allow_const_fn_unstable)]
162168
#![feature(rustc_attrs)]
169+
#![feature(slice_internals)]
163170
#![feature(staged_api)]
164171
#![cfg_attr(test, feature(test))]
165172
#![feature(unboxed_closures)]
@@ -205,6 +212,8 @@ mod boxed {
205212
}
206213
pub mod borrow;
207214
pub mod collections;
215+
#[cfg(not(no_global_oom_handling))]
216+
pub mod ffi;
208217
pub mod fmt;
209218
pub mod rc;
210219
pub mod slice;

alloc/src/slice.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ pub use hack::to_vec;
153153
// functions are actually methods that are in `impl [T]` but not in
154154
// `core::slice::SliceExt` - we need to supply these functions for the
155155
// `test_permutations` test
156-
mod hack {
156+
pub(crate) mod hack {
157157
use core::alloc::Allocator;
158158

159159
use crate::boxed::Box;

alloc/tests/c_str.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
use std::borrow::Cow::{Borrowed, Owned};
2+
use std::ffi::{c_char, CStr};
3+
4+
#[test]
5+
fn to_str() {
6+
let data = b"123\xE2\x80\xA6\0";
7+
let ptr = data.as_ptr() as *const c_char;
8+
unsafe {
9+
assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…"));
10+
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…"));
11+
}
12+
let data = b"123\xE2\0";
13+
let ptr = data.as_ptr() as *const c_char;
14+
unsafe {
15+
assert!(CStr::from_ptr(ptr).to_str().is_err());
16+
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}")));
17+
}
18+
}

alloc/tests/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#![feature(const_nonnull_slice_from_raw_parts)]
1313
#![feature(const_ptr_write)]
1414
#![feature(const_try)]
15+
#![feature(core_c_str)]
16+
#![feature(core_ffi_c)]
1517
#![feature(core_intrinsics)]
1618
#![feature(drain_filter)]
1719
#![feature(exact_size_is_empty)]
@@ -49,6 +51,7 @@ mod binary_heap;
4951
mod borrow;
5052
mod boxed;
5153
mod btree_set_hash;
54+
mod c_str;
5255
mod const_fns;
5356
mod cow_str;
5457
mod fmt;

0 commit comments

Comments
 (0)