@@ -6,9 +6,10 @@ use crate::cmp;
6
6
use crate :: collections:: TryReserveError ;
7
7
use crate :: fmt;
8
8
use crate :: hash:: { Hash , Hasher } ;
9
- use crate :: ops;
9
+ use crate :: ops:: { self , Range } ;
10
10
use crate :: rc:: Rc ;
11
- use crate :: str:: FromStr ;
11
+ use crate :: slice;
12
+ use crate :: str:: { from_utf8 as str_from_utf8, FromStr } ;
12
13
use crate :: sync:: Arc ;
13
14
14
15
use crate :: sys:: os_str:: { Buf , Slice } ;
@@ -963,6 +964,82 @@ impl OsStr {
963
964
self . inner . as_encoded_bytes ( )
964
965
}
965
966
967
+ /// Takes a substring based on a range that corresponds to the return value of
968
+ /// [`OsStr::as_encoded_bytes`].
969
+ ///
970
+ /// The range's start and end must lie on valid `OsStr` boundaries.
971
+ /// A valid `OsStr` boundary is one of:
972
+ /// - The start of the string
973
+ /// - The end of the string
974
+ /// - Immediately before a valid non-empty UTF-8 substring
975
+ /// - Immediately after a valid non-empty UTF-8 substring
976
+ ///
977
+ /// # Panics
978
+ ///
979
+ /// Panics if `range` does not lie on valid `OsStr` boundaries or if it
980
+ /// exceeds the end of the string.
981
+ ///
982
+ /// # Example
983
+ ///
984
+ /// ```
985
+ /// #![feature(os_str_slice)]
986
+ ///
987
+ /// use std::ffi::OsStr;
988
+ ///
989
+ /// let os_str = OsStr::new("foo=bar");
990
+ /// let bytes = os_str.as_encoded_bytes();
991
+ /// if let Some(index) = bytes.iter().position(|b| *b == b'=') {
992
+ /// let key = os_str.slice_encoded_bytes(..index);
993
+ /// let value = os_str.slice_encoded_bytes(index + 1..);
994
+ /// assert_eq!(key, "foo");
995
+ /// assert_eq!(value, "bar");
996
+ /// }
997
+ /// ```
998
+ #[ unstable( feature = "os_str_slice" , issue = "118485" ) ]
999
+ pub fn slice_encoded_bytes < R : ops:: RangeBounds < usize > > ( & self , range : R ) -> & Self {
1000
+ fn is_valid_boundary ( bytes : & [ u8 ] , index : usize ) -> bool {
1001
+ if index == 0 || index == bytes. len ( ) {
1002
+ return true ;
1003
+ }
1004
+
1005
+ // Fast path
1006
+ if bytes[ index - 1 ] . is_ascii ( ) || bytes[ index] . is_ascii ( ) {
1007
+ return true ;
1008
+ }
1009
+
1010
+ let ( before, after) = bytes. split_at ( index) ;
1011
+
1012
+ // UTF-8 takes at most 4 bytes per codepoint, so we don't
1013
+ // need to check more than that.
1014
+ let after = after. get ( ..4 ) . unwrap_or ( after) ;
1015
+ match str_from_utf8 ( after) {
1016
+ Ok ( _) => return true ,
1017
+ Err ( err) if err. valid_up_to ( ) != 0 => return true ,
1018
+ Err ( _) => ( ) ,
1019
+ }
1020
+
1021
+ for len in 2 ..=4 . min ( index) {
1022
+ let before = & before[ index - len..] ;
1023
+ if str_from_utf8 ( before) . is_ok ( ) {
1024
+ return true ;
1025
+ }
1026
+ }
1027
+
1028
+ false
1029
+ }
1030
+
1031
+ let encoded_bytes = self . as_encoded_bytes ( ) ;
1032
+ let Range { start, end } = slice:: range ( range, ..encoded_bytes. len ( ) ) ;
1033
+ assert ! ( is_valid_boundary( encoded_bytes, start) ) ;
1034
+ assert ! ( is_valid_boundary( encoded_bytes, end) ) ;
1035
+
1036
+ // SAFETY: `slice::range` ensures that `start` and `end` are valid
1037
+ let slice = unsafe { encoded_bytes. get_unchecked ( start..end) } ;
1038
+
1039
+ // SAFETY: `slice` comes from `self` and we validated the boundaries
1040
+ unsafe { Self :: from_encoded_bytes_unchecked ( slice) }
1041
+ }
1042
+
966
1043
/// Converts this string to its ASCII lower case equivalent in-place.
967
1044
///
968
1045
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
0 commit comments