Skip to content

Commit 8834629

Browse files
committed
Auto merge of rust-lang#94887 - dylni:move-normpath-crate-impl-to-libstd, r=ChrisDenton
Improve Windows path prefix parsing This PR fixes improves parsing of Windows path prefixes. `parse_prefix` now supports both types of separators on Windows (`/` and `\`).
2 parents c2b4c2d + fb9731e commit 8834629

File tree

6 files changed

+142
-57
lines changed

6 files changed

+142
-57
lines changed

library/std/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@
240240
#![feature(exhaustive_patterns)]
241241
#![feature(intra_doc_pointers)]
242242
#![feature(lang_items)]
243+
#![feature(let_chains)]
243244
#![feature(linkage)]
244245
#![feature(min_specialization)]
245246
#![feature(must_not_suspend)]

library/std/src/path.rs

+20-22
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,8 @@ pub enum Prefix<'a> {
168168

169169
/// Device namespace prefix, e.g., `\\.\COM42`.
170170
///
171-
/// Device namespace prefixes consist of `\\.\` immediately followed by the
172-
/// device name.
171+
/// Device namespace prefixes consist of `\\.\` (possibly using `/`
172+
/// instead of `\`), immediately followed by the device name.
173173
#[stable(feature = "rust1", since = "1.0.0")]
174174
DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr),
175175

@@ -193,7 +193,7 @@ impl<'a> Prefix<'a> {
193193
fn len(&self) -> usize {
194194
use self::Prefix::*;
195195
fn os_str_len(s: &OsStr) -> usize {
196-
os_str_as_u8_slice(s).len()
196+
s.bytes().len()
197197
}
198198
match *self {
199199
Verbatim(x) => 4 + os_str_len(x),
@@ -299,19 +299,17 @@ where
299299
}
300300
}
301301

302-
// See note at the top of this module to understand why these are used:
303-
//
304-
// These casts are safe as OsStr is internally a wrapper around [u8] on all
305-
// platforms.
306-
//
307-
// Note that currently this relies on the special knowledge that libstd has;
308-
// these types are single-element structs but are not marked repr(transparent)
309-
// or repr(C) which would make these casts allowable outside std.
310-
fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
311-
unsafe { &*(s as *const OsStr as *const [u8]) }
312-
}
313302
unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
314-
// SAFETY: see the comment of `os_str_as_u8_slice`
303+
// SAFETY: See note at the top of this module to understand why this and
304+
// `OsStr::bytes` are used:
305+
//
306+
// This casts are safe as OsStr is internally a wrapper around [u8] on all
307+
// platforms.
308+
//
309+
// Note that currently this relies on the special knowledge that libstd has;
310+
// these types are single-element structs but are not marked
311+
// repr(transparent) or repr(C) which would make these casts not allowable
312+
// outside std.
315313
unsafe { &*(s as *const [u8] as *const OsStr) }
316314
}
317315

@@ -332,15 +330,15 @@ fn has_physical_root(s: &[u8], prefix: Option<Prefix<'_>>) -> bool {
332330

333331
// basic workhorse for splitting stem and extension
334332
fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
335-
if os_str_as_u8_slice(file) == b".." {
333+
if file.bytes() == b".." {
336334
return (Some(file), None);
337335
}
338336

339337
// The unsafety here stems from converting between &OsStr and &[u8]
340338
// and back. This is safe to do because (1) we only look at ASCII
341339
// contents of the encoding and (2) new &OsStr values are produced
342340
// only from ASCII-bounded slices of existing &OsStr values.
343-
let mut iter = os_str_as_u8_slice(file).rsplitn(2, |b| *b == b'.');
341+
let mut iter = file.bytes().rsplitn(2, |b| *b == b'.');
344342
let after = iter.next();
345343
let before = iter.next();
346344
if before == Some(b"") {
@@ -351,7 +349,7 @@ fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) {
351349
}
352350

353351
fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
354-
let slice = os_str_as_u8_slice(file);
352+
let slice = file.bytes();
355353
if slice == b".." {
356354
return (file, None);
357355
}
@@ -1445,17 +1443,17 @@ impl PathBuf {
14451443
fn _set_extension(&mut self, extension: &OsStr) -> bool {
14461444
let file_stem = match self.file_stem() {
14471445
None => return false,
1448-
Some(f) => os_str_as_u8_slice(f),
1446+
Some(f) => f.bytes(),
14491447
};
14501448

14511449
// truncate until right after the file stem
14521450
let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr();
1453-
let start = os_str_as_u8_slice(&self.inner).as_ptr().addr();
1451+
let start = self.inner.bytes().as_ptr().addr();
14541452
let v = self.as_mut_vec();
14551453
v.truncate(end_file_stem.wrapping_sub(start));
14561454

14571455
// add the new extension, if any
1458-
let new = os_str_as_u8_slice(extension);
1456+
let new = extension.bytes();
14591457
if !new.is_empty() {
14601458
v.reserve_exact(new.len() + 1);
14611459
v.push(b'.');
@@ -1948,7 +1946,7 @@ impl Path {
19481946
}
19491947
// The following (private!) function reveals the byte encoding used for OsStr.
19501948
fn as_u8_slice(&self) -> &[u8] {
1951-
os_str_as_u8_slice(&self.inner)
1949+
self.inner.bytes()
19521950
}
19531951

19541952
/// Directly wraps a string slice as a `Path` slice.

library/std/src/path/tests.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -971,15 +971,15 @@ pub fn test_decompositions_windows() {
971971
file_prefix: None
972972
);
973973

974-
t!("\\\\?\\C:/foo",
975-
iter: ["\\\\?\\C:/foo"],
974+
t!("\\\\?\\C:/foo/bar",
975+
iter: ["\\\\?\\C:", "\\", "foo/bar"],
976976
has_root: true,
977977
is_absolute: true,
978-
parent: None,
979-
file_name: None,
980-
file_stem: None,
978+
parent: Some("\\\\?\\C:/"),
979+
file_name: Some("foo/bar"),
980+
file_stem: Some("foo/bar"),
981981
extension: None,
982-
file_prefix: None
982+
file_prefix: Some("foo/bar")
983983
);
984984

985985
t!("\\\\.\\foo\\bar",

library/std/src/sys/windows/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ where
190190
{
191191
// Start off with a stack buf but then spill over to the heap if we end up
192192
// needing more space.
193+
//
194+
// This initial size also works around `GetFullPathNameW` returning
195+
// incorrect size hints for some short paths:
196+
// https://github.com/dylni/normpath/issues/5
193197
let mut stack_buf = [0u16; 512];
194198
let mut heap_buf = Vec::new();
195199
unsafe {

library/std/src/sys/windows/path.rs

+91-29
Original file line numberDiff line numberDiff line change
@@ -50,37 +50,101 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
5050
path.into()
5151
}
5252

53+
struct PrefixParser<'a, const LEN: usize> {
54+
path: &'a OsStr,
55+
prefix: [u8; LEN],
56+
}
57+
58+
impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
59+
#[inline]
60+
fn get_prefix(path: &OsStr) -> [u8; LEN] {
61+
let mut prefix = [0; LEN];
62+
// SAFETY: Only ASCII characters are modified.
63+
for (i, &ch) in path.bytes().iter().take(LEN).enumerate() {
64+
prefix[i] = if ch == b'/' { b'\\' } else { ch };
65+
}
66+
prefix
67+
}
68+
69+
fn new(path: &'a OsStr) -> Self {
70+
Self { path, prefix: Self::get_prefix(path) }
71+
}
72+
73+
fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
74+
PrefixParserSlice {
75+
path: self.path,
76+
prefix: &self.prefix[..LEN.min(self.path.len())],
77+
index: 0,
78+
}
79+
}
80+
}
81+
82+
struct PrefixParserSlice<'a, 'b> {
83+
path: &'a OsStr,
84+
prefix: &'b [u8],
85+
index: usize,
86+
}
87+
88+
impl<'a> PrefixParserSlice<'a, '_> {
89+
fn strip_prefix(&self, prefix: &str) -> Option<Self> {
90+
self.prefix[self.index..]
91+
.starts_with(prefix.as_bytes())
92+
.then(|| Self { index: self.index + prefix.len(), ..*self })
93+
}
94+
95+
fn prefix_bytes(&self) -> &'a [u8] {
96+
&self.path.bytes()[..self.index]
97+
}
98+
99+
fn finish(self) -> &'a OsStr {
100+
// SAFETY: The unsafety here stems from converting between &OsStr and
101+
// &[u8] and back. This is safe to do because (1) we only look at ASCII
102+
// contents of the encoding and (2) new &OsStr values are produced only
103+
// from ASCII-bounded slices of existing &OsStr values.
104+
unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) }
105+
}
106+
}
107+
53108
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
54109
use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
55110

56-
if let Some(path) = strip_prefix(path, r"\\") {
111+
let parser = PrefixParser::<8>::new(path);
112+
let parser = parser.as_slice();
113+
if let Some(parser) = parser.strip_prefix(r"\\") {
57114
// \\
58-
if let Some(path) = strip_prefix(path, r"?\") {
115+
116+
// The meaning of verbatim paths can change when they use a different
117+
// separator.
118+
if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') {
59119
// \\?\
60-
if let Some(path) = strip_prefix(path, r"UNC\") {
120+
if let Some(parser) = parser.strip_prefix(r"UNC\") {
61121
// \\?\UNC\server\share
62122

123+
let path = parser.finish();
63124
let (server, path) = parse_next_component(path, true);
64125
let (share, _) = parse_next_component(path, true);
65126

66127
Some(VerbatimUNC(server, share))
67128
} else {
68-
let (prefix, _) = parse_next_component(path, true);
129+
let path = parser.finish();
69130

70131
// in verbatim paths only recognize an exact drive prefix
71-
if let Some(drive) = parse_drive_exact(prefix) {
132+
if let Some(drive) = parse_drive_exact(path) {
72133
// \\?\C:
73134
Some(VerbatimDisk(drive))
74135
} else {
75136
// \\?\prefix
137+
let (prefix, _) = parse_next_component(path, true);
76138
Some(Verbatim(prefix))
77139
}
78140
}
79-
} else if let Some(path) = strip_prefix(path, r".\") {
141+
} else if let Some(parser) = parser.strip_prefix(r".\") {
80142
// \\.\COM42
143+
let path = parser.finish();
81144
let (prefix, _) = parse_next_component(path, false);
82145
Some(DeviceNS(prefix))
83146
} else {
147+
let path = parser.finish();
84148
let (server, path) = parse_next_component(path, false);
85149
let (share, _) = parse_next_component(path, false);
86150

@@ -102,31 +166,26 @@ pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
102166
}
103167

104168
// Parses a drive prefix, e.g. "C:" and "C:\whatever"
105-
fn parse_drive(prefix: &OsStr) -> Option<u8> {
169+
fn parse_drive(path: &OsStr) -> Option<u8> {
106170
// In most DOS systems, it is not possible to have more than 26 drive letters.
107171
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
108172
fn is_valid_drive_letter(drive: &u8) -> bool {
109173
drive.is_ascii_alphabetic()
110174
}
111175

112-
match prefix.bytes() {
176+
match path.bytes() {
113177
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
114178
_ => None,
115179
}
116180
}
117181

118182
// Parses a drive prefix exactly, e.g. "C:"
119-
fn parse_drive_exact(prefix: &OsStr) -> Option<u8> {
183+
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
120184
// only parse two bytes: the drive letter and the drive separator
121-
if prefix.len() == 2 { parse_drive(prefix) } else { None }
122-
}
123-
124-
fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> {
125-
// `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
126-
// is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
127-
match path.bytes().strip_prefix(prefix.as_bytes()) {
128-
Some(path) => unsafe { Some(bytes_as_os_str(path)) },
129-
None => None,
185+
if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
186+
parse_drive(path)
187+
} else {
188+
None
130189
}
131190
}
132191

@@ -219,15 +278,7 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
219278
// SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
220279
// `lpfilename` is a pointer to a null terminated string that is not
221280
// invalidated until after `GetFullPathNameW` returns successfully.
222-
|buffer, size| unsafe {
223-
// While the docs for `GetFullPathNameW` have the standard note
224-
// about needing a `\\?\` path for a long lpfilename, this does not
225-
// appear to be true in practice.
226-
// See:
227-
// https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths
228-
// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
229-
c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut())
230-
},
281+
|buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
231282
|mut absolute| {
232283
path.clear();
233284

@@ -263,9 +314,20 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
263314

264315
/// Make a Windows path absolute.
265316
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
266-
if path.as_os_str().bytes().starts_with(br"\\?\") {
267-
return Ok(path.into());
317+
let path = path.as_os_str();
318+
let prefix = parse_prefix(path);
319+
// Verbatim paths should not be modified.
320+
if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
321+
// NULs in verbatim paths are rejected for consistency.
322+
if path.bytes().contains(&0) {
323+
return Err(io::const_io_error!(
324+
io::ErrorKind::InvalidInput,
325+
"strings passed to WinAPI cannot contain NULs",
326+
));
327+
}
328+
return Ok(path.to_owned().into());
268329
}
330+
269331
let path = to_u16s(path)?;
270332
let lpfilename = path.as_ptr();
271333
fill_utf16_buf(

library/std/src/sys/windows/path/tests.rs

+20
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,23 @@ fn verbatim() {
9494
// A path that contains null is not a valid path.
9595
assert!(maybe_verbatim(Path::new("\0")).is_err());
9696
}
97+
98+
fn parse_prefix(path: &str) -> Option<Prefix<'_>> {
99+
super::parse_prefix(OsStr::new(path))
100+
}
101+
102+
#[test]
103+
fn test_parse_prefix_verbatim() {
104+
let prefix = Some(Prefix::VerbatimDisk(b'C'));
105+
assert_eq!(prefix, parse_prefix(r"\\?\C:/windows/system32/notepad.exe"));
106+
assert_eq!(prefix, parse_prefix(r"\\?\C:\windows\system32\notepad.exe"));
107+
}
108+
109+
#[test]
110+
fn test_parse_prefix_verbatim_device() {
111+
let prefix = Some(Prefix::UNC(OsStr::new("?"), OsStr::new("C:")));
112+
assert_eq!(prefix, parse_prefix(r"//?/C:/windows/system32/notepad.exe"));
113+
assert_eq!(prefix, parse_prefix(r"//?/C:\windows\system32\notepad.exe"));
114+
assert_eq!(prefix, parse_prefix(r"/\?\C:\windows\system32\notepad.exe"));
115+
assert_eq!(prefix, parse_prefix(r"\\?/C:\windows\system32\notepad.exe"));
116+
}

0 commit comments

Comments
 (0)