Commit 3cb0cba

committed

Handle win32 separator & prefixes for cygwin paths

1 parent 015c777 commit 3cb0cbaCopy full SHA for 3cb0cba

File tree

7 files changed

+772

-173

lines changed

library/std
- src
  - path.rs
  - sys/path
    - cygwin.rs
    - mod.rs
    - windows.rs
    - windows_prefix.rs
    - windows
      - tests.rs
- tests
  - path.rs

7 files changed

+772

-173

lines changed

`‎library/std/src/path.rs‎`

Lines changed: 16 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -1316,8 +1316,17 @@ impl PathBuf {`
`1316`	`1316`	`need_sep = false`
`1317`	`1317`	`}`
`1318`	`1318`
	`1319`	`+ let need_clear = if cfg!(target_os = "cygwin") {`
	`1320`	+ // If path is absolute and its prefix is none, it is like `/foo`,
	`1321`	`+ // and will be handled below.`
	`1322`	`+ path.prefix().is_some()`
	`1323`	`+ } else {`
	`1324`	`+ // On Unix: prefix is always None.`
	`1325`	`+ path.is_absolute() \|\| path.prefix().is_some()`
	`1326`	`+ };`
	`1327`	`+`
`1319`	`1328`	// absolute `path` replaces `self`
`1320`		`- if path.is_absolute() \|\| path.prefix().is_some() {`
	`1329`	`+ if need_clear {`
`1321`	`1330`	`self.inner.truncate(0);`
`1322`	`1331`
`1323`	`1332`	`// verbatim paths need . and .. removed`
`@@ -3616,6 +3625,11 @@ impl Error for NormalizeError {}`
`3616`	`3625`	`/// paths, this is currently equivalent to calling`
`3617`	`3626`	/// [`GetFullPathNameW`][windows-path].
`3618`	`3627`	`///`
	`3628`	+/// On Cygwin, this is currently equivalent to calling [`cygwin_conv_path`][cygwin-path]
	`3629`	+/// with mode `CCP_WIN_A_TO_POSIX`, and then processing the result as on other POSIX platforms.
	`3630`	`+/// If a Windows path is given, it will be converted to an absolute POSIX path without`
	`3631`	+/// keeping `..`.
	`3632`	`+///`
`3619`	`3633`	`/// Note that these [may change in the future][changes].`
`3620`	`3634`	`///`
`3621`	`3635`	`/// # Errors`
`@@ -3673,6 +3687,7 @@ impl Error for NormalizeError {}`
`3673`	`3687`	`/// [changes]: io#platform-specific-behavior`
`3674`	`3688`	`/// [posix-semantics]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13`
`3675`	`3689`	`/// [windows-path]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew`
	`3690`	`+/// [cygwin-path]: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html`
`3676`	`3691`	`#[stable(feature = "absolute_path", since = "1.79.0")]`
`3677`	`3692`	`pub fn absolute<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {`
`3678`	`3693`	`let path = path.as_ref();`

`‎library/std/src/sys/path/cygwin.rs‎`

Lines changed: 92 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,92 @@`
	`1`	`+use crate::ffi::OsString;`
	`2`	`+use crate::os::unix::ffi::OsStringExt;`
	`3`	`+use crate::path::{Path, PathBuf};`
	`4`	`+use crate::sys::common::small_c_string::run_path_with_cstr;`
	`5`	`+use crate::sys::cvt;`
	`6`	`+use crate::{io, ptr};`
	`7`	`+`
	`8`	`+#[inline]`
	`9`	`+pub fn is_sep_byte(b: u8) -> bool {`
	`10`	`+ b == b'/' \|\| b == b'\\'`
	`11`	`+}`
	`12`	`+`
	`13`	+/// Cygwin always prefers `/` over `\`, and it always converts all `/` to `\`
	`14`	`+/// internally when calling Win32 APIs. Therefore, the server component of path`
	`15`	+/// `\\?\UNC\localhost/share` is `localhost/share` on Win32, but `localhost`
	`16`	`+/// on Cygwin.`
	`17`	`+#[inline]`
	`18`	`+pub fn is_verbatim_sep(b: u8) -> bool {`
	`19`	`+ b == b'/' \|\| b == b'\\'`
	`20`	`+}`
	`21`	`+`
	`22`	`+pub use super::windows_prefix::parse_prefix;`
	`23`	`+`
	`24`	`+pub const MAIN_SEP_STR: &str = "/";`
	`25`	`+pub const MAIN_SEP: char = '/';`
	`26`	`+`
	`27`	`+unsafe extern "C" {`
	`28`	`+ // Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html`
	`29`	`+ // Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902`
	`30`	`+ // Safety:`
	`31`	+ // * `what` should be `CCP_WIN_A_TO_POSIX` here
	`32`	+ // * `from` is a null-terminated UTF-8 path
	`33`	+ // * `to` is a buffer whose size in bytes is `size`.
	`34`	`+ //`
	`35`	`+ // Converts a path to an absolute POSIX path, regardless of whether the input is a Win32 path or a POSIX path.`
	`36`	`+ fn cygwin_conv_path(`
	`37`	`+ what: libc::c_uint,`
	`38`	`+ from: *const libc::c_char,`
	`39`	`+ to: *mut u8,`
	`40`	`+ size: libc::size_t,`
	`41`	`+ ) -> libc::ssize_t;`
	`42`	`+}`
	`43`	`+`
	`44`	`+const CCP_WIN_A_TO_POSIX: libc::c_uint = 2;`
	`45`	`+`
	`46`	`+/// Make a POSIX path absolute.`
	`47`	`+pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {`
	`48`	`+ run_path_with_cstr(path, &\|path\| {`
	`49`	`+ let conv = CCP_WIN_A_TO_POSIX;`
	`50`	`+ let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?;`
	`51`	`+ // If success, size should not be 0.`
	`52`	`+ debug_assert!(size >= 1);`
	`53`	`+ let size = size as usize;`
	`54`	`+ let mut buffer = Vec::with_capacity(size);`
	`55`	`+ cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?;`
	`56`	`+ unsafe {`
	`57`	`+ buffer.set_len(size - 1);`
	`58`	`+ }`
	`59`	`+ Ok(PathBuf::from(OsString::from_vec(buffer)))`
	`60`	`+ })`
	`61`	`+ .map(\|path\| {`
	`62`	`+ if path.prefix().is_some() {`
	`63`	`+ return path;`
	`64`	`+ }`
	`65`	`+`
	`66`	`+ // From unix.rs`
	`67`	`+ let mut components = path.components();`
	`68`	`+ let path_os = path.as_os_str().as_encoded_bytes();`
	`69`	`+`
	`70`	`+ let mut normalized = if path_os.starts_with(b"//") && !path_os.starts_with(b"///") {`
	`71`	`+ components.next();`
	`72`	`+ PathBuf::from("//")`
	`73`	`+ } else {`
	`74`	`+ PathBuf::new()`
	`75`	`+ };`
	`76`	`+ normalized.extend(components);`
	`77`	`+`
	`78`	`+ if path_os.ends_with(b"/") {`
	`79`	`+ normalized.push("");`
	`80`	`+ }`
	`81`	`+`
	`82`	`+ normalized`
	`83`	`+ })`
	`84`	`+}`
	`85`	`+`
	`86`	`+pub(crate) fn is_absolute(path: &Path) -> bool {`
	`87`	`+ if path.as_os_str().as_encoded_bytes().starts_with(b"\\") {`
	`88`	`+ path.has_root() && path.prefix().is_some()`
	`89`	`+ } else {`
	`90`	`+ path.has_root()`
	`91`	`+ }`
	`92`	`+}`

`‎library/std/src/sys/path/mod.rs‎`

Lines changed: 5 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,7 @@`
`1`	`1`	`cfg_if::cfg_if! {`
`2`	`2`	`if #[cfg(target_os = "windows")] {`
`3`	`3`	`mod windows;`
	`4`	`+ mod windows_prefix;`
`4`	`5`	`pub use windows::*;`
`5`	`6`	`} else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {`
`6`	`7`	`mod sgx;`
`@@ -11,6 +12,10 @@ cfg_if::cfg_if! {`
`11`	`12`	`} else if #[cfg(target_os = "uefi")] {`
`12`	`13`	`mod uefi;`
`13`	`14`	`pub use uefi::*;`
	`15`	`+ } else if #[cfg(target_os = "cygwin")] {`
	`16`	`+ mod cygwin;`
	`17`	`+ mod windows_prefix;`
	`18`	`+ pub use cygwin::*;`
`14`	`19`	`} else {`
`15`	`20`	`mod unix;`
`16`	`21`	`pub use unix::*;`

`‎library/std/src/sys/path/windows.rs‎`

Lines changed: 3 additions & 172 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,12 +1,14 @@`
`1`	`1`	`use crate::ffi::{OsStr, OsString};`
`2`		`-use crate::path::{Path, PathBuf,Prefix};`
	`2`	`+use crate::path::{Path, PathBuf};`
`3`	`3`	`use crate::sys::api::utf16;`
`4`	`4`	`use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};`
`5`	`5`	`use crate::{io, ptr};`
`6`	`6`
`7`	`7`	`#[cfg(test)]`
`8`	`8`	`mod tests;`
`9`	`9`
	`10`	`+pub use super::windows_prefix::parse_prefix;`
	`11`	`+`
`10`	`12`	`pub const MAIN_SEP_STR: &str = "\\";`
`11`	`13`	`pub const MAIN_SEP: char = '\\';`
`12`	`14`
`@@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {`
`77`	`79`	`path.into()`
`78`	`80`	`}`
`79`	`81`
`80`		`-struct PrefixParser<'a, const LEN: usize> {`
`81`		`- path: &'a OsStr,`
`82`		`- prefix: [u8; LEN],`
`83`		`-}`
`84`		`-`
`85`		`-impl<'a, const LEN: usize> PrefixParser<'a, LEN> {`
`86`		`- #[inline]`
`87`		`- fn get_prefix(path: &OsStr) -> [u8; LEN] {`
`88`		`- let mut prefix = [0; LEN];`
`89`		`- // SAFETY: Only ASCII characters are modified.`
`90`		`- for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {`
`91`		`- prefix[i] = if ch == b'/' { b'\\' } else { ch };`
`92`		`- }`
`93`		`- prefix`
`94`		`- }`
`95`		`-`
`96`		`- fn new(path: &'a OsStr) -> Self {`
`97`		`- Self { path, prefix: Self::get_prefix(path) }`
`98`		`- }`
`99`		`-`
`100`		`- fn as_slice(&self) -> PrefixParserSlice<'a, '_> {`
`101`		`- PrefixParserSlice {`
`102`		`- path: self.path,`
`103`		`- prefix: &self.prefix[..LEN.min(self.path.len())],`
`104`		`- index: 0,`
`105`		`- }`
`106`		`- }`
`107`		`-}`
`108`		`-`
`109`		`-struct PrefixParserSlice<'a, 'b> {`
`110`		`- path: &'a OsStr,`
`111`		`- prefix: &'b [u8],`
`112`		`- index: usize,`
`113`		`-}`
`114`		`-`
`115`		`-impl<'a> PrefixParserSlice<'a, '_> {`
`116`		`- fn strip_prefix(&self, prefix: &str) -> Option<Self> {`
`117`		`- self.prefix[self.index..]`
`118`		`- .starts_with(prefix.as_bytes())`
`119`		`- .then_some(Self { index: self.index + prefix.len(), ..*self })`
`120`		`- }`
`121`		`-`
`122`		`- fn prefix_bytes(&self) -> &'a [u8] {`
`123`		`- &self.path.as_encoded_bytes()[..self.index]`
`124`		`- }`
`125`		`-`
`126`		`- fn finish(self) -> &'a OsStr {`
`127`		`- // SAFETY: The unsafety here stems from converting between &OsStr and`
`128`		`- // &[u8] and back. This is safe to do because (1) we only look at ASCII`
`129`		`- // contents of the encoding and (2) new &OsStr values are produced only`
`130`		`- // from ASCII-bounded slices of existing &OsStr values.`
`131`		`- unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) }`
`132`		`- }`
`133`		`-}`
`134`		`-`
`135`		`-pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {`
`136`		`- use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};`
`137`		`-`
`138`		`- let parser = PrefixParser::<8>::new(path);`
`139`		`- let parser = parser.as_slice();`
`140`		`- if let Some(parser) = parser.strip_prefix(r"\\") {`
`141`		`- // \\`
`142`		`-`
`143`		`- // The meaning of verbatim paths can change when they use a different`
`144`		`- // separator.`
`145`		`- if let Some(parser) = parser.strip_prefix(r"?\")`
`146`		`- && !parser.prefix_bytes().iter().any(\|&x\| x == b'/')`
`147`		`- {`
`148`		`- // \\?\`
`149`		`- if let Some(parser) = parser.strip_prefix(r"UNC\") {`
`150`		`- // \\?\UNC\server\share`
`151`		`-`
`152`		`- let path = parser.finish();`
`153`		`- let (server, path) = parse_next_component(path, true);`
`154`		`- let (share, _) = parse_next_component(path, true);`
`155`		`-`
`156`		`- Some(VerbatimUNC(server, share))`
`157`		`- } else {`
`158`		`- let path = parser.finish();`
`159`		`-`
`160`		`- // in verbatim paths only recognize an exact drive prefix`
`161`		`- if let Some(drive) = parse_drive_exact(path) {`
`162`		`- // \\?\C:`
`163`		`- Some(VerbatimDisk(drive))`
`164`		`- } else {`
`165`		`- // \\?\prefix`
`166`		`- let (prefix, _) = parse_next_component(path, true);`
`167`		`- Some(Verbatim(prefix))`
`168`		`- }`
`169`		`- }`
`170`		`- } else if let Some(parser) = parser.strip_prefix(r".\") {`
`171`		`- // \\.\COM42`
`172`		`- let path = parser.finish();`
`173`		`- let (prefix, _) = parse_next_component(path, false);`
`174`		`- Some(DeviceNS(prefix))`
`175`		`- } else {`
`176`		`- let path = parser.finish();`
`177`		`- let (server, path) = parse_next_component(path, false);`
`178`		`- let (share, _) = parse_next_component(path, false);`
`179`		`-`
`180`		`- if !server.is_empty() && !share.is_empty() {`
`181`		`- // \\server\share`
`182`		`- Some(UNC(server, share))`
`183`		`- } else {`
`184`		`- // no valid prefix beginning with "\\" recognized`
`185`		`- None`
`186`		`- }`
`187`		`- }`
`188`		`- } else {`
`189`		- // If it has a drive like `C:` then it's a disk.
`190`		`- // Otherwise there is no prefix.`
`191`		`- parse_drive(path).map(Disk)`
`192`		`- }`
`193`		`-}`
`194`		`-`
`195`		`-// Parses a drive prefix, e.g. "C:" and "C:\whatever"`
`196`		`-fn parse_drive(path: &OsStr) -> Option<u8> {`
`197`		`- // In most DOS systems, it is not possible to have more than 26 drive letters.`
`198`		`- // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.`
`199`		`- fn is_valid_drive_letter(drive: &u8) -> bool {`
`200`		`- drive.is_ascii_alphabetic()`
`201`		`- }`
`202`		`-`
`203`		`- match path.as_encoded_bytes() {`
`204`		`- [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),`
`205`		`- _ => None,`
`206`		`- }`
`207`		`-}`
`208`		`-`
`209`		`-// Parses a drive prefix exactly, e.g. "C:"`
`210`		`-fn parse_drive_exact(path: &OsStr) -> Option<u8> {`
`211`		`- // only parse two bytes: the drive letter and the drive separator`
`212`		`- if path.as_encoded_bytes().get(2).map(\|&x\| is_sep_byte(x)).unwrap_or(true) {`
`213`		`- parse_drive(path)`
`214`		`- } else {`
`215`		`- None`
`216`		`- }`
`217`		`-}`
`218`		`-`
`219`		`-// Parse the next path component.`
`220`		`-//`
`221`		`-// Returns the next component and the rest of the path excluding the component and separator.`
`222`		-// Does not recognize `/` as a separator character if `verbatim` is true.
`223`		`-fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {`
`224`		`- let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };`
`225`		`-`
`226`		`- match path.as_encoded_bytes().iter().position(\|&x\| separator(x)) {`
`227`		`- Some(separator_start) => {`
`228`		`- let separator_end = separator_start + 1;`
`229`		`-`
`230`		`- let component = &path.as_encoded_bytes()[..separator_start];`
`231`		`-`
`232`		`- // Panic safe`
`233`		- // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
`234`		`- let path = &path.as_encoded_bytes()[separator_end..];`
`235`		`-`
`236`		- // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
`237`		- // is encoded in a single byte, therefore `bytes[separator_start]` and
`238`		- // `bytes[separator_end]` must be code point boundaries and thus
`239`		- // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
`240`		`- unsafe {`
`241`		`- (`
`242`		`- OsStr::from_encoded_bytes_unchecked(component),`
`243`		`- OsStr::from_encoded_bytes_unchecked(path),`
`244`		`- )`
`245`		`- }`
`246`		`- }`
`247`		`- None => (path, OsStr::new("")),`
`248`		`- }`
`249`		`-}`
`250`		`-`
`251`	`82`	/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
`252`	`83`	`///`
`253`	`84`	`/// This path may or may not have a verbatim prefix.`

`‎library/std/src/sys/path/windows/tests.rs‎`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,6 @@`
	`1`	`+use super::super::windows_prefix::*;`
`1`	`2`	`use super::*;`
	`3`	`+use crate::path::Prefix;`
`2`	`4`
`3`	`5`	`#[test]`
`4`	`6`	`fn test_parse_next_component() {`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 3cb0cba

File tree

7 files changed

7 files changed

`‎library/std/src/path.rs‎`

`‎library/std/src/sys/path/cygwin.rs‎`

`‎library/std/src/sys/path/mod.rs‎`

`‎library/std/src/sys/path/windows.rs‎`

`‎library/std/src/sys/path/windows/tests.rs‎`

0 commit comments