std\sys\path/
windows_prefix.rs

1//! Parse Windows prefixes, for both Windows and Cygwin.
2
3use super::{is_sep_byte, is_verbatim_sep};
4use crate::ffi::OsStr;
5use crate::path::Prefix;
6
7struct PrefixParser<'a, const LEN: usize> {
8    path: &'a OsStr,
9    prefix: [u8; LEN],
10}
11
12impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
13    #[inline]
14    fn get_prefix(path: &OsStr) -> [u8; LEN] {
15        let mut prefix = [0; LEN];
16        // SAFETY: Only ASCII characters are modified.
17        for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {
18            prefix[i] = if ch == b'/' { b'\\' } else { ch };
19        }
20        prefix
21    }
22
23    fn new(path: &'a OsStr) -> Self {
24        Self { path, prefix: Self::get_prefix(path) }
25    }
26
27    fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
28        PrefixParserSlice {
29            path: self.path,
30            prefix: &self.prefix[..LEN.min(self.path.len())],
31            index: 0,
32        }
33    }
34}
35
// A cursor over a normalized prefix buffer.
//
// `path` is the original path, `prefix` is the buffer that prefixes are
// matched against, and `index` is the number of leading bytes consumed so far.
struct PrefixParserSlice<'a, 'b> {
    path: &'a OsStr,
    prefix: &'b [u8],
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    // If the unconsumed part of the buffer starts with `prefix`, returns a new
    // cursor advanced past it; otherwise returns `None`. `self` is left as is.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let expected = prefix.as_bytes();
        let remaining = &self.prefix[self.index..];
        if remaining.starts_with(expected) {
            Some(Self { index: self.index + expected.len(), ..*self })
        } else {
            None
        }
    }

    // Returns the consumed bytes as they appear in the original path, i.e.
    // without the separator normalization applied to the buffer.
    fn prefix_bytes(&self) -> &'a [u8] {
        let original: &'a [u8] = self.path.as_encoded_bytes();
        &original[..self.index]
    }

    // Consumes the cursor and returns the part of the original path that has
    // not been consumed yet.
    fn finish(self) -> &'a OsStr {
        let Self { path, index, .. } = self;
        let rest = &path.as_encoded_bytes()[index..];
        // SAFETY: The unsafety comes from going from &OsStr to &[u8] and back.
        // This is sound because (1) only ASCII contents of the encoding are
        // inspected and (2) the new &OsStr is cut from an existing &OsStr at
        // an ASCII-bounded position.
        unsafe { OsStr::from_encoded_bytes_unchecked(rest) }
    }
}
61
62pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
63    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
64
65    let parser = PrefixParser::<8>::new(path);
66    let parser = parser.as_slice();
67    if let Some(parser) = parser.strip_prefix(r"\\") {
68        // \\
69
70        // It's a POSIX path.
71        if cfg!(target_os = "cygwin") && !path.as_encoded_bytes().iter().any(|&x| x == b'\\') {
72            return None;
73        }
74
75        // The meaning of verbatim paths can change when they use a different
76        // separator.
77        if let Some(parser) = parser.strip_prefix(r"?\")
78            // Cygwin allows `/` in verbatim paths.
79            && (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/'))
80        {
81            // \\?\
82            if let Some(parser) = parser.strip_prefix(r"UNC\") {
83                // \\?\UNC\server\share
84
85                let path = parser.finish();
86                let (server, path) = parse_next_component(path, true);
87                let (share, _) = parse_next_component(path, true);
88
89                Some(VerbatimUNC(server, share))
90            } else {
91                let path = parser.finish();
92
93                // in verbatim paths only recognize an exact drive prefix
94                if let Some(drive) = parse_drive_exact(path) {
95                    // \\?\C:
96                    Some(VerbatimDisk(drive))
97                } else {
98                    // \\?\prefix
99                    let (prefix, _) = parse_next_component(path, true);
100                    Some(Verbatim(prefix))
101                }
102            }
103        } else if let Some(parser) = parser.strip_prefix(r".\") {
104            // \\.\COM42
105            let path = parser.finish();
106            let (prefix, _) = parse_next_component(path, false);
107            Some(DeviceNS(prefix))
108        } else {
109            let path = parser.finish();
110            let (server, path) = parse_next_component(path, false);
111            let (share, _) = parse_next_component(path, false);
112
113            if !server.is_empty() && !share.is_empty() {
114                // \\server\share
115                Some(UNC(server, share))
116            } else {
117                // no valid prefix beginning with "\\" recognized
118                None
119            }
120        }
121    } else {
122        // If it has a drive like `C:` then it's a disk.
123        // Otherwise there is no prefix.
124        Some(Disk(parse_drive(path)?))
125    }
126}
127
// Parses a leading drive prefix, e.g. "C:" and "C:\whatever".
//
// Returns the drive letter converted to ASCII upper case, or `None` when the
// path does not start with an ASCII letter followed by `:`.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let [letter, b':', ..] = path.as_encoded_bytes() else {
        return None;
    };

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    letter.is_ascii_alphabetic().then(|| letter.to_ascii_uppercase())
}
141
142// Parses a drive prefix exactly, e.g. "C:"
143fn parse_drive_exact(path: &OsStr) -> Option<u8> {
144    // only parse two bytes: the drive letter and the drive separator
145    if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
146        parse_drive(path)
147    } else {
148        None
149    }
150}
151
152// Parse the next path component.
153//
154// Returns the next component and the rest of the path excluding the component and separator.
155// Does not recognize `/` as a separator character on Windows if `verbatim` is true.
156pub(crate) fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
157    let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
158
159    match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
160        Some(separator_start) => {
161            let separator_end = separator_start + 1;
162
163            let component = &path.as_encoded_bytes()[..separator_start];
164
165            // Panic safe
166            // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
167            let path = &path.as_encoded_bytes()[separator_end..];
168
169            // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
170            // is encoded in a single byte, therefore `bytes[separator_start]` and
171            // `bytes[separator_end]` must be code point boundaries and thus
172            // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
173            unsafe {
174                (
175                    OsStr::from_encoded_bytes_unchecked(component),
176                    OsStr::from_encoded_bytes_unchecked(path),
177                )
178            }
179        }
180        None => (path, OsStr::new("")),
181    }
182}