@@ -30,20 +30,27 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
3030///
3131/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust
3232/// and platform-native string values, and in particular allowing a Rust string
33- /// to be converted into an "OS" string with no cost if possible . A consequence
34- /// of this is that `OsString` instances are *not* `NUL` terminated; in order
35- /// to pass to e.g., Unix system call, you should create a [`CStr`].
33+ /// to be converted into an "OS" string with no cost. A consequence of this is
34+ /// that `OsString` instances are *not* `NUL` terminated; in order to pass one to,
35+ /// e.g., a Unix system call, you should create a [`CStr`].
3636///
37- /// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: the former
38- /// in each pair are owned strings; the latter are borrowed
39- /// references.
37+ /// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: `OsString` is
38+ /// an owned string like `String`, while `&OsStr` is a borrowed reference like `&str`.
4039///
41- /// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in
42- /// the form native to the platform; While on Unix, strings are stored as a
43- /// sequence of 8-bit values, on Windows, where strings are 16-bit value based
44- /// as just discussed, strings are also actually stored as a sequence of 8-bit
45- /// values, encoded in a less-strict variant of UTF-8. This is useful to
46- /// understand when handling capacity and length values.
40+ /// Note that `OsString` and [`OsStr`] internally do not necessarily hold strings in the form
41+ /// native to the platform. On all platforms, `OsString` and `OsStr` consist of a sequence of
42+ /// bytes, in a superset of UTF-8; any valid UTF-8 sequence is a valid `OsString` or `OsStr`.
43+ /// * On Unix, these bytes can contain any values, in an arbitrary encoding (not necessarily
44+ /// UTF-8, and not necessarily the same encoding for different OS strings).
45+ /// * On Windows, where the native OS uses a sequence of 16-bit values, `OsString` and `OsStr`
46+ /// still consist of a sequence of 8-bit values, encoded in a superset of UTF-8 called
47+ /// ["WTF-8"](https://simonsapin.github.io/wtf-8/) ("Wobbly Translation Format 8-bit"). The
48+ /// WTF-8 format allows encoding arbitrary 16-bit values, including unpaired UTF-16 surrogates
49+ /// that do not constitute valid Unicode, since Windows accepts sequences of arbitrary 16-bit
50+ /// values. (In practice, Windows filenames and similar are almost always valid UTF-16.)
51+ ///
52+ /// Capacity and length values are always in terms of the sequence of bytes, not characters or
53+ /// 16-bit values.
4754///
4855/// # Creating an `OsString`
4956///
@@ -65,8 +72,16 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
6572///
6673/// # Conversions
6774///
75+ /// `OsStr` provides the method [`OsStr::as_bytes`], which provides a zero-cost conversion to a
76+ /// byte slice. (`OsString` provides this method as well, along with all other `OsStr` methods, via
77+ /// `Deref`.)
78+ ///
79+ /// `OsString` provides the method [`OsString::into_vec`], which provides a zero-cost conversion to
80+ /// `Vec<u8>`.
81+ ///
6882/// See the [module's toplevel documentation about conversions][conversions] for a discussion on
69- /// the traits which `OsString` implements for [conversions] from/to native representations.
83+ /// OS-specific traits which `OsString` and `OsStr` implement for [conversions] from/to native
84+ /// representations.
7085///
7186/// [`CStr`]: crate::ffi::CStr
7287/// [conversions]: super#conversions
@@ -163,6 +178,24 @@ impl OsString {
163178 self . inner . into_string ( ) . map_err ( |buf| OsString { inner : buf } )
164179 }
165180
181+ /// Consumes the `OsString` and returns its bytes as a `Vec<u8>`.
182+ ///
183+ /// # Examples
184+ ///
185+ /// ```
186+ /// #![feature(osstr_bytes)]
187+ /// use std::ffi::OsString;
188+ ///
189+ /// let os_string = OsString::from("foo");
190+ /// let v = os_string.into_vec();
191+ /// assert_eq!(v, b"foo");
192+ /// ```
193+ #[ unstable( feature = "osstr_bytes" , issue = "none" ) ]
194+ #[ inline]
195+ pub fn into_vec ( self ) -> Vec < u8 > {
196+ self . inner . into_vec ( )
197+ }
198+
166199 /// Extends the string with the given <code>&[OsStr]</code> slice.
167200 ///
168201 /// # Examples
@@ -667,6 +700,23 @@ impl OsStr {
667700 self . inner . to_str ( )
668701 }
669702
703+ /// Borrows the contents of the `OsStr` as a byte slice (`&[u8]`).
704+ ///
705+ /// # Examples
706+ ///
707+ /// ```
708+ /// #![feature(osstr_bytes)]
709+ /// use std::ffi::OsStr;
710+ ///
711+ /// let os_str = OsStr::new("foo");
712+ /// assert_eq!(os_str.as_bytes(), b"foo");
713+ /// ```
714+ #[ unstable( feature = "osstr_bytes" , issue = "none" ) ]
715+ #[ inline]
716+ pub fn as_bytes ( & self ) -> & [ u8 ] {
717+ self . inner . as_u8_slice ( )
718+ }
719+
670720 /// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
671721 ///
672722 /// Any non-Unicode sequences are replaced with
0 commit comments