1515//! The byte-level encoding of component lists uses the structure of UTF-8 in
1616//! order to save space:
1717//!
18- //! - A valid UTF-8 codepoint never starts with the bits `10` as this bit
19- //! prefix is reserved for bytes in the middle of a UTF-8 codepoint byte
20- //! sequence. We make use of this fact by letting all string ID components
21- //! start with this `10` prefix. Thus when we parse the contents of a value
22- //! we know to stop if the start byte of the next codepoint has this prefix.
18+ //! - The byte `0xFE` never occurs anywhere in a valid UTF-8 string. We make
19+ //! use of this fact by letting all string ID components start with this
20+ //! `0xFE` prefix. Thus when we parse the contents of a value we know to
21+ //! stop if we encounter this byte.
2322//!
24- //! - A valid UTF-8 string cannot contain the `0xFF` byte and since string IDs
25- //! start with `10` as described above, they also cannot start with a `0xFF`
26- //! byte. Thus we can safely use `0xFF` as our component list terminator.
23+ //! - A valid UTF-8 string cannot contain the `0xFF` byte. Thus we can safely
24+ //! use `0xFF` as our component list terminator.
2725//!
2826//! The sample composite string ["abc", ID(42), "def", TERMINATOR] would thus be
2927//! encoded as:
3028//!
3129//! ```ignore
32- //! ['a', 'b' , 'c', 128, 0, 0, 42 , 'd', 'e', 'f', 255]
33- //! ^^^^^^^^^^^^^ ^^^
34- //! string ID 42 with 0b10 prefix terminator (0xFF)
30+ //! ['a', 'b' , 'c', 254, 42, 0, 0, 0 , 'd', 'e', 'f', 255]
31+ //! ^^^^^^^^^^^^^^^^ ^^^
32+ //! string ID with 0xFE prefix terminator (0xFF)
3533//! ```
3634//!
37- //! As you can see string IDs are encoded in big endian format so that highest
38- //! order bits show up in the first byte we encounter.
35+ //! As you can see, string IDs are encoded in little-endian format.
3936//!
4037//! ----------------------------------------------------------------------------
4138//!
5855//! > [0 .. MAX_VIRTUAL_STRING_ID, METADATA_STRING_ID, .. ]
5956//!
6057//! From `0` to `MAX_VIRTUAL_STRING_ID` are the allowed values for virtual strings.
61- //! After `MAX_VIRTUAL_STRING_ID`, there is one string id (`METADATA_STRING_ID`) which is used
62- //! internally by `measureme` to record additional metadata about the profiling session.
63- //! After `METADATA_STRING_ID` are all other `StringId` values.
64- //!
58+ //! After `MAX_VIRTUAL_STRING_ID`, there is one string id (`METADATA_STRING_ID`)
59+ //! which is used internally by `measureme` to record additional metadata about
60+ //! the profiling session. After `METADATA_STRING_ID` are all other `StringId`
61+ //! values.
6562
6663use crate :: file_header:: {
6764 write_file_header, FILE_MAGIC_STRINGTABLE_DATA , FILE_MAGIC_STRINGTABLE_INDEX ,
@@ -84,7 +81,6 @@ impl StringId {
8481
8582 #[ inline]
8683 pub fn new ( id : u32 ) -> StringId {
87- assert ! ( id <= MAX_STRING_ID ) ;
8884 StringId ( id)
8985 }
9086
@@ -106,23 +102,20 @@ impl StringId {
106102
107103 #[ inline]
108104 pub fn from_addr ( addr : Addr ) -> StringId {
109- let id = addr. 0 + FIRST_REGULAR_STRING_ID ;
105+ let id = addr. 0 . checked_add ( FIRST_REGULAR_STRING_ID ) . unwrap ( ) ;
110106 StringId :: new ( id)
111107 }
112108
113109 #[ inline]
114110 pub fn to_addr ( self ) -> Addr {
115- assert ! ( self . 0 >= FIRST_REGULAR_STRING_ID ) ;
116- Addr ( self . 0 - FIRST_REGULAR_STRING_ID )
111+ Addr ( self . 0 . checked_sub ( FIRST_REGULAR_STRING_ID ) . unwrap ( ) )
117112 }
118113}
119114
120115// See module-level documentation for more information on the encoding.
121116pub const TERMINATOR : u8 = 0xFF ;
122-
123- // All 1s except for the two highest bits.
124- pub const MAX_STRING_ID : u32 = 0x3FFF_FFFF ;
125- pub const STRING_ID_MASK : u32 = 0x3FFF_FFFF ;
117+ pub const STRING_REF_TAG : u8 = 0xFE ;
118+ pub const STRING_REF_ENCODED_SIZE : usize = 5 ;
126119
127120/// The maximum id value a virtual string may be.
128121const MAX_USER_VIRTUAL_STRING_ID : u32 = 100_000_000 ;
@@ -175,7 +168,7 @@ impl<'s> StringComponent<'s> {
175168 fn serialized_size ( & self ) -> usize {
176169 match * self {
177170 StringComponent :: Value ( s) => s. len ( ) ,
178- StringComponent :: Ref ( _) => 4 ,
171+ StringComponent :: Ref ( _) => STRING_REF_ENCODED_SIZE ,
179172 }
180173 }
181174
@@ -187,11 +180,10 @@ impl<'s> StringComponent<'s> {
187180 & mut bytes[ s. len ( ) ..]
188181 }
189182 StringComponent :: Ref ( string_id) => {
190- assert ! ( string_id. 0 == string_id. 0 & STRING_ID_MASK ) ;
191- let tagged = string_id. 0 | ( 1u32 << 31 ) ;
192-
193- & mut bytes[ 0 ..4 ] . copy_from_slice ( & tagged. to_be_bytes ( ) ) ;
194- & mut bytes[ 4 ..]
183+ assert ! ( STRING_REF_ENCODED_SIZE == 5 ) ;
184+ bytes[ 0 ] = STRING_REF_TAG ;
185+ & mut bytes[ 1 ..5 ] . copy_from_slice ( & string_id. 0 . to_le_bytes ( ) ) ;
186+ & mut bytes[ 5 ..]
195187 }
196188 }
197189 }
0 commit comments