Add str_util helpers to split camelcase strings

dswij · dswij · commit c76e2d1e59d4 · 2021-12-25T21:55:15.000+08:00
diff --git a/clippy_utils/src/str_utils.rs b/clippy_utils/src/str_utils.rs
@@ -68,6 +68,16 @@ pub fn camel_case_until(s: &str) -> StrIndex {
 /// ```
 #[must_use]
 pub fn camel_case_start(s: &str) -> StrIndex {
+    camel_case_start_from_idx(s, 0)
+}
+
+/// Returns `StrIndex` of the last camel-case component of `s[idx..]`.
+///
+/// ```
+/// assert_eq!(camel_case_start("AbcDef", 0), StrIndex::new(0, 0));
+/// assert_eq!(camel_case_start("AbcDef", 1), StrIndex::new(3, 3));
+/// ```
+pub fn camel_case_start_from_idx(s: &str, start_idx: usize) -> StrIndex {
     let char_count = s.chars().count();
     let range = 0..char_count;
     let mut iter = range.rev().zip(s.char_indices().rev());
@@ -78,9 +88,13 @@ pub fn camel_case_start(s: &str) -> StrIndex {
     } else {
         return StrIndex::new(char_count, s.len());
     }
+
     let mut down = true;
     let mut last_index = StrIndex::new(char_count, s.len());
     for (char_index, (byte_index, c)) in iter {
+        if byte_index < start_idx {
+            continue;
+        }
         if down {
             if c.is_uppercase() {
                 down = false;
@@ -98,9 +112,51 @@ pub fn camel_case_start(s: &str) -> StrIndex {
             return last_index;
         }
     }
+
     last_index
 }
 
+/// Get the indexes of camel case components of a string `s`
+///
+/// ```
+/// assert_eq!(camel_case_indexes("AbcDef"), vec![StrIndex::new(0, 0), StrIndex::new(3, 3)])
+/// ```
+pub fn camel_case_indexes(s: &str) -> Vec<StrIndex> {
+    let mut result = Vec::new();
+    let mut str_idx = camel_case_start(s);
+
+    while str_idx.byte_index < s.len() {
+        let next_idx = str_idx.byte_index + 1;
+        result.push(str_idx);
+        str_idx = camel_case_start_from_idx(s, next_idx);
+    }
+    result.push(str_idx);
+
+    result
+}
+
+/// Split camel case string into a vector of its components
+///
+/// ```
+/// assert_eq!(camel_case_split("AbcDef"), vec!["Abc", "Def"]);
+/// ```
+pub fn camel_case_split(s: &str) -> Vec<&str> {
+    let offsets = camel_case_indexes(s);
+    let mut idxs_iter = offsets.iter().map(|str_idx| str_idx.byte_index).peekable();
+    let idxs: Vec<usize> = if let Some(&idx) = idxs_iter.peek() {
+        if idx == 0 {
+            idxs_iter.collect()
+        } else {
+            Vec::<usize>::from([0]).into_iter().chain(idxs_iter).collect()
+        }
+    } else {
+        return vec![s];
+    };
+    let split_points: Vec<(&usize, &usize)> = idxs[..idxs.len() - 1].iter().zip(&idxs[1..]).collect();
+
+    split_points.iter().map(|(&start, &stop)| &s[start..stop]).collect()
+}
+
 /// Dealing with sting comparison can be complicated, this struct ensures that both the
 /// character and byte count are provided for correct indexing.
 #[derive(Debug, Default, PartialEq, Eq)]
@@ -231,4 +287,29 @@ mod test {
     fn until_caps() {
         assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0));
     }
+
+    #[test]
+    fn camel_case_indexes_full() {
+        assert_eq!(
+            camel_case_indexes("AbcDef"),
+            vec![StrIndex::new(0, 0), StrIndex::new(3, 3)]
+        );
+        assert_eq!(
+            camel_case_indexes("abcDef"),
+            vec![StrIndex::new(0, 0), StrIndex::new(3, 3)]
+        );
+        assert_eq!(camel_case_indexes("Abc\u{f6}\u{f6}DD"), vec![StrIndex::new(5, 7)]);
+    }
+
+    #[test]
+    fn camel_case_split_full() {
+        assert_eq!(camel_case_split("A"), vec!["A"]);
+        assert_eq!(camel_case_split("AbcDef"), vec!["Abc", "Def"]);
+        assert_eq!(camel_case_split("Abc"), vec!["Abc"]);
+        assert_eq!(camel_case_split("abcDef"), vec!["abc", "Def"]);
+        assert_eq!(
+            camel_case_split("\u{f6}\u{f6}AabABcd"),
+            vec!["\u{f6}\u{f6}", "Aab", "A", "Bcd"]
+        );
+    }
 }