@@ -1131,7 +1131,7 @@ namespace pcs // i.e. "pythonic c++ strings"
11311131 }
11321132
11331133 /* * \brief Returns a vector of the words in the string, using sep as the delimiter string. At most maxsplit splits are done, the leftmost ones. */
1134- inline std::vector<CppStringT> split (const CppStringT& sep, const size_type maxsplit) const noexcept
1134+ std::vector<CppStringT> split (const CppStringT& sep, const size_type maxsplit) const noexcept
11351135 {
11361136 std::vector<CppStringT> res{};
11371137
@@ -1140,7 +1140,7 @@ namespace pcs // i.e. "pythonic c++ strings"
11401140 }
11411141 else {
11421142 const CppStringT whitespace (value_type (' ' ));
1143- std::vector<CppStringT> all_words{ this ->split (whitespace ) };
1143+ std::vector<CppStringT> all_words{ this ->split (sep ) };
11441144
11451145 size_type count = maxsplit;
11461146 auto word_it = all_words.cbegin ();
@@ -1162,6 +1162,88 @@ namespace pcs // i.e. "pythonic c++ strings"
11621162 }
11631163
11641164
1165+ // --- splitlines() ------------------------------------
1166+ /* * \brief Return a list of the lines in the string, breaking at line boundaries.
1167+ *
1168+ * Line breaks are not included in the resulting list unless keepends is given and true.
1169+ *
1170+ * This method splits on the following line boundaries. In particular, the boundaries are a superset of universal newlines:
1171+ * \n Line Feed
1172+ * \r Carriage Return
1173+ * \r\n Carriage Return + Line Feed
1174+ * \v or \x0b Line Tabulation
1175+ * \f or \x0c Form Feed
1176+ * \x1c File Separator
1177+ * \x1d Group Separator
1178+ * \x1e Record Separator
1179+ * \x85 Next Line (C1 Control Code)
1180+ * \u2028 Line Separator
1181+ * \u2029 Paragraph Separator
1182+ */
1183+ std::vector<CppStringT> splitlines (const bool keep_end = false ) const noexcept
1184+ {
1185+ std::vector<CppStringT> res{};
1186+ CppStringT current{};
1187+ bool prev_cr = false ;
1188+
1189+ for (const value_type& ch : *this ) {
1190+ switch (ch) {
1191+ case value_type (' \v ' ): // Line Tabulation
1192+ case value_type (' \x0b ' ): // Line Tabulation
1193+ case value_type (' \f ' ): // Form Feed
1194+ case value_type (' \x0c ' ): // Form Feed
1195+ case value_type (' \x1c ' ): // File Separator
1196+ case value_type (' \x1d ' ): // Group Separator
1197+ case value_type (' \x1e ' ): // Record Separator
1198+ case value_type (' \x85 ' ): // Next Line (C1 Control Code)
1199+ #pragma warning(push)
1200+ #pragma warning(disable: 4566)
1201+ case value_type (' \u2028 ' ): // Line Separator
1202+ case value_type (' \u2029 ' ): // Paragraph Separator
1203+ #pragma warning(pop)
1204+ if (prev_cr) {
1205+ res.push_back (current);
1206+ current.clear ();
1207+ }
1208+ if (keep_end)
1209+ current += ch;
1210+ res.push_back (current);
1211+ current.clear ();
1212+ prev_cr = false ;
1213+ break ;
1214+
1215+ case value_type (' \r ' ): // Line Feed
1216+ if (prev_cr) {
1217+ res.push_back (current);
1218+ current.clear ();
1219+ }
1220+ if (keep_end)
1221+ current += ch;
1222+ prev_cr = true ;
1223+ break ;
1224+
1225+ case value_type (' \n ' ): // Line Feed
1226+ if (keep_end)
1227+ current += ch;
1228+ res.push_back (current);
1229+ current.clear ();
1230+ prev_cr = false ;
1231+ break ;
1232+
1233+
1234+ default :
1235+ if (prev_cr) {
1236+ res.push_back (current);
1237+ current.clear ();
1238+ }
1239+ current += ch;
1240+ break ;
1241+ }
1242+ }
1243+
1244+ return res;
1245+ }
1246+
11651247
11661248 // --- title() -----------------------------------------
11671249 /* * \brief Returns a titlecased copy of the string where words start with an uppercase character and the remaining characters are lowercase. */
0 commit comments