Skip to content

Commit 90310d7

Browse files
committed
#49 - Implement method CppStringT::splitlines()
Completed.
1 parent 2424122 commit 90310d7

File tree

1 file changed

+84
-2
lines changed

1 file changed

+84
-2
lines changed

cpp-strings/cppstrings.h

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,7 +1131,7 @@ namespace pcs // i.e. "pythonic c++ strings"
11311131
}
11321132

11331133
/** \brief Returns a vector of the words in the string, using sep as the delimiter string. At most maxsplit splits are done, the leftmost ones. */
1134-
inline std::vector<CppStringT> split(const CppStringT& sep, const size_type maxsplit) const noexcept
1134+
std::vector<CppStringT> split(const CppStringT& sep, const size_type maxsplit) const noexcept
11351135
{
11361136
std::vector<CppStringT> res{};
11371137

@@ -1140,7 +1140,7 @@ namespace pcs // i.e. "pythonic c++ strings"
11401140
}
11411141
else {
11421142
const CppStringT whitespace(value_type(' '));
1143-
std::vector<CppStringT> all_words{ this->split(whitespace) };
1143+
std::vector<CppStringT> all_words{ this->split(sep) };
11441144

11451145
size_type count = maxsplit;
11461146
auto word_it = all_words.cbegin();
@@ -1162,6 +1162,88 @@ namespace pcs // i.e. "pythonic c++ strings"
11621162
}
11631163

11641164

1165+
//--- splitlines() ------------------------------------
1166+
/** \brief Return a list of the lines in the string, breaking at line boundaries.
1167+
*
1168+
* Line breaks are not included in the resulting list unless keepends is given and true.
1169+
*
1170+
* This method splits on the following line boundaries. In particular, the boundaries are a superset of universal newlines:
1171+
* \n Line Feed
1172+
* \r Carriage Return
1173+
* \r\n Carriage Return + Line Feed
1174+
* \v or \x0b Line Tabulation
1175+
* \f or \x0c Form Feed
1176+
* \x1c File Separator
1177+
* \x1d Group Separator
1178+
* \x1e Record Separator
1179+
* \x85 Next Line (C1 Control Code)
1180+
* \u2028 Line Separator
1181+
* \u2029 Paragraph Separator
1182+
*/
1183+
std::vector<CppStringT> splitlines(const bool keep_end = false) const noexcept
1184+
{
1185+
std::vector<CppStringT> res{};
1186+
CppStringT current{};
1187+
bool prev_cr = false;
1188+
1189+
for (const value_type& ch : *this) {
1190+
switch (ch) {
1191+
case value_type('\v'): // Line Tabulation
1192+
case value_type('\x0b'): // Line Tabulation
1193+
case value_type('\f'): // Form Feed
1194+
case value_type('\x0c'): // Form Feed
1195+
case value_type('\x1c'): // File Separator
1196+
case value_type('\x1d'): // Group Separator
1197+
case value_type('\x1e'): // Record Separator
1198+
case value_type('\x85'): // Next Line (C1 Control Code)
1199+
#pragma warning(push)
1200+
#pragma warning(disable: 4566)
1201+
case value_type('\u2028'): // Line Separator
1202+
case value_type('\u2029'): // Paragraph Separator
1203+
#pragma warning(pop)
1204+
if (prev_cr) {
1205+
res.push_back(current);
1206+
current.clear();
1207+
}
1208+
if (keep_end)
1209+
current += ch;
1210+
res.push_back(current);
1211+
current.clear();
1212+
prev_cr = false;
1213+
break;
1214+
1215+
case value_type('\r'): // Line Feed
1216+
if (prev_cr) {
1217+
res.push_back(current);
1218+
current.clear();
1219+
}
1220+
if (keep_end)
1221+
current += ch;
1222+
prev_cr = true;
1223+
break;
1224+
1225+
case value_type('\n'): // Line Feed
1226+
if (keep_end)
1227+
current += ch;
1228+
res.push_back(current);
1229+
current.clear();
1230+
prev_cr = false;
1231+
break;
1232+
1233+
1234+
default:
1235+
if (prev_cr) {
1236+
res.push_back(current);
1237+
current.clear();
1238+
}
1239+
current += ch;
1240+
break;
1241+
}
1242+
}
1243+
1244+
return res;
1245+
}
1246+
11651247

11661248
//--- title() -----------------------------------------
11671249
/** \brief Returns a titlecased copy of the string where words start with an uppercase character and the remaining characters are lowercase. */

0 commit comments

Comments
 (0)