Skip to content

Commit 2c06c4f

Browse files
committed
[ustring] ToRuneList, constructor from uint array, clarify parameters in Substring
1 parent 9936e75 commit 2c06c4f

File tree

2 files changed

+59
-9
lines changed

2 files changed

+59
-9
lines changed

NStack/NStack.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
It starts with a new string type that is focused on Unicode code-points as opposed to the historical chars and UTF-16 encoding and introduces a utf8 string that supports slicing</Description>
1919
<ReleaseVersion>0.3</ReleaseVersion>
2020
<PackageReleaseNotes>0.12: Rebuilt
21+
Rebuild with an older Roslyn, to prevent regressions on Xamarin.
2122

2223
0.10:
2324
Merged some changes from upstream.

NStack/strings/ustring.cs

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,26 @@ public static ustring Make (IEnumerable<Rune> runes)
481481
return Make (runes.ToList ());
482482
}
483483

484+
/// <summary>
/// Initializes a new instance of the <see cref="T:NStack.ustring"/> class from an array of uints, which contain CodePoints.
/// </summary>
/// <returns>A ustring built from the encoded form of every code point in <paramref name="runes"/>, in order.</returns>
/// <param name="runes">The code points to encode. Must not be null; may be empty.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="runes"/> is null.</exception>
public static ustring Make (uint [] runes)
{
	if (runes == null)
		throw new ArgumentNullException (nameof (runes));

	// First pass: add up the encoded length of every code point so the
	// destination buffer can be allocated exactly once.
	// NOTE(review): this assumes Utf8.RuneLen reports the same byte count that
	// Utf8.EncodeRune later writes for every input, including invalid code
	// points -- confirm against the Utf8 helper.
	int size = 0;
	foreach (var rune in runes) {
		size += Utf8.RuneLen (rune);
	}

	// Second pass: encode each code point at the running offset.
	var encoded = new byte [size];
	int offset = 0;
	foreach (var rune in runes) {
		offset += Utf8.EncodeRune (rune, encoded, offset);
	}
	return Make (encoded);
}
503+
484504
/// <summary>
485505
/// Initializes a new instance of the <see cref="T:NStack.ustring"/> class from a block of memory and a size.
486506
/// </summary>
@@ -934,8 +954,8 @@ public static ustring Make (byte [] buffer, int start, int count)
934954
/// <summary>
935955
/// Returns a slice of the ustring delimited by the [start, last-element-of-the-string] range. If the range is invalid, the return is the Empty string.
936956
/// </summary>
937-
/// <param name="start">Start index, this value is inclusive. If the value is negative, the value is added to the length, allowing this parameter to count to count from the end of the string.</param>
938-
/// <param name="end">This value is expected to be null to indicate that it should be the last element of the string.</param>
957+
/// <param name="start">Byte start index, this value is inclusive. If the value is negative, the value is added to the length, allowing this parameter to count from the end of the string.</param>
958+
/// <param name="end">Byte end index. This value is expected to be null to indicate that it should be the last element of the string.</param>
939959
/// <remarks>
940960
/// <para>
941961
/// This is a companion indexer to the indexer that takes two integers, it only exists
@@ -944,6 +964,9 @@ public static ustring Make (byte [] buffer, int start, int count)
944964
///
945965
/// Some examples given the string "1234567890":
946966
/// </para>
967+
/// <para>
968+
/// The indexes are byte indexes, not rune indexes.
969+
/// </para>
947970
/// <para>The range [8, null] produces "90"</para>
948971
/// <para>The range [-2, null] produces "90"</para>
949972
/// <para>The range [8, 9] produces "9"</para>
@@ -983,19 +1006,26 @@ public static ustring Make (byte [] buffer, int start, int count)
9831006
}
9841007

9851008
/// <summary>
/// Returns the substring starting at the given position in bytes from the origin of the Utf8 string.
/// Use RuneSubstring to extract substrings based on the rune index, rather than the byte index inside the
/// Utf8 encoded string.
/// </summary>
/// <returns>The substring starting at the specified offset.</returns>
/// <param name="byteStart">Starting point, expressed as a byte offset from the beginning of the string. Negative values are treated as 0.</param>
public ustring Substring (int byteStart)
{
	int len = Length;
	// Clamp negative offsets so the range always starts inside the string.
	if (byteStart < 0)
		byteStart = 0;
	return GetRange (byteStart, len);
}
9971022

9981023

1024+
/// <summary>
/// Returns the substring starting at the given rune index, as opposed to
/// Substring, which takes a byte index into the Utf8 encoded string.
/// </summary>
/// <returns>The substring that begins at the rune with index <paramref name="runeStart"/> and runs to the end of the string.</returns>
/// <param name="runeStart">Number of runes to skip from the beginning of the string. Values that are zero or negative return the whole string.</param>
public ustring RuneSubstring (int runeStart)
{
	int len = Length;
	int byteStart = 0;

	// Translate the rune index into a byte offset by decoding and skipping one
	// rune at a time; stop early if the string ends before runeStart runes
	// have been consumed.
	for (int skipped = 0; skipped < runeStart && byteStart < len; skipped++) {
		(var _, var size) = Utf8.DecodeRune (this, byteStart);
		byteStart += size;
	}

	// Same range call Substring uses, now with the computed byte offset.
	// NOTE(review): when runeStart is past the last rune, byteStart equals len;
	// this relies on GetRange handling an empty trailing range -- confirm.
	return GetRange (byteStart, len);
}
1028+
9991029
/// <summary>
10001030
/// Gets a value indicating whether this <see cref="T:NStack.ustring"/> is empty.
10011031
/// </summary>
@@ -1128,12 +1158,31 @@ public uint [] ToRunes (int limit = -1)
11281158
public List<Rune> ToRuneList ()
{
	var runes = new List<Rune> ();
	int position = 0;

	// Decode one rune per iteration; each decode also reports how many bytes
	// it consumed, which advances the read position.
	while (position < Length) {
		(var decoded, var consumed) = Utf8.DecodeRune (this, position);
		runes.Add (decoded);
		position += consumed;
	}
	return runes;
}
1168+
1169+
/// <summary>
/// Converts a ustring into a list of runes, optionally capped at a maximum number of entries.
/// </summary>
/// <returns>A list containing the runes for the string up to the specified limit.</returns>
/// <param name="limit">Maximum number of entries to return, or a negative value (such as -1) for no limit.</param>
public List<Rune> ToRuneList (int limit)
{
	var n = Utf8.RuneCount (this);
	// Clamp the limit to the number of runes actually present: a negative
	// limit means "everything", and a limit larger than the string must not
	// make the loop decode past the end.
	if (limit < 0 || limit > n)
		limit = n;

	var result = new List<Rune> (limit);
	int offset = 0;
	for (int i = 0; i < limit; i++) {
		(var rune, var size) = Utf8.DecodeRune (this, offset);
		// The list starts empty, so entries must be appended; assigning
		// through the indexer would throw ArgumentOutOfRangeException.
		result.Add (rune);
		offset += size;
	}
	return result;
}
11381187

11391188
// primeRK is the prime base used in Rabin-Karp algorithm.

0 commit comments

Comments
 (0)