Skip to content

Commit 26d7d90

Browse files
committed
Add bit_array.to_string_lossy
1 parent 126db53 commit 26d7d90

File tree

3 files changed

+57
-0
lines changed

3 files changed

+57
-0
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
- The `bit_array` module gains the `to_string_lossy` function.
6+
37
## v0.66.0 - 2025-10-21
48

59
- The `tap` function from the `function` module has been deprecated.

src/gleam/bit_array.gleam

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,46 @@ pub fn to_string(bits: BitArray) -> Result(String, Nil) {
9595
@external(erlang, "gleam_stdlib", "identity")
9696
fn unsafe_to_string(a: BitArray) -> String
9797

98+
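/// Converts a bit array to a string. Each byte that cannot be decoded as part
99+
/// of a valid UTF-8 codepoint, and any trailing bits that do not fill a whole
100+
/// byte, is passed to the provided callback and its result is included in the
/// final string in place of the invalid data.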
101+
///
102+
/// ## Examples
103+
///
104+
/// ```gleam
105+
/// to_string_lossy(<<"A":utf8, 0x80, "1":utf8, 0:size(5)>>, fn(_) { "�" })
106+
/// // -> "A�1�"
107+
/// ```
108+
///
109+
pub fn to_string_lossy(
110+
bits: BitArray,
111+
map_invalid_bits: fn(BitArray) -> String,
112+
) -> String {
113+
to_string_lossy_impl(bits, map_invalid_bits, "")
114+
}
115+
116+
fn to_string_lossy_impl(
117+
bits: BitArray,
118+
map_invalid_bits: fn(BitArray) -> String,
119+
acc: String,
120+
) -> String {
121+
case bits {
122+
<<>> -> acc
123+
124+
<<x:utf8_codepoint, rest:bits>> ->
125+
to_string_lossy_impl(
126+
rest,
127+
map_invalid_bits,
128+
acc <> string.from_utf_codepoints([x]),
129+
)
130+
131+
<<x:bytes-1, rest:bits>> ->
132+
to_string_lossy_impl(rest, map_invalid_bits, acc <> map_invalid_bits(x))
133+
134+
_ -> acc <> map_invalid_bits(bits)
135+
}
136+
}
137+
98138
/// Creates a new bit array by joining multiple binaries.
99139
///
100140
/// ## Examples

test/gleam/bit_array_test.gleam

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,19 @@ pub fn to_string_test() {
135135
assert bit_array.to_string(x) == Ok("ø")
136136
}
137137

138+
pub fn to_string_lossy_test() {
139+
assert bit_array.to_string_lossy(<<>>, fn(_) { "�" }) == ""
140+
141+
assert bit_array.to_string_lossy(<<0x80, "A":utf8, 0x81>>, fn(_) { "�" })
142+
== "�A�"
143+
144+
// Test some codepoints that require 2/3/4 bytes to be stored as UTF-8
145+
assert bit_array.to_string_lossy(<<"£И한𐍈":utf8>>, fn(_) { "�" }) == "£И한𐍈"
146+
147+
// Test unaligned bit array
148+
assert bit_array.to_string_lossy(<<"ø":utf8, 2:4>>, fn(_) { "�" }) == "ø�"
149+
}
150+
138151
pub fn is_utf8_test() {
139152
assert bit_array.is_utf8(<<>>)
140153

0 commit comments

Comments
 (0)