Skip to content

Commit f9abed0

Browse files
authored
Merge pull request #413 from c-cube/feat-cbor
add CBOR sub-library
2 parents e24b206 + 43f88a3 commit f9abed0

File tree

10 files changed

+4968
-1
lines changed

10 files changed

+4968
-1
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
os:
1818
- macos-latest
1919
- ubuntu-latest
20-
- windows-latest
20+
#- windows-latest
2121
ocaml-compiler:
2222
- '4.03.x'
2323
- '4.13.x'

containers.opam

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ depends: [
1919
"qtest" { with-test }
2020
"qcheck" { >= "0.14" & with-test }
2121
"ounit2" { with-test }
22+
"yojson" { with-test }
2223
"iter" { with-test }
2324
"gen" { with-test }
2425
"csexp" { with-test }

src/cbor/containers_cbor.ml

Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
2+
module Fmt = CCFormat
3+
4+
type t =
5+
[ `Null
6+
| `Undefined
7+
| `Simple of int
8+
| `Bool of bool
9+
| `Int of int
10+
| `Float of float
11+
| `Bytes of string
12+
| `Text of string
13+
| `Array of t list
14+
| `Map of (t * t) list
15+
| `Tag of int * t
16+
]
17+
18+
let rec pp_diagnostic out (self:t) =
19+
match self with
20+
| `Null -> Fmt.string out "null"
21+
| `Undefined -> Fmt.string out "undefined"
22+
| `Simple i -> Fmt.fprintf out "simple(%d)" i
23+
| `Bool b -> Fmt.bool out b
24+
| `Int i -> Fmt.int out i
25+
| `Float f -> Fmt.float out f
26+
| `Bytes b -> Fmt.fprintf out "h'%s'" (CCString.to_hex b)
27+
| `Text s -> Fmt.fprintf out "%S" s
28+
| `Array l ->
29+
Fmt.fprintf out "[@[";
30+
List.iteri
31+
(fun i x ->
32+
if i>0 then Fmt.fprintf out ",@ ";
33+
pp_diagnostic out x) l;
34+
Fmt.fprintf out "@]]"
35+
| `Map l ->
36+
Fmt.fprintf out "{@[";
37+
List.iteri
38+
(fun i (k,v) ->
39+
if i>0 then Fmt.fprintf out ",@ ";
40+
Fmt.fprintf out "@[%a:@ %a@]" pp_diagnostic k pp_diagnostic v) l;
41+
Fmt.fprintf out "@]}"
42+
| `Tag (i,x) -> Fmt.fprintf out "%d(@[%a@])" i pp_diagnostic x
43+
44+
let to_string_diagnostic (self:t) : string =
45+
Format.asprintf "@[<h>%a@]" pp_diagnostic self
46+
47+
(* we use funtions from Bytes *)
48+
[@@@ifge 4.08]
49+
50+
exception Indefinite
51+
52+
let decode_exn (s:string) : t =
53+
let b = Bytes.unsafe_of_string s in
54+
let i = ref 0 in
55+
56+
(* currently at end delimiter? *)
57+
let[@inline] is_break_stop_code () =
58+
Char.code s.[!i] = 0b111_11111
59+
in
60+
61+
let[@inline] read_i8 () =
62+
let c = Char.code s.[!i] in
63+
incr i;
64+
c
65+
in
66+
67+
let[@inline] read_i16 () =
68+
let c = Bytes.get_uint16_be b !i in
69+
i := !i + 2;
70+
c
71+
in
72+
73+
let[@inline] read_i32 () =
74+
let c = Bytes.get_int32_be b !i in
75+
i := !i + 4;
76+
c
77+
in
78+
79+
let[@inline] read_i64 () =
80+
let c = Bytes.get_int64_be b !i in
81+
i := !i + 8;
82+
c
83+
in
84+
85+
let reserve_n n =
86+
let j = !i in
87+
if j + n > String.length s then failwith "cbor: cannot extract slice";
88+
i := !i + n;
89+
j
90+
in
91+
92+
let[@inline] i64_to_int i =
93+
let j = Int64.to_int i in
94+
if Int64.(of_int j = i) then j
95+
else failwith "int64 does not fit in int"
96+
in
97+
98+
(* read integer value from least significant bits *)
99+
let read_int ~allow_indefinite low =
100+
match low with
101+
| _ when low < 0 -> failwith "cbor: invalid length"
102+
| _ when low < 24 -> Int64.of_int low
103+
| 24 -> Int64.of_int (read_i8())
104+
| 25 -> Int64.of_int (read_i16())
105+
| 26 -> Int64.of_int32 (read_i32())
106+
| 27 -> read_i64()
107+
| 28 | 29 | 30 -> failwith "cbor: invalid length"
108+
| 31 ->
109+
if allow_indefinite then raise_notrace Indefinite
110+
else failwith "cbor: invalid integer 31 in this context"
111+
| _ -> assert false
112+
in
113+
114+
(* appendix D
115+
116+
double decode_half(unsigned char *halfp) {
117+
unsigned half = (halfp[0] << 8) + halfp[1];
118+
unsigned exp = (half >> 10) & 0x1f;
119+
unsigned mant = half & 0x3ff;
120+
double val;
121+
if (exp == 0) val = ldexp(mant, -24);
122+
else if (exp != 31) val = ldexp(mant + 1024, exp - 25);
123+
else val = mant == 0 ? INFINITY : NAN;
124+
return half & 0x8000 ? -val : val;
125+
}
126+
*)
127+
let decode_f16 (half:int) : float =
128+
(* exponent is bits 15:10 *)
129+
let exp = (half lsr 10) land 0x1f in
130+
(* mantissa is bits 9:0 *)
131+
let mant = half land 0x3ff in
132+
let value =
133+
if exp = 0 then ldexp (float mant) (-24)
134+
else if exp <> 31 then ldexp (float (mant + 1024)) (exp - 25)
135+
else if mant = 0 then infinity else nan
136+
in
137+
if half land 0x8000 <> 0 then -. value else value
138+
in
139+
140+
(* roughly follow https://www.rfc-editor.org/rfc/rfc8949.html#pseudocode *)
141+
let rec read_value () =
142+
let c = read_i8() in
143+
let high = (c land 0b111_00000) lsr 5 in
144+
let low = c land 0b000_11111 in
145+
begin match high with
146+
| 0 -> `Int (read_int ~allow_indefinite:false low |> i64_to_int)
147+
| 1 ->
148+
let i = read_int ~allow_indefinite:false low |> i64_to_int in
149+
`Int (-1 - i)
150+
| 2 ->
151+
let s = read_bytes ~ty:`Bytes low in
152+
`Bytes s
153+
| 3 ->
154+
let s = read_bytes ~ty:`String low in
155+
`Text s
156+
157+
| 4 ->
158+
let l =
159+
match read_int ~allow_indefinite:true low |> i64_to_int with
160+
| len -> List.init len (fun _ -> read_value())
161+
| exception Indefinite ->
162+
let l = ref [] in
163+
while not (is_break_stop_code ()) do
164+
l := read_value() :: !l
165+
done;
166+
incr i; (* consume stop code *)
167+
List.rev !l
168+
in
169+
`Array l
170+
171+
| 5 ->
172+
let l =
173+
match read_int ~allow_indefinite:true low |> i64_to_int with
174+
| len -> List.init len (fun _ -> read_pair())
175+
| exception Indefinite ->
176+
let l = ref [] in
177+
while not (is_break_stop_code ()) do
178+
l := read_pair() :: !l
179+
done;
180+
incr i; (* consume stop code *)
181+
List.rev !l
182+
in
183+
`Map l
184+
185+
| 6 ->
186+
let tag = read_int ~allow_indefinite:false low |> i64_to_int in
187+
let v = read_value() in
188+
`Tag (tag,v)
189+
190+
| 7 ->
191+
(* simple or float,
192+
https://www.rfc-editor.org/rfc/rfc8949.html#fpnocont *)
193+
let i = read_int ~allow_indefinite:false low in
194+
begin match low with
195+
| 20 -> `Bool false
196+
| 21 -> `Bool true
197+
| 22 -> `Null
198+
| 23 -> `Undefined
199+
| _ when low<=24 -> `Simple (i64_to_int i)
200+
| 25 -> (* float16 *)
201+
`Float (decode_f16 (Int64.to_int i))
202+
| 26 -> (* float 32 *)
203+
`Float (Int32.float_of_bits (Int64.to_int32 i))
204+
| 27 -> (* float 64 *)
205+
`Float (Int64.float_of_bits i)
206+
| 28 | 29 | 30 -> failwith "cbor: malformed"
207+
| 31 -> failwith "uncaught 'break' stop code"
208+
| _ -> assert false (* unreachable *)
209+
end
210+
211+
| _ -> assert false (* unreachable *)
212+
end
213+
214+
and read_bytes ~ty low =
215+
match read_int ~allow_indefinite:true low |> i64_to_int with
216+
| exception Indefinite ->
217+
let buf = Buffer.create 32 in
218+
while not (is_break_stop_code()) do
219+
match read_value(), ty with
220+
| `Text s, `String
221+
| `Bytes s, `Bytes -> Buffer.add_string buf s
222+
| _ -> failwith "cbor: invalid chunk in indefinite length string/byte"
223+
done;
224+
incr i; (* consume stop code *)
225+
Buffer.contents buf
226+
| len ->
227+
let off = reserve_n len in
228+
String.sub s off len
229+
230+
and read_pair() =
231+
let k = read_value() in
232+
let v = read_value() in
233+
k,v
234+
in
235+
read_value()
236+
237+
let decode s =
238+
try Ok (decode_exn s)
239+
with Failure s -> Error s
240+
241+
let encode ?(buf=Buffer.create 32) (self:t) : string =
242+
Buffer.clear buf;
243+
244+
let[@inline] add_byte (high:int) (low:int) =
245+
let i = (high lsl 5) lor low in
246+
assert ((i land 0xff) == i);
247+
Buffer.add_char buf (Char.unsafe_chr i)
248+
in
249+
250+
let add_i64 (i:int64) =
251+
Buffer.add_int64_be buf i
252+
in
253+
254+
(* add unsigned integer, including first tag byte *)
255+
let add_uint (high:int) (x:int) =
256+
assert (x >= 0);
257+
if x < 24 then add_byte high x
258+
else if x <= 0xff then (
259+
add_byte high 24;
260+
Buffer.add_char buf (Char.unsafe_chr x)
261+
) else if x <= 0xff_ff then (
262+
add_byte high 25;
263+
Buffer.add_uint16_be buf x
264+
) else if x <= 0xff_ff_ff_ff then (
265+
add_byte high 26;
266+
Buffer.add_int32_be buf (Int32.of_int x)
267+
) else (
268+
add_byte high 27;
269+
Buffer.add_int64_be buf (Int64.of_int x)
270+
)
271+
in
272+
273+
let rec encode_val (self:t): unit =
274+
match self with
275+
| `Bool false -> add_byte 7 20
276+
| `Bool true -> add_byte 7 21
277+
| `Null -> add_byte 7 22
278+
| `Undefined -> add_byte 7 23
279+
| `Simple i ->
280+
if i < 24 then add_byte 7 i
281+
else if i <= 0xff then (
282+
add_byte 7 24;
283+
Buffer.add_char buf (Char.unsafe_chr i)
284+
) else (
285+
failwith "cbor: simple value too high (above 255)"
286+
)
287+
| `Float f ->
288+
add_byte 7 27; (* float 64 *)
289+
add_i64 (Int64.bits_of_float f)
290+
| `Array l ->
291+
add_uint 4 (List.length l);
292+
List.iter encode_val l
293+
| `Map l ->
294+
add_uint 5 (List.length l);
295+
List.iter (fun (k,v) -> encode_val k; encode_val v) l
296+
| `Text s ->
297+
add_uint 3 (String.length s);
298+
Buffer.add_string buf s
299+
| `Bytes s ->
300+
add_uint 2 (String.length s);
301+
Buffer.add_string buf s
302+
| `Tag (t, v) ->
303+
add_uint 6 t;
304+
encode_val v
305+
| `Int i ->
306+
if i >= 0 then add_uint 0 i
307+
else if min_int + 2 > i then (
308+
(* large negative int, be careful. encode [(-i)-1] via int64. *)
309+
add_byte 1 27;
310+
Buffer.add_int64_be buf (Int64.(neg (add 1L (of_int i))))
311+
) else (
312+
add_uint 1 (-i-1)
313+
)
314+
in
315+
encode_val self;
316+
Buffer.contents buf
317+
318+
[@@@endif]

src/cbor/containers_cbor.mli

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
2+
(** CBOR encoder/decoder.
3+
4+
The type is chosen to be compatible with ocaml-cbor.
5+
See {{: https://www.rfc-editor.org/rfc/rfc8949.html} the RFC}.
6+
7+
{b note} this is experimental.
8+
9+
{b note} this is only available on OCaml >= 4.08. Below that, the module
10+
is empty.
11+
12+
@since NEXT_RELEASE
13+
*)
14+
15+
type t =
16+
[ `Null
17+
| `Undefined
18+
| `Simple of int
19+
| `Bool of bool
20+
| `Int of int
21+
| `Float of float
22+
| `Bytes of string
23+
| `Text of string
24+
| `Array of t list
25+
| `Map of (t * t) list
26+
| `Tag of int * t
27+
]
28+
29+
val pp_diagnostic : t CCFormat.printer
30+
31+
val to_string_diagnostic : t -> string
32+
33+
(* we use funtions from Bytes *)
34+
[@@@ifge 4.08]
35+
36+
val encode : ?buf:Buffer.t -> t -> string
37+
38+
val decode : string -> (t, string) result
39+
40+
val decode_exn : string -> t
41+
(** Like {!decode}.
42+
@raise Failure if the string isn't valid *)
43+
44+
45+
[@@@endif]

src/cbor/dune

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(library
2+
(name containers_cbor)
3+
(libraries containers)
4+
(preprocess (action (run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
5+
(public_name containers.cbor))

0 commit comments

Comments
 (0)