|
| 1 | + |
| 2 | +module Fmt = CCFormat |
| 3 | + |
| 4 | +type t = |
| 5 | + [ `Null |
| 6 | + | `Undefined |
| 7 | + | `Simple of int |
| 8 | + | `Bool of bool |
| 9 | + | `Int of int |
| 10 | + | `Float of float |
| 11 | + | `Bytes of string |
| 12 | + | `Text of string |
| 13 | + | `Array of t list |
| 14 | + | `Map of (t * t) list |
| 15 | + | `Tag of int * t |
| 16 | + ] |
| 17 | + |
| 18 | +let rec pp_diagnostic out (self:t) = |
| 19 | + match self with |
| 20 | + | `Null -> Fmt.string out "null" |
| 21 | + | `Undefined -> Fmt.string out "undefined" |
| 22 | + | `Simple i -> Fmt.fprintf out "simple(%d)" i |
| 23 | + | `Bool b -> Fmt.bool out b |
| 24 | + | `Int i -> Fmt.int out i |
| 25 | + | `Float f -> Fmt.float out f |
| 26 | + | `Bytes b -> Fmt.fprintf out "h'%s'" (CCString.to_hex b) |
| 27 | + | `Text s -> Fmt.fprintf out "%S" s |
| 28 | + | `Array l -> |
| 29 | + Fmt.fprintf out "[@["; |
| 30 | + List.iteri |
| 31 | + (fun i x -> |
| 32 | + if i>0 then Fmt.fprintf out ",@ "; |
| 33 | + pp_diagnostic out x) l; |
| 34 | + Fmt.fprintf out "@]]" |
| 35 | + | `Map l -> |
| 36 | + Fmt.fprintf out "{@["; |
| 37 | + List.iteri |
| 38 | + (fun i (k,v) -> |
| 39 | + if i>0 then Fmt.fprintf out ",@ "; |
| 40 | + Fmt.fprintf out "@[%a:@ %a@]" pp_diagnostic k pp_diagnostic v) l; |
| 41 | + Fmt.fprintf out "@]}" |
| 42 | + | `Tag (i,x) -> Fmt.fprintf out "%d(@[%a@])" i pp_diagnostic x |
| 43 | + |
| 44 | +let to_string_diagnostic (self:t) : string = |
| 45 | + Format.asprintf "@[<h>%a@]" pp_diagnostic self |
| 46 | + |
| 47 | +(* we use funtions from Bytes *) |
| 48 | +[@@@ifge 4.08] |
| 49 | + |
| 50 | +exception Indefinite |
| 51 | + |
| 52 | +let decode_exn (s:string) : t = |
| 53 | + let b = Bytes.unsafe_of_string s in |
| 54 | + let i = ref 0 in |
| 55 | + |
| 56 | + (* currently at end delimiter? *) |
| 57 | + let[@inline] is_break_stop_code () = |
| 58 | + Char.code s.[!i] = 0b111_11111 |
| 59 | + in |
| 60 | + |
| 61 | + let[@inline] read_i8 () = |
| 62 | + let c = Char.code s.[!i] in |
| 63 | + incr i; |
| 64 | + c |
| 65 | + in |
| 66 | + |
| 67 | + let[@inline] read_i16 () = |
| 68 | + let c = Bytes.get_uint16_be b !i in |
| 69 | + i := !i + 2; |
| 70 | + c |
| 71 | + in |
| 72 | + |
| 73 | + let[@inline] read_i32 () = |
| 74 | + let c = Bytes.get_int32_be b !i in |
| 75 | + i := !i + 4; |
| 76 | + c |
| 77 | + in |
| 78 | + |
| 79 | + let[@inline] read_i64 () = |
| 80 | + let c = Bytes.get_int64_be b !i in |
| 81 | + i := !i + 8; |
| 82 | + c |
| 83 | + in |
| 84 | + |
| 85 | + let reserve_n n = |
| 86 | + let j = !i in |
| 87 | + if j + n > String.length s then failwith "cbor: cannot extract slice"; |
| 88 | + i := !i + n; |
| 89 | + j |
| 90 | + in |
| 91 | + |
| 92 | + let[@inline] i64_to_int i = |
| 93 | + let j = Int64.to_int i in |
| 94 | + if Int64.(of_int j = i) then j |
| 95 | + else failwith "int64 does not fit in int" |
| 96 | + in |
| 97 | + |
| 98 | + (* read integer value from least significant bits *) |
| 99 | + let read_int ~allow_indefinite low = |
| 100 | + match low with |
| 101 | + | _ when low < 0 -> failwith "cbor: invalid length" |
| 102 | + | _ when low < 24 -> Int64.of_int low |
| 103 | + | 24 -> Int64.of_int (read_i8()) |
| 104 | + | 25 -> Int64.of_int (read_i16()) |
| 105 | + | 26 -> Int64.of_int32 (read_i32()) |
| 106 | + | 27 -> read_i64() |
| 107 | + | 28 | 29 | 30 -> failwith "cbor: invalid length" |
| 108 | + | 31 -> |
| 109 | + if allow_indefinite then raise_notrace Indefinite |
| 110 | + else failwith "cbor: invalid integer 31 in this context" |
| 111 | + | _ -> assert false |
| 112 | + in |
| 113 | + |
| 114 | + (* appendix D |
| 115 | +
|
| 116 | + double decode_half(unsigned char *halfp) { |
| 117 | + unsigned half = (halfp[0] << 8) + halfp[1]; |
| 118 | + unsigned exp = (half >> 10) & 0x1f; |
| 119 | + unsigned mant = half & 0x3ff; |
| 120 | + double val; |
| 121 | + if (exp == 0) val = ldexp(mant, -24); |
| 122 | + else if (exp != 31) val = ldexp(mant + 1024, exp - 25); |
| 123 | + else val = mant == 0 ? INFINITY : NAN; |
| 124 | + return half & 0x8000 ? -val : val; |
| 125 | + } |
| 126 | + *) |
| 127 | + let decode_f16 (half:int) : float = |
| 128 | + (* exponent is bits 15:10 *) |
| 129 | + let exp = (half lsr 10) land 0x1f in |
| 130 | + (* mantissa is bits 9:0 *) |
| 131 | + let mant = half land 0x3ff in |
| 132 | + let value = |
| 133 | + if exp = 0 then ldexp (float mant) (-24) |
| 134 | + else if exp <> 31 then ldexp (float (mant + 1024)) (exp - 25) |
| 135 | + else if mant = 0 then infinity else nan |
| 136 | + in |
| 137 | + if half land 0x8000 <> 0 then -. value else value |
| 138 | + in |
| 139 | + |
| 140 | + (* roughly follow https://www.rfc-editor.org/rfc/rfc8949.html#pseudocode *) |
| 141 | + let rec read_value () = |
| 142 | + let c = read_i8() in |
| 143 | + let high = (c land 0b111_00000) lsr 5 in |
| 144 | + let low = c land 0b000_11111 in |
| 145 | + begin match high with |
| 146 | + | 0 -> `Int (read_int ~allow_indefinite:false low |> i64_to_int) |
| 147 | + | 1 -> |
| 148 | + let i = read_int ~allow_indefinite:false low |> i64_to_int in |
| 149 | + `Int (-1 - i) |
| 150 | + | 2 -> |
| 151 | + let s = read_bytes ~ty:`Bytes low in |
| 152 | + `Bytes s |
| 153 | + | 3 -> |
| 154 | + let s = read_bytes ~ty:`String low in |
| 155 | + `Text s |
| 156 | + |
| 157 | + | 4 -> |
| 158 | + let l = |
| 159 | + match read_int ~allow_indefinite:true low |> i64_to_int with |
| 160 | + | len -> List.init len (fun _ -> read_value()) |
| 161 | + | exception Indefinite -> |
| 162 | + let l = ref [] in |
| 163 | + while not (is_break_stop_code ()) do |
| 164 | + l := read_value() :: !l |
| 165 | + done; |
| 166 | + incr i; (* consume stop code *) |
| 167 | + List.rev !l |
| 168 | + in |
| 169 | + `Array l |
| 170 | + |
| 171 | + | 5 -> |
| 172 | + let l = |
| 173 | + match read_int ~allow_indefinite:true low |> i64_to_int with |
| 174 | + | len -> List.init len (fun _ -> read_pair()) |
| 175 | + | exception Indefinite -> |
| 176 | + let l = ref [] in |
| 177 | + while not (is_break_stop_code ()) do |
| 178 | + l := read_pair() :: !l |
| 179 | + done; |
| 180 | + incr i; (* consume stop code *) |
| 181 | + List.rev !l |
| 182 | + in |
| 183 | + `Map l |
| 184 | + |
| 185 | + | 6 -> |
| 186 | + let tag = read_int ~allow_indefinite:false low |> i64_to_int in |
| 187 | + let v = read_value() in |
| 188 | + `Tag (tag,v) |
| 189 | + |
| 190 | + | 7 -> |
| 191 | + (* simple or float, |
| 192 | + https://www.rfc-editor.org/rfc/rfc8949.html#fpnocont *) |
| 193 | + let i = read_int ~allow_indefinite:false low in |
| 194 | + begin match low with |
| 195 | + | 20 -> `Bool false |
| 196 | + | 21 -> `Bool true |
| 197 | + | 22 -> `Null |
| 198 | + | 23 -> `Undefined |
| 199 | + | _ when low<=24 -> `Simple (i64_to_int i) |
| 200 | + | 25 -> (* float16 *) |
| 201 | + `Float (decode_f16 (Int64.to_int i)) |
| 202 | + | 26 -> (* float 32 *) |
| 203 | + `Float (Int32.float_of_bits (Int64.to_int32 i)) |
| 204 | + | 27 -> (* float 64 *) |
| 205 | + `Float (Int64.float_of_bits i) |
| 206 | + | 28 | 29 | 30 -> failwith "cbor: malformed" |
| 207 | + | 31 -> failwith "uncaught 'break' stop code" |
| 208 | + | _ -> assert false (* unreachable *) |
| 209 | + end |
| 210 | + |
| 211 | + | _ -> assert false (* unreachable *) |
| 212 | + end |
| 213 | + |
| 214 | + and read_bytes ~ty low = |
| 215 | + match read_int ~allow_indefinite:true low |> i64_to_int with |
| 216 | + | exception Indefinite -> |
| 217 | + let buf = Buffer.create 32 in |
| 218 | + while not (is_break_stop_code()) do |
| 219 | + match read_value(), ty with |
| 220 | + | `Text s, `String |
| 221 | + | `Bytes s, `Bytes -> Buffer.add_string buf s |
| 222 | + | _ -> failwith "cbor: invalid chunk in indefinite length string/byte" |
| 223 | + done; |
| 224 | + incr i; (* consume stop code *) |
| 225 | + Buffer.contents buf |
| 226 | + | len -> |
| 227 | + let off = reserve_n len in |
| 228 | + String.sub s off len |
| 229 | + |
| 230 | + and read_pair() = |
| 231 | + let k = read_value() in |
| 232 | + let v = read_value() in |
| 233 | + k,v |
| 234 | + in |
| 235 | + read_value() |
| 236 | + |
| 237 | +let decode s = |
| 238 | + try Ok (decode_exn s) |
| 239 | + with Failure s -> Error s |
| 240 | + |
| 241 | +let encode ?(buf=Buffer.create 32) (self:t) : string = |
| 242 | + Buffer.clear buf; |
| 243 | + |
| 244 | + let[@inline] add_byte (high:int) (low:int) = |
| 245 | + let i = (high lsl 5) lor low in |
| 246 | + assert ((i land 0xff) == i); |
| 247 | + Buffer.add_char buf (Char.unsafe_chr i) |
| 248 | + in |
| 249 | + |
| 250 | + let add_i64 (i:int64) = |
| 251 | + Buffer.add_int64_be buf i |
| 252 | + in |
| 253 | + |
| 254 | + (* add unsigned integer, including first tag byte *) |
| 255 | + let add_uint (high:int) (x:int) = |
| 256 | + assert (x >= 0); |
| 257 | + if x < 24 then add_byte high x |
| 258 | + else if x <= 0xff then ( |
| 259 | + add_byte high 24; |
| 260 | + Buffer.add_char buf (Char.unsafe_chr x) |
| 261 | + ) else if x <= 0xff_ff then ( |
| 262 | + add_byte high 25; |
| 263 | + Buffer.add_uint16_be buf x |
| 264 | + ) else if x <= 0xff_ff_ff_ff then ( |
| 265 | + add_byte high 26; |
| 266 | + Buffer.add_int32_be buf (Int32.of_int x) |
| 267 | + ) else ( |
| 268 | + add_byte high 27; |
| 269 | + Buffer.add_int64_be buf (Int64.of_int x) |
| 270 | + ) |
| 271 | + in |
| 272 | + |
| 273 | + let rec encode_val (self:t): unit = |
| 274 | + match self with |
| 275 | + | `Bool false -> add_byte 7 20 |
| 276 | + | `Bool true -> add_byte 7 21 |
| 277 | + | `Null -> add_byte 7 22 |
| 278 | + | `Undefined -> add_byte 7 23 |
| 279 | + | `Simple i -> |
| 280 | + if i < 24 then add_byte 7 i |
| 281 | + else if i <= 0xff then ( |
| 282 | + add_byte 7 24; |
| 283 | + Buffer.add_char buf (Char.unsafe_chr i) |
| 284 | + ) else ( |
| 285 | + failwith "cbor: simple value too high (above 255)" |
| 286 | + ) |
| 287 | + | `Float f -> |
| 288 | + add_byte 7 27; (* float 64 *) |
| 289 | + add_i64 (Int64.bits_of_float f) |
| 290 | + | `Array l -> |
| 291 | + add_uint 4 (List.length l); |
| 292 | + List.iter encode_val l |
| 293 | + | `Map l -> |
| 294 | + add_uint 5 (List.length l); |
| 295 | + List.iter (fun (k,v) -> encode_val k; encode_val v) l |
| 296 | + | `Text s -> |
| 297 | + add_uint 3 (String.length s); |
| 298 | + Buffer.add_string buf s |
| 299 | + | `Bytes s -> |
| 300 | + add_uint 2 (String.length s); |
| 301 | + Buffer.add_string buf s |
| 302 | + | `Tag (t, v) -> |
| 303 | + add_uint 6 t; |
| 304 | + encode_val v |
| 305 | + | `Int i -> |
| 306 | + if i >= 0 then add_uint 0 i |
| 307 | + else if min_int + 2 > i then ( |
| 308 | + (* large negative int, be careful. encode [(-i)-1] via int64. *) |
| 309 | + add_byte 1 27; |
| 310 | + Buffer.add_int64_be buf (Int64.(neg (add 1L (of_int i)))) |
| 311 | + ) else ( |
| 312 | + add_uint 1 (-i-1) |
| 313 | + ) |
| 314 | + in |
| 315 | + encode_val self; |
| 316 | + Buffer.contents buf |
| 317 | + |
| 318 | +[@@@endif] |
0 commit comments