Skip to content

Commit 911e9fa

Browse files
committed
add Containers_bencode
A small module to parse/print Bencode values. Bencode is really simple and can embed binary strings easily, unlike JSON.
1 parent e633831 commit 911e9fa

File tree

5 files changed

+276
-3
lines changed

5 files changed

+276
-3
lines changed

containers.opam

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ depends: [
1717
"seq" # compat
1818
"either" # compat
1919
"qtest" { with-test }
20-
"qcheck" { with-test }
20+
"qcheck" { >= "0.14" & with-test }
2121
"ounit" { with-test }
2222
"iter" { with-test }
2323
"gen" { with-test }

qtest/dune

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77
(rule
88
(targets run_qtest.ml)
99
(deps ./make.exe (source_tree ../src))
10-
(action (run ./make.exe -target %{targets} ../src/core ../src/unix/)))
10+
(action (run ./make.exe -target %{targets} ../src/core ../src/bencode ../src/unix/)))
1111

1212
(executable
1313
(name run_qtest)
1414
(modes native)
1515
(modules run_qtest)
1616
; disable some warnings in qtests
1717
(flags :standard -warn-error -a -w -3-33-35-27-39-50)
18-
(libraries iter gen qcheck containers containers.unix unix uutf threads))
18+
(libraries iter gen qcheck containers containers.unix containers.bencode unix uutf threads))
1919

2020
(rule
2121
(alias runtest)

src/bencode/containers_bencode.ml

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
module Str_map = Map.Make(String)
2+
3+
type t =
4+
| Int of int64
5+
| String of string
6+
| List of t list
7+
| Map of t Str_map.t
8+
9+
let rec equal t1 t2 = match t1, t2 with
10+
| Int i1, Int i2 -> i1 = i2
11+
| String s1, String s2 -> s1 = s2
12+
| List l1, List l2 ->
13+
(try List.for_all2 equal l1 l2 with Invalid_argument _ -> false)
14+
| Map d1, Map d2 -> Str_map.equal equal d1 d2
15+
| (Int _ | String _ | List _ | Map _), _ -> false
16+
17+
let rec hash t =
18+
let module H = CCHash in
19+
match t with
20+
| Int i -> H.int64 i
21+
| String s -> H.combine2 10 (H.string s)
22+
| List l -> H.combine2 20 (H.list hash l)
23+
| Map l ->
24+
H.combine2 30
25+
(H.iter (H.pair H.string hash) @@
26+
(fun k -> Str_map.iter (fun x y -> k(x,y)) l))
27+
28+
let int64 i : t = Int i
29+
let int i : t = int64 (Int64.of_int i)
30+
let string s : t = String s
31+
let list l : t = List l
32+
let map m : t = Map m
33+
let map_of_list l : t =
34+
map @@ List.fold_left (fun m (k,v) -> Str_map.add k v m) Str_map.empty l
35+
36+
let rec pp_debug out (self:t) : unit =
37+
let fpf = Format.fprintf in
38+
match self with
39+
| Int i -> fpf out "%Ld" i
40+
| String s -> fpf out "%S" s
41+
| List l ->
42+
fpf out "[@[<hv>";
43+
List.iteri (fun i v ->
44+
if i>0 then fpf out ";@ ";
45+
pp_debug out v) l;
46+
fpf out "@]]"
47+
| Map m ->
48+
fpf out "{@[<hv>";
49+
let i = ref 0 in
50+
Str_map.iter (fun k v ->
51+
if !i>0 then fpf out ";@ ";
52+
incr i;
53+
fpf out "@[<1>%S:@ %a@]" k pp_debug v) m;
54+
fpf out "@]}"
55+
56+
let to_string_debug self = Format.asprintf "%a" pp_debug self
57+
58+
module Encode = struct
59+
let bpf = Printf.bprintf
60+
let fpf = Printf.fprintf
61+
62+
let rec to_buffer (buf:Buffer.t) (self:t) : unit =
63+
let recurse = to_buffer buf in
64+
let addc = Buffer.add_char in
65+
match self with
66+
| Int i -> bpf buf "i%Lde" i
67+
| String s -> bpf buf "%d:%s" (String.length s) s
68+
| List l -> addc buf 'l'; List.iter recurse l; addc buf 'e'
69+
| Map l ->
70+
addc buf 'd';
71+
Str_map.iter (fun k v -> bpf buf "%d:%s%a" (String.length k) k to_buffer v) l;
72+
addc buf 'e'
73+
74+
let to_string (self:t) : string =
75+
let buf = Buffer.create 32 in
76+
to_buffer buf self;
77+
Buffer.contents buf
78+
79+
let rec to_chan (oc:out_channel) (self:t) : unit =
80+
let recurse = to_chan oc in
81+
let addc = output_char in
82+
match self with
83+
| Int i -> fpf oc "i%Lde" i
84+
| String s -> fpf oc "%d:%s" (String.length s) s
85+
| List l -> addc oc 'l'; List.iter recurse l; addc oc 'e'
86+
| Map l ->
87+
addc oc 'd';
88+
Str_map.iter (fun k v -> fpf oc "%d:%s%a" (String.length k) k to_chan v) l;
89+
addc oc 'e'
90+
91+
let to_fmt out self =
92+
Format.pp_print_string out (to_string self)
93+
end
94+
95+
module Decode = struct
96+
exception Fail
97+
98+
let of_string s =
99+
let i = ref 0 in
100+
101+
let[@inline] check_not_eof() =
102+
if !i >= String.length s then raise_notrace Fail;
103+
in
104+
105+
let rec top () : t =
106+
check_not_eof ();
107+
match String.unsafe_get s !i with
108+
| 'l' ->
109+
incr i;
110+
read_list []
111+
| 'd' ->
112+
incr i;
113+
read_map Str_map.empty
114+
| 'i' -> incr i; let n = read_int 'e' true 0 in int n
115+
| '0' .. '9' -> String (parse_str_len ())
116+
| _ -> raise_notrace Fail
117+
118+
(* read integer until char [stop] is met, consume [stop], return int *)
119+
and read_int stop sign n : int =
120+
check_not_eof ();
121+
match String.unsafe_get s !i with
122+
| c when c == stop -> incr i; if sign then n else -n
123+
| '-' when stop == 'e' && sign && n=0 ->
124+
incr i; read_int stop false n
125+
| '0' .. '9' as c ->
126+
incr i; read_int stop sign (Char.code c - Char.code '0' + 10 * n)
127+
| _ -> raise_notrace Fail
128+
129+
and parse_str_len () : string =
130+
let n = read_int ':' true 0 in
131+
if !i + n > String.length s then raise_notrace Fail;
132+
let s = String.sub s !i n in
133+
i := !i + n;
134+
s
135+
136+
and read_list acc =
137+
check_not_eof();
138+
match String.unsafe_get s !i with
139+
| 'e' -> incr i; List (List.rev acc)
140+
| _ -> let x = top() in read_list (x::acc)
141+
142+
and read_map acc =
143+
check_not_eof();
144+
match String.unsafe_get s !i with
145+
| 'e' -> incr i; Map acc
146+
| _ ->
147+
let k = parse_str_len () in
148+
let v = top() in
149+
read_map (Str_map.add k v acc)
150+
in
151+
152+
try Some (top())
153+
with Fail -> None
154+
155+
let of_string_exn s =
156+
match of_string s with
157+
| Some x -> x
158+
| None -> failwith "bencode.decode: invalid string"
159+
end
160+
161+
(*$= & ~printer:to_string_debug
162+
(map_of_list []) (Decode.of_string_exn "de")
163+
(list [int 1; int 2; string "foo"]) (Decode.of_string_exn "li1ei2e3:fooe")
164+
*)
165+
166+
(*$inject
167+
module B = Containers_bencode
168+
169+
let rec size = function
170+
| Int _ | String _ -> 1
171+
| List l -> List.fold_left (fun n x -> n + size x) 0 l
172+
| Map m -> Str_map.fold(fun _ v n -> size v + n) m 0
173+
174+
let g_rand_b =
175+
Q.Gen.(
176+
sized_size (0--7) @@ fix @@ fun self n ->
177+
let str n = string_size ~gen:char (0 -- n) in
178+
let base = [
179+
int >|= B.int;
180+
str 100 >|= B.string;
181+
] in
182+
match n with
183+
| 0 -> oneof base
184+
| n ->
185+
frequency @@
186+
List.map (fun x -> 2, x) base @
187+
[ 1, list_size (0 -- 10) (self (n-1)) >|= B.list;
188+
1, list_size (0 -- 10) (pair (str 10) (self (n-1)) ) >|= B.map_of_list;
189+
]
190+
)
191+
192+
let rec shrink_b self = Q.(Iter.(
193+
match self with
194+
| Int i -> Shrink.int64 i >|= B.int64
195+
| String s -> Shrink.string s >|= B.string
196+
| List l -> Shrink.list ~shrink:shrink_b l >|= B.list
197+
| Map l ->
198+
let l = Str_map.fold (fun k v l -> (k,v) :: l) l [] in
199+
Shrink.list ~shrink:(fun (k,v) ->
200+
(Shrink.string k >|= fun k -> k,v) <+>
201+
(shrink_b v >|= fun v -> k,v))
202+
l
203+
>|= B.map_of_list
204+
))
205+
206+
let rand_b = Q.make ~print:to_string_debug ~stats:["size", size]
207+
~shrink:shrink_b g_rand_b
208+
*)
209+
210+
(*$Q
211+
rand_b (fun b -> \
212+
let s=Encode.to_string b in \
213+
equal (Decode.of_string_exn s) b)
214+
*)

src/bencode/containers_bencode.mli

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
(** Basic Bencode decoder/encoder.
2+
3+
See https://en.wikipedia.org/wiki/Bencode .
4+
5+
@since NEXT_RELEASE *)
6+
7+
module Str_map : module type of Map.Make(String)
8+
9+
type t =
10+
| Int of int64
11+
| String of string
12+
| List of t list
13+
| Map of t Str_map.t
14+
15+
val equal : t -> t -> bool
16+
17+
val hash : t -> int
18+
19+
val pp_debug : Format.formatter -> t -> unit
20+
(** Printer for diagnostic/human consumption *)
21+
22+
val to_string_debug : t -> string
23+
24+
val int : int -> t
25+
26+
val int64 : int64 -> t
27+
28+
val string : string -> t
29+
30+
val list : t list -> t
31+
32+
val map_of_list : (string * t) list -> t
33+
34+
val map : t Str_map.t -> t
35+
36+
(** Encoding *)
37+
module Encode : sig
38+
val to_string : t -> string
39+
40+
val to_buffer : Buffer.t -> t -> unit
41+
42+
val to_chan : out_channel -> t -> unit
43+
44+
val to_fmt : Format.formatter -> t -> unit
45+
end
46+
47+
(** Decoding *)
48+
module Decode : sig
49+
val of_string : string -> t option
50+
51+
val of_string_exn : string -> t
52+
(** Parse string.
53+
@raise Failure if the string is not valid bencode. *)
54+
end

src/bencode/dune

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(library
2+
(name containers_bencode)
3+
(public_name containers.bencode)
4+
(libraries containers)
5+
(synopsis "Bencode codec for containers (the format for bittorrent files)"))

0 commit comments

Comments
 (0)