|
8 | 8 | // option. This file may not be copied, modified, or distributed |
9 | 9 | // except according to those terms. |
10 | 10 |
|
11 | | -#[feature(managed_boxes)]; |
12 | | - |
13 | 11 | /* -*- mode: rust; indent-tabs-mode: nil -*- |
14 | 12 | * Implementation of 'fasta' benchmark from |
15 | 13 | * Computer Language Benchmarks Game |
16 | 14 | * http://shootout.alioth.debian.org/ |
17 | 15 | */ |
18 | | -extern mod extra; |
19 | 16 |
|
20 | | -use std::int; |
21 | 17 | use std::io; |
| 18 | +use std::io::buffered::BufferedWriter; |
22 | 19 | use std::io::File; |
| 20 | +use std::num::min; |
23 | 21 | use std::os; |
24 | | -use std::rand::Rng; |
25 | | -use std::rand; |
26 | | -use std::str; |
27 | 22 |
|
28 | | -static LINE_LENGTH: uint = 60u; |
| 23 | +static LINE_LENGTH: uint = 60; |
| 24 | +static IM: u32 = 139968; |
29 | 25 |
|
30 | 26 | struct MyRandom { |
31 | 27 | last: u32 |
32 | 28 | } |
33 | | - |
34 | | -fn myrandom_next(r: @mut MyRandom, mx: u32) -> u32 { |
35 | | - r.last = (r.last * 3877u32 + 29573u32) % 139968u32; |
36 | | - mx * r.last / 139968u32 |
37 | | -} |
38 | | - |
39 | | -#[deriving(Clone)] |
40 | | -struct AminoAcids { |
41 | | - ch: char, |
42 | | - prob: u32 |
43 | | -} |
44 | | - |
45 | | -fn make_cumulative(aa: ~[AminoAcids]) -> ~[AminoAcids] { |
46 | | - let mut cp: u32 = 0u32; |
47 | | - let mut ans: ~[AminoAcids] = ~[]; |
48 | | - for a in aa.iter() { |
49 | | - cp += a.prob; |
50 | | - ans.push(AminoAcids {ch: a.ch, prob: cp}); |
| 29 | +impl MyRandom { |
| 30 | + fn new() -> MyRandom { MyRandom { last: 42 } } |
| 31 | + fn normalize(p: f32) -> u32 {(p * IM as f32).floor() as u32} |
| 32 | + fn gen(&mut self) -> u32 { |
| 33 | + self.last = (self.last * 3877 + 29573) % IM; |
| 34 | + self.last |
51 | 35 | } |
52 | | - ans |
53 | 36 | } |
54 | 37 |
|
55 | | -fn select_random(r: u32, genelist: ~[AminoAcids]) -> char { |
56 | | - if r < genelist[0].prob { return genelist[0].ch; } |
57 | | - fn bisect(v: ~[AminoAcids], lo: uint, hi: uint, target: u32) -> char { |
58 | | - if hi > lo + 1u { |
59 | | - let mid: uint = lo + (hi - lo) / 2u; |
60 | | - if target < v[mid].prob { |
61 | | - return bisect(v, lo, mid, target); |
62 | | - } else { |
63 | | - return bisect(v, mid, hi, target); |
64 | | - } |
65 | | - } else { |
66 | | - return v[hi].ch; |
67 | | - } |
| 38 | +struct AAGen<'a> { |
| 39 | + rng: &'a mut MyRandom, |
| 40 | + data: ~[(u32, u8)] |
| 41 | +} |
| 42 | +impl<'a> AAGen<'a> { |
| 43 | + fn new<'b>(rng: &'b mut MyRandom, aa: &[(char, f32)]) -> AAGen<'b> { |
| 44 | + let mut cum = 0.; |
| 45 | + let data = aa.iter() |
| 46 | + .map(|&(ch, p)| { cum += p; (MyRandom::normalize(cum), ch as u8) }) |
| 47 | + .collect(); |
| 48 | + AAGen { rng: rng, data: data } |
68 | 49 | } |
69 | | - bisect(genelist.clone(), 0, genelist.len() - 1, r) |
70 | 50 | } |
71 | | - |
72 | | -fn make_random_fasta(wr: @mut io::Writer, |
73 | | - id: ~str, |
74 | | - desc: ~str, |
75 | | - genelist: ~[AminoAcids], |
76 | | - n: int) { |
77 | | - writeln!(wr, ">{} {}", id, desc); |
78 | | - let mut rng = rand::rng(); |
79 | | - let rng = @mut MyRandom { |
80 | | - last: rng.gen() |
81 | | - }; |
82 | | - let mut op: ~str = ~""; |
83 | | - for _ in range(0u, n as uint) { |
84 | | - op.push_char(select_random(myrandom_next(rng, 100u32), |
85 | | - genelist.clone())); |
86 | | - if op.len() >= LINE_LENGTH { |
87 | | - writeln!(wr, "{}", op); |
88 | | - op = ~""; |
89 | | - } |
| 51 | +impl<'a> Iterator<u8> for AAGen<'a> { |
| 52 | + fn next(&mut self) -> Option<u8> { |
| 53 | + let r = self.rng.gen(); |
| 54 | + self.data.iter() |
| 55 | + .skip_while(|pc| pc.n0() < r) |
| 56 | + .map(|&(_, c)| c) |
| 57 | + .next() |
90 | 58 | } |
91 | | - if op.len() > 0u { writeln!(wr, "{}", op); } |
92 | 59 | } |
93 | 60 |
|
94 | | -fn make_repeat_fasta(wr: @mut io::Writer, id: ~str, desc: ~str, s: ~str, n: int) { |
95 | | - writeln!(wr, ">{} {}", id, desc); |
96 | | - let mut op = str::with_capacity( LINE_LENGTH ); |
97 | | - let sl = s.len(); |
98 | | - for i in range(0u, n as uint) { |
99 | | - if (op.len() >= LINE_LENGTH) { |
100 | | - writeln!(wr, "{}", op); |
101 | | - op = str::with_capacity( LINE_LENGTH ); |
| 61 | +fn make_fasta<W: Writer, I: Iterator<u8>>( |
| 62 | + wr: &mut W, header: &str, mut it: I, mut n: uint) |
| 63 | +{ |
| 64 | + wr.write(header.as_bytes()); |
| 65 | + let mut line = [0u8, .. LINE_LENGTH + 1]; |
| 66 | + while n > 0 { |
| 67 | + let nb = min(LINE_LENGTH, n); |
| 68 | + for i in range(0, nb) { |
| 69 | + line[i] = it.next().unwrap(); |
102 | 70 | } |
103 | | - op.push_char( s[i % sl] as char ); |
| 71 | + n -= nb; |
| 72 | + line[nb] = '\n' as u8; |
| 73 | + wr.write(line.slice_to(nb + 1)); |
104 | 74 | } |
105 | | - if op.len() > 0 { |
106 | | - writeln!(wr, "{}", op); |
107 | | - } |
108 | | -} |
109 | | - |
110 | | -fn acid(ch: char, prob: u32) -> AminoAcids { |
111 | | - AminoAcids {ch: ch, prob: prob} |
112 | 75 | } |
113 | 76 |
|
114 | | -fn main() { |
| 77 | +fn run<W: Writer>(writer: &mut W) { |
115 | 78 | let args = os::args(); |
116 | | - let args = if os::getenv("RUST_BENCH").is_some() { |
117 | | - // alioth tests k-nucleotide with this data at 25,000,000 |
118 | | - ~[~"", ~"5000000"] |
| 79 | + let n = if os::getenv("RUST_BENCH").is_some() { |
| 80 | + 25000000 |
119 | 81 | } else if args.len() <= 1u { |
120 | | - ~[~"", ~"1000"] |
| 82 | + 1000 |
121 | 83 | } else { |
122 | | - args |
| 84 | + from_str(args[1]).unwrap() |
123 | 85 | }; |
124 | 86 |
|
125 | | - let writer = if os::getenv("RUST_BENCH").is_some() { |
126 | | - let file = File::create(&Path::new("./shootout-fasta.data")); |
127 | | - @mut file as @mut io::Writer |
128 | | - } else { |
129 | | - @mut io::stdout() as @mut io::Writer |
130 | | - }; |
131 | | - |
132 | | - let n = from_str::<int>(args[1]).unwrap(); |
| 87 | + let rng = &mut MyRandom::new(); |
| 88 | + let alu = |
| 89 | + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\ |
| 90 | + GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\ |
| 91 | + CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\ |
| 92 | + ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\ |
| 93 | + GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\ |
| 94 | + AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\ |
| 95 | + AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; |
| 96 | + let iub = &[('a', 0.27), ('c', 0.12), ('g', 0.12), |
| 97 | + ('t', 0.27), ('B', 0.02), ('D', 0.02), |
| 98 | + ('H', 0.02), ('K', 0.02), ('M', 0.02), |
| 99 | + ('N', 0.02), ('R', 0.02), ('S', 0.02), |
| 100 | + ('V', 0.02), ('W', 0.02), ('Y', 0.02)]; |
| 101 | + let homosapiens = &[('a', 0.3029549426680), |
| 102 | + ('c', 0.1979883004921), |
| 103 | + ('g', 0.1975473066391), |
| 104 | + ('t', 0.3015094502008)]; |
| 105 | + |
| 106 | + make_fasta(writer, ">ONE Homo sapiens alu\n", |
| 107 | + alu.as_bytes().iter().cycle().map(|c| *c), n * 2); |
| 108 | + make_fasta(writer, ">TWO IUB ambiguity codes\n", |
| 109 | + AAGen::new(rng, iub), n * 3); |
| 110 | + make_fasta(writer, ">THREE Homo sapiens frequency\n", |
| 111 | + AAGen::new(rng, homosapiens), n * 5); |
| 112 | + |
| 113 | + writer.flush(); |
| 114 | +} |
133 | 115 |
|
134 | | - let iub: ~[AminoAcids] = |
135 | | - make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32), |
136 | | - acid('t', 27u32), acid('B', 2u32), acid('D', 2u32), |
137 | | - acid('H', 2u32), acid('K', 2u32), acid('M', 2u32), |
138 | | - acid('N', 2u32), acid('R', 2u32), acid('S', 2u32), |
139 | | - acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]); |
140 | | - let homosapiens: ~[AminoAcids] = |
141 | | - make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32), |
142 | | - acid('t', 30u32)]); |
143 | | - let alu: ~str = |
144 | | - ~"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\ |
145 | | - GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\ |
146 | | - CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\ |
147 | | - ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\ |
148 | | - GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\ |
149 | | - AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\ |
150 | | - AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; |
151 | | - make_repeat_fasta(writer, ~"ONE", ~"Homo sapiens alu", alu, n * 2); |
152 | | - make_random_fasta(writer, ~"TWO", ~"IUB ambiguity codes", iub, n * 3); |
153 | | - make_random_fasta(writer, ~"THREE", |
154 | | - ~"Homo sapiens frequency", homosapiens, n * 5); |
| 116 | +fn main() { |
| 117 | + if os::getenv("RUST_BENCH").is_some() { |
| 118 | + let mut file = BufferedWriter::new(File::create(&Path::new("./shootout-fasta.data"))); |
| 119 | + run(&mut file); |
| 120 | + } else { |
| 121 | + run(&mut BufferedWriter::new(io::stdout())); |
| 122 | + } |
155 | 123 | } |
0 commit comments