38
38
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
39
39
// OF THE POSSIBILITY OF SUCH DAMAGE.
40
40
41
- // ignore-android doesn't terminate?
41
+ // ignore-android see #10393 #13206
42
42
43
- #![ feature( slicing_syntax, asm , if_let , tuple_indexing ) ]
43
+ #![ feature( slicing_syntax, unboxed_closures , overloaded_calls ) ]
44
44
45
45
extern crate libc;
46
46
47
47
use std:: io:: stdio:: { stdin_raw, stdout_raw} ;
48
- use std:: sync:: { Future } ;
49
48
use std:: num:: { div_rem} ;
50
49
use std:: ptr:: { copy_memory} ;
51
50
use std:: io:: { IoResult , EndOfFile } ;
52
- use std:: slice:: raw:: { mut_buf_as_slice} ;
53
51
54
- use shared_memory:: { SharedMemory } ;
55
-
56
- mod tables {
57
- use std:: sync:: { Once , ONCE_INIT } ;
58
-
59
- /// Lookup tables.
60
- static mut CPL16 : [ u16 , ..1 << 16 ] = [ 0 , ..1 << 16 ] ;
61
- static mut CPL8 : [ u8 , ..1 << 8 ] = [ 0 , ..1 << 8 ] ;
62
-
63
- /// Generates the tables.
64
- pub fn get ( ) -> Tables {
65
- /// To make sure we initialize the tables only once.
66
- static INIT : Once = ONCE_INIT ;
67
- INIT . doit ( || {
68
- unsafe {
69
- for i in range ( 0 , 1 << 8 ) {
70
- CPL8 [ i] = match i as u8 {
71
- b'A' | b'a' => b'T' ,
72
- b'C' | b'c' => b'G' ,
73
- b'G' | b'g' => b'C' ,
74
- b'T' | b't' => b'A' ,
75
- b'U' | b'u' => b'A' ,
76
- b'M' | b'm' => b'K' ,
77
- b'R' | b'r' => b'Y' ,
78
- b'W' | b'w' => b'W' ,
79
- b'S' | b's' => b'S' ,
80
- b'Y' | b'y' => b'R' ,
81
- b'K' | b'k' => b'M' ,
82
- b'V' | b'v' => b'B' ,
83
- b'H' | b'h' => b'D' ,
84
- b'D' | b'd' => b'H' ,
85
- b'B' | b'b' => b'V' ,
86
- b'N' | b'n' => b'N' ,
87
- i => i,
88
- } ;
89
- }
90
-
91
- for ( i, v) in CPL16 . iter_mut ( ) . enumerate ( ) {
92
- * v = * CPL8 . unsafe_get ( i & 255 ) as u16 << 8 |
93
- * CPL8 . unsafe_get ( i >> 8 ) as u16 ;
94
- }
95
- }
96
- } ) ;
97
- Tables { _dummy : ( ) }
98
- }
99
-
100
- /// Accessor for the static arrays.
101
- ///
102
- /// To make sure that the tables can't be accessed without having been initialized.
103
- pub struct Tables {
104
- _dummy : ( )
105
- }
106
-
107
- impl Tables {
108
- /// Retreives the complement for `i`.
109
- pub fn cpl8 ( self , i : u8 ) -> u8 {
110
- // Not really unsafe.
111
- unsafe { CPL8 [ i as uint ] }
112
- }
113
-
114
- /// Retreives the complement for `i`.
115
- pub fn cpl16 ( self , i : u16 ) -> u16 {
116
- unsafe { CPL16 [ i as uint ] }
117
- }
118
- }
52
/// Precomputed complement lookup tables.
+ struct Tables {
53
// One entry per possible byte value (256 entries).
+ table8 : [ u8 , ..1 << 8 ] ,
54
// One entry per possible 16-bit value (65536 entries).
+ table16 : [ u16 , ..1 << 16 ]
119
55
}
120
56
121
- mod shared_memory {
122
- use std:: sync:: { Arc } ;
123
- use std:: mem:: { transmute} ;
124
- use std:: raw:: { Slice } ;
125
-
126
- /// Structure for sharing disjoint parts of a vector mutably across tasks.
127
- pub struct SharedMemory {
128
- ptr : Arc < Vec < u8 > > ,
129
- start : uint ,
130
- len : uint ,
131
- }
132
-
133
- impl SharedMemory {
134
- pub fn new ( ptr : Vec < u8 > ) -> SharedMemory {
135
- let len = ptr. len ( ) ;
136
- SharedMemory {
137
- ptr : Arc :: new ( ptr) ,
138
- start : 0 ,
139
- len : len,
140
- }
57
+ impl Tables {
58
/// Builds both lookup tables.
+ fn new ( ) -> Tables {
59
+ let mut table8 = [ 0 , ..1 << 8 ] ;
60
// `table8[b]` is the complement of the byte `b`, as given by `computed_cpl8`.
+ for ( i, v) in table8. iter_mut ( ) . enumerate ( ) {
61
+ * v = Tables :: computed_cpl8 ( i as u8 ) ;
141
62
}
142
-
143
- pub fn as_mut_slice ( & mut self ) -> & mut [ u8 ] {
144
- unsafe {
145
- transmute ( Slice {
146
- data : self . ptr . as_ptr ( ) . offset ( self . start as int ) as * const u8 ,
147
- len : self . len ,
148
- } )
149
- }
63
+ let mut table16 = [ 0 , ..1 << 16 ] ;
64
// Entry `i` holds the complements of both bytes of `i` with their order
// swapped: the low byte's complement ends up in the high byte and the high
// byte's complement in the low byte.
+ for ( i, v) in table16. iter_mut ( ) . enumerate ( ) {
65
+ * v = table8[ i & 255 ] as u16 << 8 |
66
+ table8[ i >> 8 ] as u16 ;
150
67
}
68
+ Tables { table8 : table8, table16 : table16 }
69
+ }
151
70
152
- pub fn len ( & self ) -> uint {
153
- self . len
71
/// Returns the complement of the nucleotide code `c`.
///
/// Upper- and lower-case codes both map to the upper-case complement; any
/// byte that is not one of the recognised codes is returned unchanged.
fn computed_cpl8(c: u8) -> u8 {
    // `CODES[k]` is complemented by `COMPLEMENTS[k]`.
    const CODES: &'static [u8] = b"ACGTUMRWSYKVHDBN";
    const COMPLEMENTS: &'static [u8] = b"TGCAAKYWSRMBDHVN";

    // Fold ASCII lower case onto upper case so a single table serves both.
    let folded = if b'a' <= c && c <= b'z' { c - 32 } else { c };
    match CODES.iter().position(|&code| code == folded) {
        Some(k) => COMPLEMENTS[k],
        None => c,
    }
}
155
92
156
- pub fn split_at ( self , mid : uint ) -> ( SharedMemory , SharedMemory ) {
157
- assert ! ( mid <= self . len) ;
158
- let left = SharedMemory {
159
- ptr : self . ptr . clone ( ) ,
160
- start : self . start ,
161
- len : mid,
162
- } ;
163
- let right = SharedMemory {
164
- ptr : self . ptr ,
165
- start : self . start + mid,
166
- len : self . len - mid,
167
- } ;
168
- ( left, right)
169
- }
93
+ /// Retreives the complement for `i`.
94
+ fn cpl8 ( & self , i : u8 ) -> u8 {
95
+ self . table8 [ i as uint ]
96
+ }
170
97
171
- /// Resets the object so that it covers the whole range of the contained vector.
172
- ///
173
- /// You must not call this method if `self` is not the only reference to the
174
- /// shared memory.
175
- ///
176
- /// FIXME: If `Arc` had a method to check if the reference is unique, then we
177
- /// wouldn't need the `unsafe` here.
178
- ///
179
- /// FIXME: If `Arc` had a method to unwrap the contained value, then we could
180
- /// simply unwrap here.
181
- pub unsafe fn reset ( self ) -> SharedMemory {
182
- let len = self . ptr . len ( ) ;
183
- SharedMemory {
184
- ptr : self . ptr ,
185
- start : 0 ,
186
- len : len,
187
- }
188
- }
98
+ /// Retreives the complement for `i`.
99
+ fn cpl16 ( & self , i : u16 ) -> u16 {
100
+ self . table16 [ i as uint ]
189
101
}
190
102
}
191
103
192
-
193
104
/// Reads all remaining bytes from the stream.
194
105
fn read_to_end < R : Reader > ( r : & mut R ) -> IoResult < Vec < u8 > > {
106
+ // As reading the input stream into memory is a bottleneck, we tune
107
+ // Reader::read_to_end() with a fast growing policy to limit
108
+ // recopies. If MREMAP_RETAIN is implemented in the linux kernel
109
+ // and jemalloc uses it, this trick will become useless.
195
110
const CHUNK : uint = 64 * 1024 ;
196
111
197
- let mut vec = Vec :: with_capacity ( 1024 * 1024 ) ;
112
+ let mut vec = Vec :: with_capacity ( CHUNK ) ;
198
113
loop {
114
+ // workaround: very fast growing
199
115
if vec. capacity ( ) - vec. len ( ) < CHUNK {
200
116
let cap = vec. capacity ( ) ;
201
117
let mult = if cap < 256 * 1024 * 1024 {
202
- // FIXME (mahkoh): Temporary workaround for jemalloc on linux. Replace
203
- // this by 2x once the jemalloc preformance issue has been fixed.
204
118
16
205
119
} else {
206
120
2
207
121
} ;
208
122
vec. reserve_exact ( mult * cap) ;
209
123
}
210
- unsafe {
211
- let ptr = vec. as_mut_ptr ( ) . offset ( vec. len ( ) as int ) ;
212
- match mut_buf_as_slice ( ptr, CHUNK , |s| r. read ( s) ) {
213
- Ok ( n) => {
214
- let len = vec. len ( ) ;
215
- vec. set_len ( len + n) ;
216
- } ,
217
- Err ( ref e) if e. kind == EndOfFile => break ,
218
- Err ( e) => return Err ( e) ,
219
- }
124
+ match r. push_at_least ( 1 , CHUNK , & mut vec) {
125
+ Ok ( _) => { }
126
+ Err ( ref e) if e. kind == EndOfFile => break ,
127
+ Err ( e) => return Err ( e)
220
128
}
221
129
}
222
130
Ok ( vec)
@@ -225,11 +133,8 @@ fn read_to_end<R: Reader>(r: &mut R) -> IoResult<Vec<u8>> {
225
133
/// Finds the first position at which the byte `n` occurs in `h`.
226
134
fn memchr ( h : & [ u8 ] , n : u8 ) -> Option < uint > {
227
135
use libc:: { c_void, c_int, size_t} ;
228
- extern {
229
- fn memchr ( h : * const c_void , n : c_int , s : size_t ) -> * mut c_void ;
230
- }
231
136
let res = unsafe {
232
- memchr ( h. as_ptr ( ) as * const c_void , n as c_int , h. len ( ) as size_t )
137
+ libc :: memchr ( h. as_ptr ( ) as * const c_void , n as c_int , h. len ( ) as size_t )
233
138
} ;
234
139
if res. is_null ( ) {
235
140
None
@@ -238,13 +143,36 @@ fn memchr(h: &[u8], n: u8) -> Option<uint> {
238
143
}
239
144
}
240
145
146
+ /// A mutable iterator over the DNA sequences stored in a byte buffer.
147
// `s` is the part of the buffer that has not been handed out yet.
+ struct MutDnaSeqs < ' a > { s : & ' a mut [ u8 ] }
148
+ fn mut_dna_seqs < ' a > ( s : & ' a mut [ u8 ] ) -> MutDnaSeqs < ' a > {
149
+ MutDnaSeqs { s : s }
150
+ }
151
// Each call to `next` yields the mutable sub-slice that starts right after
// the first newline of the remaining buffer and ends just before the next
// `>` byte (or at the end of the buffer when no `>` is left).
+ impl < ' a > Iterator < & ' a mut [ u8 ] > for MutDnaSeqs < ' a > {
152
+ fn next ( & mut self ) -> Option < & ' a mut [ u8 ] > {
153
// Take the remaining buffer out of `self`, leaving an empty slice behind.
// NOTE(review): presumably needed so the pieces can carry the lifetime `'a`
// instead of the shorter `&mut self` borrow -- confirm.
+ let tmp = std:: mem:: replace ( & mut self . s , & mut [ ] ) ;
154
// Skip everything up to and including the first newline; with no newline
// left the iteration ends (and `self.s` stays empty).
// NOTE(review): presumably the skipped line is the `>` header line -- confirm.
+ let tmp = match memchr ( tmp, b'\n' ) {
155
+ Some ( i) => tmp. slice_from_mut ( i + 1 ) ,
156
+ None => return None ,
157
+ } ;
158
// The sequence runs up to the next `>`; without one it is the whole rest,
// and the remainder becomes empty.
+ let ( seq, tmp) = match memchr ( tmp, b'>' ) {
159
+ Some ( i) => tmp. split_at_mut ( i) ,
160
+ None => {
161
+ let len = tmp. len ( ) ;
162
+ tmp. split_at_mut ( len)
163
+ }
164
+ } ;
165
// Keep what follows the sequence for the next call.
+ self . s = tmp;
166
+ Some ( seq)
167
+ }
168
+ }
169
+
241
170
/// Length of a normal line without the terminating \n.
242
171
const LINE_LEN : uint = 60 ;
243
172
244
173
/// Compute the reverse complement.
245
- fn reverse_complement ( mut view : SharedMemory , tables : tables:: Tables ) {
246
- // Drop the last newline
247
- let seq = view. as_mut_slice ( ) . init_mut ( ) ;
174
+ fn reverse_complement ( seq : & mut [ u8 ] , tables : & Tables ) {
175
+ let seq = seq. init_mut ( ) ; // Drop the last newline
248
176
let len = seq. len ( ) ;
249
177
let off = LINE_LEN - len % ( LINE_LEN + 1 ) ;
250
178
let mut i = LINE_LEN ;
@@ -290,34 +218,36 @@ fn reverse_complement(mut view: SharedMemory, tables: tables::Tables) {
290
218
}
291
219
}
292
220
293
- fn main ( ) {
294
- let mut data = SharedMemory :: new ( read_to_end ( & mut stdin_raw ( ) ) . unwrap ( ) ) ;
295
- let tables = tables:: get ( ) ;
296
-
297
- let mut futures = vec ! ( ) ;
298
- loop {
299
- let ( _, mut tmp_data) = match memchr ( data. as_mut_slice ( ) , b'\n' ) {
300
- Some ( i) => data. split_at ( i + 1 ) ,
301
- _ => break ,
302
- } ;
303
- let ( view, tmp_data) = match memchr ( tmp_data. as_mut_slice ( ) , b'>' ) {
304
- Some ( i) => tmp_data. split_at ( i) ,
305
- None => {
306
- let len = tmp_data. len ( ) ;
307
- tmp_data. split_at ( len)
308
- } ,
309
- } ;
310
- futures. push ( Future :: spawn ( proc ( ) reverse_complement ( view, tables) ) ) ;
311
- data = tmp_data;
312
- }
313
-
314
- for f in futures. iter_mut ( ) {
315
- f. get ( ) ;
221
+ /// Executes a closure in parallel over the slices yielded by an iterator.
222
+ /// One task is spawned per element of `iter`, and `f` is called on that element inside the task.
/// The function only returns after every spawned task has dropped its
/// channel sender, which each task does after calling `f`.
223
+ fn parallel < ' a , I , T , F > ( mut iter : I , f : F )
224
+ where T : Send + Sync ,
225
+ I : Iterator < & ' a mut [ T ] > ,
226
+ F : Fn ( & ' a mut [ T ] ) + Sync {
227
+ use std:: mem;
228
+ use std:: raw:: Repr ;
229
+
230
// The channel never carries a message; it is used purely to detect when
// all tasks are done (see the receive loop at the end).
+ let ( tx, rx) = channel ( ) ;
231
+ for chunk in iter {
232
+ let tx = tx. clone ( ) ;
233
+
234
+ // Need to convert `f` and `chunk` to something that can cross the task
235
+ // boundary.
// NOTE(review): the task dereferences a raw pointer to `f` and rebuilds the
// slice from its raw representation; this looks sound only because this
// function waits for all tasks before `f` and the slices go away -- confirm.
236
+ let f = & f as * const F as * const uint ;
237
+ let raw = chunk. repr ( ) ;
238
+ spawn ( proc ( ) {
239
+ let f = f as * const F ;
240
+ unsafe { ( * f) ( mem:: transmute ( raw) ) }
241
// Dropping the task's sender signals that this task has finished.
+ drop ( tx)
242
+ } ) ;
316
243
}
244
// Drop the original sender so that only the tasks' clones keep the channel open.
+ drop ( tx) ;
245
// Nothing is ever sent: this loop simply blocks until every sender clone is gone.
+ for ( ) in rx. iter ( ) { }
246
+ }
317
247
318
- // Not actually unsafe. If Arc had a way to check uniqueness then we could do that in
319
- // `reset` and it would tell us that, yes, it is unique at this point.
320
- data = unsafe { data . reset ( ) } ;
321
-
248
// Reads all of stdin, processes every DNA sequence of the buffer in
// parallel, and writes the buffer back to stdout.
+ fn main ( ) {
249
+ let mut data = read_to_end ( & mut stdin_raw ( ) ) . unwrap ( ) ;
250
// A reference to the tables is captured by the closure below, so all tasks
// use the same `Tables` value.
+ let tables = & Tables :: new ( ) ;
251
// `reverse_complement` is handed each sequence as a mutable slice of `data`.
+ parallel ( mut_dna_seqs ( data [ mut ] ) , | & : seq| reverse_complement ( seq , tables ) ) ;
322
252
stdout_raw ( ) . write ( data. as_mut_slice ( ) ) . unwrap ( ) ;
323
253
}
0 commit comments