Skip to content

Commit 3516a8a

Browse files
committed
Merge pull request #32 from llogiq/regex_u8
moved regex_dna to u8 matching
2 parents 9807d0c + c7df678 commit 3516a8a

File tree

2 files changed

+27
-25
lines changed

2 files changed

+27
-25
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ SOURCES = $(wildcard src/*.rs)
22
RUSTC ?= rustc
33
RUSTC_FLAGS ?= -C opt-level=3 -C target-cpu=core2 -C lto
44
RUSTC_FLAGS += -L ./lib
5-
REGEX ?= regex-0.1.66
5+
REGEX ?= regex-0.1.69
66
ARENA ?= typed-arena-1.1.0
77
NUM_CPU ?= num_cpus-0.2.11
88

src/regex_dna.rs

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,33 @@
77

88
extern crate regex;
99

10+
use regex::bytes::Regex;
11+
1012
use std::io::{self, Read};
1113
use std::sync::Arc;
1214
use std::thread;
1315

14-
macro_rules! regex { ($re:expr) => { ::regex::Regex::new($re).unwrap() } }
16+
/// Builds a `Regex` from the given pattern expression.
///
/// Expands to `Regex::new(pattern).unwrap()`, so it panics if `Regex::new`
/// returns an error for the pattern.
macro_rules! regex {
    ($pattern:expr) => {
        Regex::new($pattern).unwrap()
    };
}
1517

1618
fn main() {
17-
let mut seq = String::with_capacity(50 * (1 << 20));
18-
io::stdin().read_to_string(&mut seq).unwrap();
19+
let mut seq = Vec::with_capacity(50 * (1 << 20));
20+
io::stdin().read_to_end(&mut seq).unwrap();
1921
let ilen = seq.len();
2022

21-
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
23+
seq = regex!(r">[^\n]*\n|\n").replace_all(&seq, &b""[..]);
2224
let clen = seq.len();
2325
let seq_arc = Arc::new(seq.clone());
2426

2527
let variants = vec![
26-
regex!("agggtaaa|tttaccct"),
27-
regex!("[cgt]gggtaaa|tttaccc[acg]"),
28-
regex!("a[act]ggtaaa|tttacc[agt]t"),
29-
regex!("ag[act]gtaaa|tttac[agt]ct"),
30-
regex!("agg[act]taaa|ttta[agt]cct"),
31-
regex!("aggg[acg]aaa|ttt[cgt]ccct"),
32-
regex!("agggt[cgt]aa|tt[acg]accct"),
33-
regex!("agggta[cgt]a|t[acg]taccct"),
34-
regex!("agggtaa[cgt]|[acg]ttaccct"),
28+
regex!(r"agggtaaa|tttaccct"),
29+
regex!(r"[cgt]gggtaaa|tttaccc[acg]"),
30+
regex!(r"a[act]ggtaaa|tttacc[agt]t"),
31+
regex!(r"ag[act]gtaaa|tttac[agt]ct"),
32+
regex!(r"agg[act]taaa|ttta[agt]cct"),
33+
regex!(r"aggg[acg]aaa|ttt[cgt]ccct"),
34+
regex!(r"agggt[cgt]aa|tt[acg]accct"),
35+
regex!(r"agggta[cgt]a|t[acg]taccct"),
36+
regex!(r"agggtaa[cgt]|[acg]ttaccct"),
3537
];
3638
let mut counts = vec![];
3739
for variant in variants {
@@ -42,17 +44,17 @@ fn main() {
4244
}
4345

4446
let substs = vec![
45-
(regex!("B"), "(c|g|t)"),
46-
(regex!("D"), "(a|g|t)"),
47-
(regex!("H"), "(a|c|t)"),
48-
(regex!("K"), "(g|t)"),
49-
(regex!("M"), "(a|c)"),
50-
(regex!("N"), "(a|c|g|t)"),
51-
(regex!("R"), "(a|g)"),
52-
(regex!("S"), "(c|g)"),
53-
(regex!("V"), "(a|c|g)"),
54-
(regex!("W"), "(a|t)"),
55-
(regex!("Y"), "(c|t)"),
47+
(regex!(r"B"), &b"(c|g|t)"[..]),
48+
(regex!(r"D"), &b"(a|g|t)"[..]),
49+
(regex!(r"H"), &b"(a|c|t)"[..]),
50+
(regex!(r"K"), &b"(g|t)"[..]),
51+
(regex!(r"M"), &b"(a|c)"[..]),
52+
(regex!(r"N"), &b"(a|c|g|t)"[..]),
53+
(regex!(r"R"), &b"(a|g)"[..]),
54+
(regex!(r"S"), &b"(c|g)"[..]),
55+
(regex!(r"V"), &b"(a|c|g)"[..]),
56+
(regex!(r"W"), &b"(a|t)"[..]),
57+
(regex!(r"Y"), &b"(c|t)"[..]),
5658
];
5759
let mut seq = seq;
5860
for (re, replacement) in substs.into_iter() {

0 commit comments

Comments
 (0)