Skip to content

Commit 35b73f5

Browse files
committed
teddy: port teddy searcher to std::arch
This commit ports the Teddy searcher to use std::arch and moves off the portable SIMD vector API. Performance remains the same, and it looks like the codegen is identical, which is great! This also makes the `simd-accel` feature a no-op and adds a new `unstable` feature which will enable the Teddy optimization. The `-C target-feature` or `-C target-cpu` settings are no longer necessary, since this will now do runtime target feature detection. We also add a new `unstable` feature to the regex crate, which will enable this new use of std::arch. Once enabled, the Teddy optimization becomes available automatically without any additional compile-time flags.
1 parent 58dc611 commit 35b73f5

File tree

15 files changed

+397
-182
lines changed

15 files changed

+397
-182
lines changed

Cargo.toml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ memchr = "2.0.0"
3131
thread_local = "0.3.2"
3232
# For parsing regular expressions.
3333
regex-syntax = { path = "regex-syntax", version = "0.5.1" }
34-
# For accelerating text search.
35-
simd = { version = "0.2.1", optional = true }
3634
# For compiling UTF-8 decoding into automata.
3735
utf8-ranges = "1.0.0"
3836

@@ -45,10 +43,20 @@ quickcheck = { version = "0.6", default-features = false }
4543
rand = "0.4"
4644

4745
[features]
48-
# Enable to use the unstable pattern traits defined in std.
46+
# We don't enable any features by default currently, but if the compiler
47+
# supports a specific type of feature, then regex's build.rs might enable
48+
# some default features.
49+
default = []
50+
# A blanket feature that governs whether unstable features are enabled or not.
51+
# Unstable features are disabled by default, and typically rely on unstable
52+
# features in rustc itself.
53+
unstable = ["pattern"]
54+
# Enable to use the unstable pattern traits defined in std. This is enabled
55+
# by default if the unstable feature is enabled.
4956
pattern = []
5057
# Enable to use simd acceleration.
51-
simd-accel = ["simd"]
58+
# Note that this is deprecated and is a no-op.
59+
simd-accel = []
5260

5361
[lib]
5462
# There are no benchmarks in the library code itself

bench/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ libc = "0.2"
1818
onig = { version = "3", optional = true }
1919
libpcre-sys = { version = "0.2", optional = true }
2020
memmap = "0.6"
21-
regex = { version = "0.2.0", path = "..", features = ["simd-accel"] }
21+
regex = { version = "0.2.0", path = "..", features = ["unstable"] }
2222
regex-syntax = { version = "0.5.0", path = "../regex-syntax" }
2323
serde = "1"
2424
serde_derive = "1"

build.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use std::env;
2+
use std::ffi::OsString;
3+
use std::process::Command;
4+
5+
fn main() {
6+
let rustc = env::var_os("RUSTC").unwrap_or(OsString::from("rustc"));
7+
let output = Command::new(&rustc)
8+
.arg("--version")
9+
.output()
10+
.unwrap()
11+
.stdout;
12+
let version = String::from_utf8(output).unwrap();
13+
14+
// If we're using nightly Rust, then we can enable vector optimizations.
15+
// Note that these aren't actually activated unless the `unstable` feature
16+
// is enabled.
17+
//
18+
// We also don't activate these if we've explicitly disabled auto
19+
// optimizations. Disabling auto optimizations is intended for use in
20+
// tests, so that we can reliably test fallback implementations.
21+
if env::var_os("CARGO_CFG_REGEX_DISABLE_AUTO_OPTIMIZATIONS").is_none() {
22+
if version.contains("nightly") {
23+
println!("cargo:rustc-cfg=regex_runtime_teddy_ssse3");
24+
}
25+
}
26+
}

src/exec.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use compile::Compiler;
2323
use dfa;
2424
use error::Error;
2525
use input::{ByteInput, CharInput};
26-
use literals::LiteralSearcher;
26+
use literal::LiteralSearcher;
2727
use pikevm;
2828
use prog::Program;
2929
use re_builder::RegexOptions;

src/input.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::u32;
1616

1717
use syntax;
1818

19-
use literals::LiteralSearcher;
19+
use literal::LiteralSearcher;
2020
use prog::InstEmptyLook;
2121
use utf8::{decode_utf8, decode_last_utf8};
2222

src/lib.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -520,14 +520,15 @@ another matching engine with fixed memory requirements.
520520
#![deny(missing_docs)]
521521
#![cfg_attr(test, deny(warnings))]
522522
#![cfg_attr(feature = "pattern", feature(pattern))]
523-
#![cfg_attr(feature = "simd-accel", feature(cfg_target_feature))]
523+
#![cfg_attr(feature = "unstable", feature(target_feature, stdsimd))]
524524

525525
extern crate aho_corasick;
526526
extern crate memchr;
527527
extern crate thread_local;
528-
#[macro_use] #[cfg(test)] extern crate quickcheck;
528+
#[cfg(test)]
529+
#[macro_use]
530+
extern crate quickcheck;
529531
extern crate regex_syntax as syntax;
530-
#[cfg(feature = "simd-accel")] extern crate simd;
531532
extern crate utf8_ranges;
532533

533534
pub use error::Error;
@@ -645,7 +646,7 @@ mod exec;
645646
mod expand;
646647
mod freqs;
647648
mod input;
648-
mod literals;
649+
mod literal;
649650
#[cfg(feature = "pattern")]
650651
mod pattern;
651652
mod pikevm;
@@ -655,12 +656,9 @@ mod re_bytes;
655656
mod re_set;
656657
mod re_trait;
657658
mod re_unicode;
658-
#[cfg(feature = "simd-accel")]
659-
mod simd_accel;
660-
#[cfg(not(feature = "simd-accel"))]
661-
#[path = "simd_fallback/mod.rs"]
662-
mod simd_accel;
663659
mod sparse;
660+
#[cfg(feature = "unstable")]
661+
mod vector;
664662

665663
/// The `internal` module exists to support suspicious activity, such as
666664
/// testing different matching engines and supporting the `regex-debug` CLI
@@ -670,6 +668,6 @@ pub mod internal {
670668
pub use compile::Compiler;
671669
pub use exec::{Exec, ExecBuilder};
672670
pub use input::{Char, Input, CharInput, InputAt};
673-
pub use literals::LiteralSearcher;
671+
pub use literal::LiteralSearcher;
674672
pub use prog::{Program, Inst, EmptyLook, InstRanges};
675673
}

src/literals.rs renamed to src/literal/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ use memchr::{memchr, memchr2, memchr3};
1616
use syntax::hir::literal::{Literal, Literals};
1717

1818
use freqs::BYTE_FREQUENCIES;
19+
use self::teddy_ssse3::Teddy;
1920

20-
use simd_accel::teddy128::{Teddy, is_teddy_128_available};
21+
mod teddy_ssse3;
2122

2223
/// A prefix extracted from a compiled regular expression.
2324
///
@@ -219,7 +220,7 @@ impl Matcher {
219220
}
220221
}
221222
let is_aho_corasick_fast = sset.dense.len() == 1 && sset.all_ascii;
222-
if is_teddy_128_available() && !is_aho_corasick_fast {
223+
if Teddy::available() && !is_aho_corasick_fast {
223224
// Only try Teddy if Aho-Corasick can't use memchr on an ASCII
224225
// byte. Also, in its current form, Teddy doesn't scale well to
225226
// lots of literals.

src/simd_fallback/teddy128.rs renamed to src/literal/teddy_ssse3/fallback.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
use syntax::hir::literal::Literals;
22

3-
pub fn is_teddy_128_available() -> bool {
4-
false
5-
}
6-
73
#[derive(Debug, Clone)]
84
pub struct Teddy(());
95

@@ -15,6 +11,7 @@ pub struct Match {
1511
}
1612

1713
impl Teddy {
14+
pub fn available() -> bool { false }
1815
pub fn new(_pats: &Literals) -> Option<Teddy> { None }
1916
pub fn patterns(&self) -> &[Vec<u8>] { &[] }
2017
pub fn len(&self) -> usize { 0 }

0 commit comments

Comments
 (0)