Skip to content

goodbye simd crate, hello std::arch #456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ bench-log
wiki
tags
examples/debug.rs
tmp/
16 changes: 12 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ memchr = "2.0.0"
thread_local = "0.3.2"
# For parsing regular expressions.
regex-syntax = { path = "regex-syntax", version = "0.5.1" }
# For accelerating text search.
simd = { version = "0.2.1", optional = true }
# For compiling UTF-8 decoding into automata.
utf8-ranges = "1.0.0"

Expand All @@ -45,10 +43,20 @@ quickcheck = { version = "0.6", default-features = false }
rand = "0.4"

[features]
# Enable to use the unstable pattern traits defined in std.
# We don't enable any features by default currently, but if the compiler
# supports a specific type of feature, then regex's build.rs might enable
# some default features.
default = []
# A blanket feature that governs whether unstable features are enabled or not.
# Unstable features are disabled by default, and typically rely on unstable
# features in rustc itself.
unstable = ["pattern"]
# Enable to use the unstable pattern traits defined in std. This is enabled
# by default if the unstable feature is enabled.
pattern = []
# Enable to use simd acceleration.
simd-accel = ["simd"]
# Note that this is deprecated and is a no-op.
simd-accel = []

[lib]
# There are no benchmarks in the library code itself
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,16 @@ assert!(!matches.matched(5));
assert!(matches.matched(6));
```

### Usage: enable SIMD optimizations

This crate provides an `unstable` feature that can only be enabled on nightly
Rust. When this feature is enabled, the regex crate will use SIMD optimizations
if your CPU supports them. No additional compile time flags are required; the
regex crate will detect your CPU support at runtime.

Once `std::arch` is stabilized, these optimizations will be enabled
automatically, without requiring the `unstable` feature.


### Usage: a regular expression parser

Expand Down
2 changes: 1 addition & 1 deletion bench/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ libc = "0.2"
onig = { version = "3", optional = true }
libpcre-sys = { version = "0.2", optional = true }
memmap = "0.6"
regex = { version = "0.2.0", path = "..", features = ["simd-accel"] }
regex = { version = "0.2.0", path = "..", features = ["unstable"] }
regex-syntax = { version = "0.5.0", path = "../regex-syntax" }
serde = "1"
serde_derive = "1"
Expand Down
3 changes: 0 additions & 3 deletions bench/compile
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#!/bin/sh

# Enable SIMD.
export RUSTFLAGS="-C target-cpu=native"

exec cargo build \
--release \
--features 're-re2 re-onig re-pcre1 re-pcre2 re-rust re-rust-bytes re-tcl re-dphobos-dmd re-dphobos-ldc' \
Expand Down
5 changes: 0 additions & 5 deletions bench/run
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@ if [ $# = 0 ] || [ $1 = '-h' ] || [ $1 = '--help' ]; then
usage
fi

# Enable SIMD, unless we're in CI, in which case we inherit RUSTFLAGS.
if [ -z "$TRAVIS_RUST_VERSION" ]; then
export RUSTFLAGS="-C target-cpu=native"
fi

which="$1"
shift
case $which in
Expand Down
27 changes: 27 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use std::env;
use std::ffi::OsString;
use std::process::Command;

/// Build script entry point: probes the compiler and emits `cfg` flags for
/// the Teddy vector matchers.
///
/// The compiler named by the `RUSTC` environment variable (falling back to
/// plain `rustc` when unset) is run with `--version`. When the reported
/// version string contains `nightly`, and the
/// `CARGO_CFG_REGEX_DISABLE_AUTO_OPTIMIZATIONS` environment variable is not
/// set, the `regex_runtime_teddy_ssse3` and `regex_runtime_teddy_avx2` cfgs
/// are printed for Cargo to pass on to rustc.
///
/// # Panics
///
/// Panics if the compiler cannot be executed or if its version output is not
/// valid UTF-8.
fn main() {
    // Build the fallback lazily so nothing is allocated when RUSTC is set.
    let rustc = env::var_os("RUSTC").unwrap_or_else(|| OsString::from("rustc"));
    let output = Command::new(&rustc)
        .arg("--version")
        .output()
        .expect("failed to execute `rustc --version`")
        .stdout;
    let version = String::from_utf8(output)
        .expect("`rustc --version` did not print valid UTF-8");

    // If we're using nightly Rust, then we can enable vector optimizations.
    // Note that these aren't actually activated unless the `unstable` feature
    // is enabled.
    //
    // We also don't activate these if we've explicitly disabled auto
    // optimizations. Disabling auto optimizations is intended for use in
    // tests, so that we can reliably test fallback implementations.
    let auto_optimizations_enabled =
        env::var_os("CARGO_CFG_REGEX_DISABLE_AUTO_OPTIMIZATIONS").is_none();
    if auto_optimizations_enabled && version.contains("nightly") {
        println!("cargo:rustc-cfg=regex_runtime_teddy_ssse3");
        println!("cargo:rustc-cfg=regex_runtime_teddy_avx2");
    }
}
1 change: 0 additions & 1 deletion ci/after_success.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ if [ "$TRAVIS_RUST_VERSION" != "nightly" ] || [ "$TRAVIS_PULL_REQUEST" != "false
exit 0
fi

export RUSTFLAGS="-C target-feature=+ssse3"
env

# Install kcov.
Expand Down
2 changes: 1 addition & 1 deletion ci/run-kcov
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ while true; do
esac
done

cargo test --no-run --verbose --jobs 4
cargo test --no-run --verbose --jobs 4 --features unstable
for t in ${tests[@]}; do
kcov \
--verify \
Expand Down
8 changes: 1 addition & 7 deletions ci/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@

set -ex

if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
# We set this once so that all invocations share this setting. This should
# help with build times by avoiding excessive re-compiles.
export RUSTFLAGS="-C target-feature=+ssse3"
fi

# Builds the regex crate and runs tests.
cargo build --verbose
cargo doc --verbose
Expand All @@ -25,7 +19,7 @@ fi

# Run tests. If we have nightly, then enable our nightly features.
if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
cargo test --verbose --features 'simd-accel pattern'
cargo test --verbose --features unstable
else
cargo test --verbose
fi
Expand Down
2 changes: 1 addition & 1 deletion src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use compile::Compiler;
use dfa;
use error::Error;
use input::{ByteInput, CharInput};
use literals::LiteralSearcher;
use literal::LiteralSearcher;
use pikevm;
use prog::Program;
use re_builder::RegexOptions;
Expand Down
2 changes: 1 addition & 1 deletion src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::u32;

use syntax;

use literals::LiteralSearcher;
use literal::LiteralSearcher;
use prog::InstEmptyLook;
use utf8::{decode_utf8, decode_last_utf8};

Expand Down
18 changes: 8 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,14 +520,15 @@ another matching engine with fixed memory requirements.
#![deny(missing_docs)]
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "pattern", feature(pattern))]
#![cfg_attr(feature = "simd-accel", feature(cfg_target_feature))]
#![cfg_attr(feature = "unstable", feature(target_feature, stdsimd))]

extern crate aho_corasick;
extern crate memchr;
extern crate thread_local;
#[macro_use] #[cfg(test)] extern crate quickcheck;
#[cfg(test)]
#[macro_use]
extern crate quickcheck;
extern crate regex_syntax as syntax;
#[cfg(feature = "simd-accel")] extern crate simd;
extern crate utf8_ranges;

pub use error::Error;
Expand Down Expand Up @@ -645,7 +646,7 @@ mod exec;
mod expand;
mod freqs;
mod input;
mod literals;
mod literal;
#[cfg(feature = "pattern")]
mod pattern;
mod pikevm;
Expand All @@ -655,12 +656,9 @@ mod re_bytes;
mod re_set;
mod re_trait;
mod re_unicode;
#[cfg(feature = "simd-accel")]
mod simd_accel;
#[cfg(not(feature = "simd-accel"))]
#[path = "simd_fallback/mod.rs"]
mod simd_accel;
mod sparse;
#[cfg(feature = "unstable")]
mod vector;

/// The `internal` module exists to support suspicious activity, such as
/// testing different matching engines and supporting the `regex-debug` CLI
Expand All @@ -670,6 +668,6 @@ pub mod internal {
pub use compile::Compiler;
pub use exec::{Exec, ExecBuilder};
pub use input::{Char, Input, CharInput, InputAt};
pub use literals::LiteralSearcher;
pub use literal::LiteralSearcher;
pub use prog::{Program, Inst, EmptyLook, InstRanges};
}
54 changes: 42 additions & 12 deletions src/literals.rs → src/literal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ use memchr::{memchr, memchr2, memchr3};
use syntax::hir::literal::{Literal, Literals};

use freqs::BYTE_FREQUENCIES;
use self::teddy_avx2::{Teddy as TeddyAVX2};
use self::teddy_ssse3::{Teddy as TeddySSSE3};

use simd_accel::teddy128::{Teddy, is_teddy_128_available};
mod teddy_avx2;
mod teddy_ssse3;

/// A prefix extracted from a compiled regular expression.
///
Expand Down Expand Up @@ -46,7 +49,10 @@ enum Matcher {
AC(FullAcAutomaton<Literal>),
/// A simd accelerated multiple string matcher. Used only for a small
/// number of small literals.
Teddy128(Teddy),
TeddySSSE3(TeddySSSE3),
/// A simd accelerated multiple string matcher. Used only for a small
/// number of small literals. This uses 256-bit vectors.
TeddyAVX2(TeddyAVX2),
}

impl LiteralSearcher {
Expand Down Expand Up @@ -97,7 +103,8 @@ impl LiteralSearcher {
FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
AC(ref aut) => aut.find(haystack).next().map(|m| (m.start, m.end)),
Teddy128(ref ted) => ted.find(haystack).map(|m| (m.start, m.end)),
TeddySSSE3(ref t) => t.find(haystack).map(|m| (m.start, m.end)),
TeddyAVX2(ref t) => t.find(haystack).map(|m| (m.start, m.end)),
}
}

Expand Down Expand Up @@ -135,8 +142,11 @@ impl LiteralSearcher {
Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat),
Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern),
Matcher::AC(ref ac) => LiteralIter::AC(ac.patterns()),
Matcher::Teddy128(ref ted) => {
LiteralIter::Teddy128(ted.patterns())
Matcher::TeddySSSE3(ref ted) => {
LiteralIter::TeddySSSE3(ted.patterns())
}
Matcher::TeddyAVX2(ref ted) => {
LiteralIter::TeddyAVX2(ted.patterns())
}
}
}
Expand Down Expand Up @@ -165,7 +175,8 @@ impl LiteralSearcher {
FreqyPacked(_) => 1,
BoyerMoore(_) => 1,
AC(ref aut) => aut.len(),
Teddy128(ref ted) => ted.len(),
TeddySSSE3(ref ted) => ted.len(),
TeddyAVX2(ref ted) => ted.len(),
}
}

Expand All @@ -178,7 +189,8 @@ impl LiteralSearcher {
FreqyPacked(ref single) => single.approximate_size(),
BoyerMoore(ref single) => single.approximate_size(),
AC(ref aut) => aut.heap_bytes(),
Teddy128(ref ted) => ted.approximate_size(),
TeddySSSE3(ref ted) => ted.approximate_size(),
TeddyAVX2(ref ted) => ted.approximate_size(),
}
}
}
Expand Down Expand Up @@ -219,7 +231,15 @@ impl Matcher {
}
}
let is_aho_corasick_fast = sset.dense.len() == 1 && sset.all_ascii;
if is_teddy_128_available() && !is_aho_corasick_fast {
if TeddyAVX2::available() && !is_aho_corasick_fast {
const MAX_TEDDY_LITERALS: usize = 32;
if lits.literals().len() <= MAX_TEDDY_LITERALS {
if let Some(ted) = TeddyAVX2::new(lits) {
return Matcher::TeddyAVX2(ted);
}
}
}
if TeddySSSE3::available() && !is_aho_corasick_fast {
// Only try Teddy if Aho-Corasick can't use memchr on an ASCII
// byte. Also, in its current form, Teddy doesn't scale well to
// lots of literals.
Expand All @@ -231,8 +251,8 @@ impl Matcher {
// negating the benefit of memchr.
const MAX_TEDDY_LITERALS: usize = 32;
if lits.literals().len() <= MAX_TEDDY_LITERALS {
if let Some(ted) = Teddy::new(lits) {
return Matcher::Teddy128(ted);
if let Some(ted) = TeddySSSE3::new(lits) {
return Matcher::TeddySSSE3(ted);
}
}
// Fallthrough to ol' reliable Aho-Corasick...
Expand All @@ -247,7 +267,8 @@ pub enum LiteralIter<'a> {
Bytes(&'a [u8]),
Single(&'a [u8]),
AC(&'a [Literal]),
Teddy128(&'a [Vec<u8>]),
TeddySSSE3(&'a [Vec<u8>]),
TeddyAVX2(&'a [Vec<u8>]),
}

impl<'a> Iterator for LiteralIter<'a> {
Expand Down Expand Up @@ -283,7 +304,16 @@ impl<'a> Iterator for LiteralIter<'a> {
Some(&**next)
}
}
LiteralIter::Teddy128(ref mut lits) => {
LiteralIter::TeddySSSE3(ref mut lits) => {
if lits.is_empty() {
None
} else {
let next = &lits[0];
*lits = &lits[1..];
Some(&**next)
}
}
LiteralIter::TeddyAVX2(ref mut lits) => {
if lits.is_empty() {
None
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
use syntax::hir::literal::Literals;

/// Reports whether the Teddy matcher may be used.
///
/// This implementation unconditionally returns `false`.
/// NOTE(review): presumably this is the non-SIMD fallback variant, and "128"
/// refers to the vector width in bits; confirm against the enclosing module.
pub fn is_teddy_128_available() -> bool {
false
}

/// A placeholder Teddy matcher that wraps only the unit value and therefore
/// carries no state.
///
/// NOTE(review): appears to be the stand-in used when the real vectorized
/// implementation is not compiled in; confirm against the enclosing module.
#[derive(Debug, Clone)]
pub struct Teddy(());

Expand All @@ -15,6 +11,7 @@ pub struct Match {
}

impl Teddy {
pub fn available() -> bool { false }
pub fn new(_pats: &Literals) -> Option<Teddy> { None }
pub fn patterns(&self) -> &[Vec<u8>] { &[] }
pub fn len(&self) -> usize { 0 }
Expand Down
Loading