Skip to content

Various inconsistencies between different engines #429

Closed
@SeanRBurton

Description

@SeanRBurton

I've generated several failing test-cases which exercise various dark corners, and should make for good regression tests. I did my best to make them mostly orthogonal, but most of the issues seem to be word-boundary related.

/// Runs each pattern against its paired haystack using two differently
/// configured regex builds and prints, one line per case, whether the two
/// builds report identical capture spans.
///
/// `true` means the builds agree (including the case where neither finds a
/// match); `false` means they disagree on whether a match exists, on which
/// groups participated, or on the byte offsets of a group.
fn test() {
    let patterns = [
        "(?:(?-u:\\b)|(?u:h))+",
        "(?u:\\B)",
        "(?:(?u:\\b)|(?s-u:.))+",
        "(?:(?-u:\\B)|(?su:.))+",
        "(?m:$)(?m:^)(?su:.)",
        "(?m:$)^(?m:^)",
        "(?P<kp>(?iu:do)(?m:$))*",

        // From index 7 onward the second build is created with `bytes(false)`.
        "(?u:\\B)",
        "(?:(?-u:\\b)|(?u:[\u{0}-W]))+",
        "((?m:$)(?-u:\\B)(?s-u:.)(?-u:\\B)$)",
        "(?m:$)(?m:$)^(?su:.)",
        "(?-u:\\B)(?m:^)",
        "(?:(?u:\\b)|(?-u:.))+",
    ];
    // haystacks[k] is the input for patterns[k]; the arrays are zipped below.
    let haystacks = [
        "h",
        "鋸",
        "oB",
        "\u{fef80}",
        "\n‣",
        "\n",
        "dodo",

        "䡁",
        "0",
        "\n\n",
        "\n\u{81}¨\u{200a}",
        "0\n",
        "0",
    ];
    for (i, (pattern, haystack)) in patterns.iter().zip(haystacks.iter()).enumerate() {
        // Reference build: no engine is forced on the builder.
        let re0 = ExecBuilder::new(&pattern)
            .only_utf8(false)
            .build()
            .unwrap()
            .into_regex();
        // Build under test: `.nfa()` is requested, and the `bytes` flag is
        // enabled only for the first seven cases.
        let re1 = ExecBuilder::new(&pattern)
            .only_utf8(false)
            .nfa()
            .bytes(i < 7)
            .build()
            .unwrap()
            .into_regex();
        let correct = match (re0.captures(haystack), re1.captures(haystack)) {
            // Both matched: every capture group must agree on participation
            // and on its start/end offsets. `all` stops at the first mismatch.
            (Some(a), Some(b)) => a.iter().zip(b.iter()).all(|(c0, c1)| match (c0, c1) {
                (Some(c), Some(d)) => c.start() == d.start() && c.end() == d.end(),
                (None, None) => true,
                _ => false,
            }),
            // Neither matched: the builds agree, so this is not an
            // inconsistency.
            (None, None) => true,
            // Exactly one build matched.
            _ => false,
        };
        println!("{:?}", correct);
    }
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions