Struct SharedRegex

Source

pub struct SharedRegex {
    pub regex: Arc<Regex>,
    pub cache_key: RegexCacheKey,
}

Fields§

§regex: Arc<Regex>

§cache_key: RegexCacheKey

Methods from Deref<Target = MetaRegex>§

pub fn is_match<'h, I>(&self, input: I) -> bool
where I: Into<Input<'h>>,

Returns true if and only if this regex matches the given haystack.

This routine may short circuit if it knows that scanning future input will never lead to a different result. (Consider how this might make a difference given the regex a+ on the haystack aaaaaaaaaaaaaaa. This routine may stop after it sees the first a, but routines like find need to continue searching because + is greedy by default.)

§Example

use regex_automata::meta::Regex;

let re = Regex::new("foo[0-9]+bar")?;

assert!(re.is_match("foo12345bar"));
assert!(!re.is_match("foobar"));

§Example: consistency with search APIs

is_match is guaranteed to return true whenever find returns a match. This includes searches that are executed entirely within a codepoint:

use regex_automata::{meta::Regex, Input};

let re = Regex::new("a*")?;

// This doesn't match because the default configuration bans empty
// matches from splitting a codepoint.
assert!(!re.is_match(Input::new("☃").span(1..2)));
assert_eq!(None, re.find(Input::new("☃").span(1..2)));

Notice that when UTF-8 mode is disabled, then the above reports a match because the restriction against zero-width matches that split a codepoint has been lifted:

use regex_automata::{meta::Regex, Input, Match};

let re = Regex::builder()
    .configure(Regex::config().utf8_empty(false))
    .build("a*")?;

assert!(re.is_match(Input::new("☃").span(1..2)));
assert_eq!(
    Some(Match::must(0, 1..1)),
    re.find(Input::new("☃").span(1..2)),
);

A similar idea applies when using line anchors with CRLF mode enabled, which prevents them from matching between a \r and a \n.

use regex_automata::{meta::Regex, Input, Match};

let re = Regex::new(r"(?Rm:$)")?;
assert!(!re.is_match(Input::new("\r\n").span(1..1)));
// A regular line anchor, which only considers \n as a
// line terminator, will match.
let re = Regex::new(r"(?m:$)")?;
assert!(re.is_match(Input::new("\r\n").span(1..1)));

pub fn find<'h, I>(&self, input: I) -> Option<Match>
where I: Into<Input<'h>>,

Executes a leftmost search and returns the first match that is found, if one exists.

§Example

use regex_automata::{meta::Regex, Match};

let re = Regex::new("foo[0-9]+")?;
assert_eq!(Some(Match::must(0, 0..8)), re.find("foo12345"));

pub fn captures<'h, I>(&self, input: I, caps: &mut Captures)
where I: Into<Input<'h>>,

Executes a leftmost forward search and writes the spans of capturing groups that participated in a match into the provided [Captures] value. If no match was found, then [Captures::is_match] is guaranteed to return false.

§Example

use regex_automata::{meta::Regex, Span};

let re = Regex::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
let mut caps = re.create_captures();

re.captures("2010-03-14", &mut caps);
assert!(caps.is_match());
assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
assert_eq!(Some(Span::from(8..10)), caps.get_group(3));

pub fn find_iter<'r, 'h, I>(&'r self, input: I) -> FindMatches<'r, 'h>
where I: Into<Input<'h>>,

Returns an iterator over all non-overlapping leftmost matches in the given haystack. If no match exists, then the iterator yields no elements.

§Example

use regex_automata::{meta::Regex, Match};

let re = Regex::new("foo[0-9]+")?;
let haystack = "foo1 foo12 foo123";
let matches: Vec<Match> = re.find_iter(haystack).collect();
assert_eq!(matches, vec![
    Match::must(0, 0..4),
    Match::must(0, 5..10),
    Match::must(0, 11..17),
]);

pub fn captures_iter<'r, 'h, I>(&'r self, input: I) -> CapturesMatches<'r, 'h>
where I: Into<Input<'h>>,

Returns an iterator over all non-overlapping Captures values. If no match exists, then the iterator yields no elements.

This yields the same matches as [Regex::find_iter], but it includes the spans of all capturing groups that participate in each match.

Tip: See util::iter::Searcher for how to correctly iterate over all matches in a haystack while avoiding the creation of a new Captures value for every match. (Which you are forced to do with an Iterator.)

§Example

use regex_automata::{meta::Regex, Span};

let re = Regex::new("foo(?P<numbers>[0-9]+)")?;

let haystack = "foo1 foo12 foo123";
let matches: Vec<Span> = re
    .captures_iter(haystack)
    // The unwrap is OK since 'numbers' matches if the pattern matches.
    .map(|caps| caps.get_group_by_name("numbers").unwrap())
    .collect();
assert_eq!(matches, vec![
    Span::from(3..4),
    Span::from(8..10),
    Span::from(14..17),
]);

pub fn split<'r, 'h, I>(&'r self, input: I) -> Split<'r, 'h>
where I: Into<Input<'h>>,

Returns an iterator of spans of the haystack given, delimited by a match of the regex. Namely, each element of the iterator corresponds to a part of the haystack that isn’t matched by the regular expression.

§Example

To split a string delimited by arbitrary amounts of spaces or tabs:

use regex_automata::meta::Regex;

let re = Regex::new(r"[ \t]+")?;
let hay = "a b \t  c\td    e";
let fields: Vec<&str> = re.split(hay).map(|span| &hay[span]).collect();
assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);

§Example: more cases

Basic usage:

use regex_automata::meta::Regex;

let re = Regex::new(r" ")?;
let hay = "Mary had a little lamb";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]);

let re = Regex::new(r"X")?;
let hay = "";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec![""]);

let re = Regex::new(r"X")?;
let hay = "lionXXtigerXleopard";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["lion", "", "tiger", "leopard"]);

let re = Regex::new(r"::")?;
let hay = "lion::tiger::leopard";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["lion", "tiger", "leopard"]);

If a haystack contains multiple contiguous matches, you will end up with empty spans yielded by the iterator:

use regex_automata::meta::Regex;

let re = Regex::new(r"X")?;
let hay = "XXXXaXXbXc";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);

let re = Regex::new(r"/")?;
let hay = "(///)";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["(", "", "", ")"]);

Separators at the start or end of a haystack are neighbored by empty spans.

use regex_automata::meta::Regex;

let re = Regex::new(r"0")?;
let hay = "010";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["", "1", ""]);

When the empty string is used as a regex, it splits at every valid UTF-8 boundary by default (which includes the beginning and end of the haystack):

use regex_automata::meta::Regex;

let re = Regex::new(r"")?;
let hay = "rust";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["", "r", "u", "s", "t", ""]);

// Splitting by an empty string is UTF-8 aware by default!
let re = Regex::new(r"")?;
let hay = "☃";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["", "☃", ""]);

But note that UTF-8 mode for empty strings can be disabled, which will then result in a match at every byte offset in the haystack, including between every UTF-8 code unit.

use regex_automata::meta::Regex;

let re = Regex::builder()
    .configure(Regex::config().utf8_empty(false))
    .build(r"")?;
let hay = "☃".as_bytes();
let got: Vec<&[u8]> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec![
    // Writing byte string slices is just brutal. The problem is that
    // b"foo" has type &[u8; 3] instead of &[u8].
    &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..],
]);

Contiguous separators (which commonly show up with whitespace) can lead to possibly surprising behavior. For example, this code is correct:

use regex_automata::meta::Regex;

let re = Regex::new(r" ")?;
let hay = "    a  b c";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]);

It does not give you ["a", "b", "c"]. For that behavior, you’d want to match contiguous space characters:

use regex_automata::meta::Regex;

let re = Regex::new(r" +")?;
let hay = "    a  b c";
let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect();
// N.B. This does still include a leading empty span because ' +'
// matches at the beginning of the haystack.
assert_eq!(got, vec!["", "a", "b", "c"]);

pub fn splitn<'r, 'h, I>(&'r self, input: I, limit: usize) -> SplitN<'r, 'h>
where I: Into<Input<'h>>,

Returns an iterator of at most limit spans of the haystack given, delimited by a match of the regex. (A limit of 0 will return no spans.) Namely, each element of the iterator corresponds to a part of the haystack that isn’t matched by the regular expression. The remainder of the haystack that is not split will be the last element in the iterator.

§Example

Get the first two words in some haystack:

use regex_automata::meta::Regex;

let re = Regex::new(r"\W+").unwrap();
let hay = "Hey! How are you?";
let fields: Vec<&str> =
    re.splitn(hay, 3).map(|span| &hay[span]).collect();
assert_eq!(fields, vec!["Hey", "How", "are you?"]);

§Examples: more cases

use regex_automata::meta::Regex;

let re = Regex::new(r" ")?;
let hay = "Mary had a little lamb";
let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["Mary", "had", "a little lamb"]);

let re = Regex::new(r"X")?;
let hay = "";
let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec![""]);

let re = Regex::new(r"X")?;
let hay = "lionXXtigerXleopard";
let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["lion", "", "tigerXleopard"]);

let re = Regex::new(r"::")?;
let hay = "lion::tiger::leopard";
let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["lion", "tiger::leopard"]);

let re = Regex::new(r"X")?;
let hay = "abcXdef";
let got: Vec<&str> = re.splitn(hay, 1).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["abcXdef"]);

let re = Regex::new(r"X")?;
let hay = "abcdef";
let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect();
assert_eq!(got, vec!["abcdef"]);

let re = Regex::new(r"X")?;
let hay = "abcXdef";
let got: Vec<&str> = re.splitn(hay, 0).map(|sp| &hay[sp]).collect();
assert!(got.is_empty());

pub fn search(&self, input: &Input<'_>) -> Option<Match>

Returns the start and end offset of the leftmost match. If no match exists, then None is returned.

This is like [Regex::find], but it accepts a concrete &Input instead of an Into<Input>.

§Example

use regex_automata::{meta::Regex, Input, Match};

let re = Regex::new(r"Samwise|Sam")?;
let input = Input::new(
    "one of the chief characters, Samwise the Brave",
);
assert_eq!(Some(Match::must(0, 29..36)), re.search(&input));

pub fn search_half(&self, input: &Input<'_>) -> Option<HalfMatch>

Returns the end offset of the leftmost match. If no match exists, then None is returned.

This is distinct from [Regex::search] in that it only returns the end of a match and not the start of the match. Depending on a variety of implementation details, this may permit the regex engine to do less overall work. For example, if a DFA is being used to execute a search, then the start of a match usually requires running a separate DFA in reverse to find the start of a match. If one only needs the end of a match, then the separate reverse scan to find the start of a match can be skipped. (Note that the reverse scan is avoided even when using Regex::search when possible, for example, in the case of an anchored search.)

§Example

use regex_automata::{meta::Regex, Input, HalfMatch};

let re = Regex::new(r"Samwise|Sam")?;
let input = Input::new(
    "one of the chief characters, Samwise the Brave",
);
assert_eq!(Some(HalfMatch::must(0, 36)), re.search_half(&input));

pub fn search_captures(&self, input: &Input<'_>, caps: &mut Captures)

Executes a leftmost forward search and writes the spans of capturing groups that participated in a match into the provided [Captures] value. If no match was found, then [Captures::is_match] is guaranteed to return false.

This is like [Regex::captures], but it accepts a concrete &Input instead of an Into<Input>.

§Example: specific pattern search

This example shows how to build a multi-pattern Regex that permits searching for specific patterns.

use regex_automata::{
    meta::Regex,
    Anchored, Match, PatternID, Input,
};

let re = Regex::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
let mut caps = re.create_captures();
let haystack = "foo123";

// Since we are using the default leftmost-first match and both
// patterns match at the same starting position, only the first pattern
// will be returned in this case when doing a search for any of the
// patterns.
let expected = Some(Match::must(0, 0..6));
re.search_captures(&Input::new(haystack), &mut caps);
assert_eq!(expected, caps.get_match());

// But if we want to check whether some other pattern matches, then we
// can provide its pattern ID.
let expected = Some(Match::must(1, 0..6));
let input = Input::new(haystack)
    .anchored(Anchored::Pattern(PatternID::must(1)));
re.search_captures(&input, &mut caps);
assert_eq!(expected, caps.get_match());

§Example: specifying the bounds of a search

This example shows how providing the bounds of a search can produce different results than simply sub-slicing the haystack.

use regex_automata::{meta::Regex, Match, Input};

let re = Regex::new(r"\b[0-9]{3}\b")?;
let mut caps = re.create_captures();
let haystack = "foo123bar";

// Since we sub-slice the haystack, the search doesn't know about
// the larger context and assumes that `123` is surrounded by word
// boundaries. And of course, the match position is reported relative
// to the sub-slice as well, which means we get `0..3` instead of
// `3..6`.
let expected = Some(Match::must(0, 0..3));
let input = Input::new(&haystack[3..6]);
re.search_captures(&input, &mut caps);
assert_eq!(expected, caps.get_match());

// But if we provide the bounds of the search within the context of the
// entire haystack, then the search can take the surrounding context
// into account. (And if we did find a match, it would be reported
// as a valid offset into `haystack` instead of its sub-slice.)
let expected = None;
let input = Input::new(haystack).range(3..6);
re.search_captures(&input, &mut caps);
assert_eq!(expected, caps.get_match());

pub fn search_slots( &self, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>

Executes a leftmost forward search and writes the spans of capturing groups that participated in a match into the provided slots, and returns the matching pattern ID. The contents of the slots for patterns other than the matching pattern are unspecified. If no match was found, then None is returned and the contents of slots is unspecified.

This is like [Regex::search_captures], but it accepts a raw slots slice instead of a Captures value. This is useful in contexts where you don’t want or need to allocate a Captures.

It is legal to pass any number of slots to this routine. If the regex engine would otherwise write a slot offset that doesn’t fit in the provided slice, then it is simply skipped. In general though, there are usually three slice lengths you might want to use:

An empty slice, if you only care about which pattern matched.
A slice with pattern_len() * 2 slots, if you only care about the overall match spans for each matching pattern.
A slice with slot_len() slots, which permits recording match offsets for every capturing group in every pattern.

§Example

This example shows how to find the overall match offsets in a multi-pattern search without allocating a Captures value. Indeed, we can put our slots right on the stack.

use regex_automata::{meta::Regex, PatternID, Input};

let re = Regex::new_many(&[
    r"\pL+",
    r"\d+",
])?;
let input = Input::new("!@#123");

// We only care about the overall match offsets here, so we just
// allocate two slots for each pattern. Each slot records the start
// and end of the match.
let mut slots = [None; 4];
let pid = re.search_slots(&input, &mut slots);
assert_eq!(Some(PatternID::must(1)), pid);

// The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
// See 'GroupInfo' for more details on the mapping between groups and
// slot indices.
let slot_start = pid.unwrap().as_usize() * 2;
let slot_end = slot_start + 1;
assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));

pub fn which_overlapping_matches( &self, input: &Input<'_>, patset: &mut PatternSet, )

Writes the set of patterns that match anywhere in the given search configuration to patset. If multiple patterns match at the same position and this Regex was configured with [MatchKind::All] semantics, then all matching patterns are written to the given set.

Unless all of the patterns in this Regex are anchored, then generally speaking, this will scan the entire haystack.

This search routine does not clear the pattern set. This gives some flexibility to the caller (e.g., running multiple searches with the same pattern set), but does make the API bug-prone if you’re reusing the same pattern set for multiple searches but intended them to be independent.

If a pattern ID matched but the given PatternSet does not have sufficient capacity to store it, then it is not inserted and silently dropped.

§Example

This example shows how to find all matching patterns in a haystack, even when some patterns match at the same position as other patterns. It is important that we configure the Regex with [MatchKind::All] semantics here, or else overlapping matches will not be reported.

use regex_automata::{meta::Regex, Input, MatchKind, PatternSet};

let patterns = &[
    r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
];
let re = Regex::builder()
    .configure(Regex::config().match_kind(MatchKind::All))
    .build_many(patterns)?;

let input = Input::new("foobar");
let mut patset = PatternSet::new(re.pattern_len());
re.which_overlapping_matches(&input, &mut patset);
let expected = vec![0, 2, 3, 4, 6];
let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
assert_eq!(expected, got);

pub fn search_with(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>

This is like [Regex::search], but requires the caller to explicitly pass a [Cache].

§Why pass a `Cache` explicitly?

Passing a Cache explicitly will bypass the use of an internal memory pool used by Regex to get a Cache for a search. The use of this pool can be slower in some cases when a Regex is used from multiple threads simultaneously. Typically, performance only becomes an issue when there is heavy contention, which in turn usually only occurs when each thread’s primary unit of work is a regex search on a small haystack.

§Example

use regex_automata::{meta::Regex, Input, Match};

let re = Regex::new(r"Samwise|Sam")?;
let mut cache = re.create_cache();
let input = Input::new(
    "one of the chief characters, Samwise the Brave",
);
assert_eq!(
    Some(Match::must(0, 29..36)),
    re.search_with(&mut cache, &input),
);

pub fn search_half_with( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>

This is like [Regex::search_half], but requires the caller to explicitly pass a [Cache].

§Why pass a `Cache` explicitly?

Passing a Cache explicitly will bypass the use of an internal memory pool used by Regex to get a Cache for a search. The use of this pool can be slower in some cases when a Regex is used from multiple threads simultaneously. Typically, performance only becomes an issue when there is heavy contention, which in turn usually only occurs when each thread’s primary unit of work is a regex search on a small haystack.

§Example

use regex_automata::{meta::Regex, Input, HalfMatch};

let re = Regex::new(r"Samwise|Sam")?;
let mut cache = re.create_cache();
let input = Input::new(
    "one of the chief characters, Samwise the Brave",
);
assert_eq!(
    Some(HalfMatch::must(0, 36)),
    re.search_half_with(&mut cache, &input),
);

pub fn search_captures_with( &self, cache: &mut Cache, input: &Input<'_>, caps: &mut Captures, )

This is like [Regex::search_captures], but requires the caller to explicitly pass a [Cache].

§Why pass a `Cache` explicitly?

Passing a Cache explicitly will bypass the use of an internal memory pool used by Regex to get a Cache for a search. The use of this pool can be slower in some cases when a Regex is used from multiple threads simultaneously. Typically, performance only becomes an issue when there is heavy contention, which in turn usually only occurs when each thread’s primary unit of work is a regex search on a small haystack.

§Example: specific pattern search

This example shows how to build a multi-pattern Regex that permits searching for specific patterns.

use regex_automata::{
    meta::Regex,
    Anchored, Match, PatternID, Input,
};

let re = Regex::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
let haystack = "foo123";

// Since we are using the default leftmost-first match and both
// patterns match at the same starting position, only the first pattern
// will be returned in this case when doing a search for any of the
// patterns.
let expected = Some(Match::must(0, 0..6));
re.search_captures_with(&mut cache, &Input::new(haystack), &mut caps);
assert_eq!(expected, caps.get_match());

// But if we want to check whether some other pattern matches, then we
// can provide its pattern ID.
let expected = Some(Match::must(1, 0..6));
let input = Input::new(haystack)
    .anchored(Anchored::Pattern(PatternID::must(1)));
re.search_captures_with(&mut cache, &input, &mut caps);
assert_eq!(expected, caps.get_match());

§Example: specifying the bounds of a search

This example shows how providing the bounds of a search can produce different results than simply sub-slicing the haystack.

use regex_automata::{meta::Regex, Match, Input};

let re = Regex::new(r"\b[0-9]{3}\b")?;
let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
let haystack = "foo123bar";

// Since we sub-slice the haystack, the search doesn't know about
// the larger context and assumes that `123` is surrounded by word
// boundaries. And of course, the match position is reported relative
// to the sub-slice as well, which means we get `0..3` instead of
// `3..6`.
let expected = Some(Match::must(0, 0..3));
let input = Input::new(&haystack[3..6]);
re.search_captures_with(&mut cache, &input, &mut caps);
assert_eq!(expected, caps.get_match());

// But if we provide the bounds of the search within the context of the
// entire haystack, then the search can take the surrounding context
// into account. (And if we did find a match, it would be reported
// as a valid offset into `haystack` instead of its sub-slice.)
let expected = None;
let input = Input::new(haystack).range(3..6);
re.search_captures_with(&mut cache, &input, &mut caps);
assert_eq!(expected, caps.get_match());

pub fn search_slots_with( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>

This is like [Regex::search_slots], but requires the caller to explicitly pass a [Cache].

§Why pass a `Cache` explicitly?

Passing a Cache explicitly will bypass the use of an internal memory pool used by Regex to get a Cache for a search. The use of this pool can be slower in some cases when a Regex is used from multiple threads simultaneously. Typically, performance only becomes an issue when there is heavy contention, which in turn usually only occurs when each thread’s primary unit of work is a regex search on a small haystack.

§Example

This example shows how to find the overall match offsets in a multi-pattern search without allocating a Captures value. Indeed, we can put our slots right on the stack.

use regex_automata::{meta::Regex, PatternID, Input};

let re = Regex::new_many(&[
    r"\pL+",
    r"\d+",
])?;
let mut cache = re.create_cache();
let input = Input::new("!@#123");

// We only care about the overall match offsets here, so we just
// allocate two slots for each pattern. Each slot records the start
// and end of the match.
let mut slots = [None; 4];
let pid = re.search_slots_with(&mut cache, &input, &mut slots);
assert_eq!(Some(PatternID::must(1)), pid);

// The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
// See 'GroupInfo' for more details on the mapping between groups and
// slot indices.
let slot_start = pid.unwrap().as_usize() * 2;
let slot_end = slot_start + 1;
assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));

pub fn which_overlapping_matches_with( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )

This is like [Regex::which_overlapping_matches], but requires the caller to explicitly pass a [Cache].

§Why pass a `Cache` explicitly?

Passing a Cache explicitly will bypass the use of an internal memory pool used by Regex to get a Cache for a search. The use of this pool can be slower in some cases when a Regex is used from multiple threads simultaneously. Typically, performance only becomes an issue when there is heavy contention, which in turn usually only occurs when each thread’s primary unit of work is a regex search on a small haystack.

§Example

use regex_automata::{meta::Regex, Input, MatchKind, PatternSet};

let patterns = &[
    r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
];
let re = Regex::builder()
    .configure(Regex::config().match_kind(MatchKind::All))
    .build_many(patterns)?;
let mut cache = re.create_cache();

let input = Input::new("foobar");
let mut patset = PatternSet::new(re.pattern_len());
re.which_overlapping_matches_with(&mut cache, &input, &mut patset);
let expected = vec![0, 2, 3, 4, 6];
let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
assert_eq!(expected, got);

pub fn create_captures(&self) -> Captures

Creates a new object for recording capture group offsets. This is used in search APIs like [Regex::captures] and [Regex::search_captures].

This is a convenience routine for Captures::all(re.group_info().clone()). Callers may build other types of Captures values that record less information (and thus require less work from the regex engine) using [Captures::matches] and [Captures::empty].

§Example

This shows some alternatives to [Regex::create_captures]:

use regex_automata::{
    meta::Regex,
    util::captures::Captures,
    Match, PatternID, Span,
};

let re = Regex::new(r"(?<first>[A-Z][a-z]+) (?<last>[A-Z][a-z]+)")?;

// This is equivalent to Regex::create_captures. It stores matching
// offsets for all groups in the regex.
let mut all = Captures::all(re.group_info().clone());
re.captures("Bruce Springsteen", &mut all);
assert_eq!(Some(Match::must(0, 0..17)), all.get_match());
assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first"));
assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last"));

// In this version, we only care about the implicit groups, which
// means offsets for the explicit groups will be unavailable. It can
// sometimes be faster to ask for fewer groups, since the underlying
// regex engine needs to do less work to keep track of them.
let mut matches = Captures::matches(re.group_info().clone());
re.captures("Bruce Springsteen", &mut matches);
// We still get the overall match info.
assert_eq!(Some(Match::must(0, 0..17)), matches.get_match());
// But now the explicit groups are unavailable.
assert_eq!(None, matches.get_group_by_name("first"));
assert_eq!(None, matches.get_group_by_name("last"));

// Finally, in this version, we don't ask to keep track of offsets for
// *any* groups. All we get back is whether a match occurred, and if
// so, the ID of the pattern that matched.
let mut empty = Captures::empty(re.group_info().clone());
re.captures("Bruce Springsteen", &mut empty);
// it's a match!
assert!(empty.is_match());
// for pattern ID 0
assert_eq!(Some(PatternID::ZERO), empty.pattern());
// Match offsets are unavailable.
assert_eq!(None, empty.get_match());
// And of course, explicit groups are unavailable too.
assert_eq!(None, empty.get_group_by_name("first"));
assert_eq!(None, empty.get_group_by_name("last"));

pub fn create_cache(&self) -> Cache

Creates a new cache for use with lower level search APIs like [Regex::search_with].

The cache returned should only be used for searches for this Regex. If you want to reuse the cache for another Regex, then you must call [Cache::reset] with that Regex.

This is a convenience routine for [Cache::new].

§Example

use regex_automata::{meta::Regex, Input, Match};

let re = Regex::new(r"(?-u)m\w+\s+m\w+")?;
let mut cache = re.create_cache();
let input = Input::new("crazy janey and her mission man");
assert_eq!(
    Some(Match::must(0, 20..31)),
    re.search_with(&mut cache, &input),
);

pub fn pattern_len(&self) -> usize

Returns the total number of patterns in this regex.

The standard [Regex::new] constructor always results in a Regex with a single pattern, but [Regex::new_many] permits building a multi-pattern regex.

A Regex guarantees that the maximum possible PatternID returned in any match is Regex::pattern_len() - 1. In the case where the number of patterns is 0, a match is impossible.

§Example

use regex_automata::meta::Regex;

let re = Regex::new(r"(?m)^[a-z]$")?;
assert_eq!(1, re.pattern_len());

let re = Regex::new_many::<&str>(&[])?;
assert_eq!(0, re.pattern_len());

let re = Regex::new_many(&["a", "b", "c"])?;
assert_eq!(3, re.pattern_len());

pub fn captures_len(&self) -> usize

Returns the total number of capturing groups.

This includes the implicit capturing group corresponding to the entire match. Therefore, the minimum value returned is 1.

§Example

This shows a few patterns and how many capture groups they have.

use regex_automata::meta::Regex;

let len = |pattern| {
    Regex::new(pattern).map(|re| re.captures_len())
};

assert_eq!(1, len("a")?);
assert_eq!(2, len("(a)")?);
assert_eq!(3, len("(a)|(b)")?);
assert_eq!(5, len("(a)(b)|(c)(d)")?);
assert_eq!(2, len("(a)|b")?);
assert_eq!(2, len("a|(b)")?);
assert_eq!(2, len("(b)*")?);
assert_eq!(2, len("(b)+")?);

§Example: multiple patterns

This routine also works for multiple patterns. The total number is the sum of the capture groups of each pattern.

use regex_automata::meta::Regex;

let len = |patterns| {
    Regex::new_many(patterns).map(|re| re.captures_len())
};

assert_eq!(2, len(&["a", "b"])?);
assert_eq!(4, len(&["(a)", "(b)"])?);
assert_eq!(6, len(&["(a)|(b)", "(c)|(d)"])?);
assert_eq!(8, len(&["(a)(b)|(c)(d)", "(x)(y)"])?);
assert_eq!(3, len(&["(a)", "b"])?);
assert_eq!(3, len(&["a", "(b)"])?);
assert_eq!(4, len(&["(a)", "(b)*"])?);
assert_eq!(4, len(&["(a)+", "(b)+"])?);

pub fn static_captures_len(&self) -> Option<usize>

Returns the total number of capturing groups that appear in every possible match.

If the number of capture groups can vary depending on the match, then this returns None. That is, a value is only returned when the number of matching groups is invariant or “static.”

Note that like [Regex::captures_len], this does include the implicit capturing group corresponding to the entire match. Therefore, when a non-None value is returned, it is guaranteed to be at least 1. Stated differently, a return value of Some(0) is impossible.

§Example

This shows a few cases where a static number of capture groups is available and a few cases where it is not.

use regex_automata::meta::Regex;

let len = |pattern| {
    Regex::new(pattern).map(|re| re.static_captures_len())
};

assert_eq!(Some(1), len("a")?);
assert_eq!(Some(2), len("(a)")?);
assert_eq!(Some(2), len("(a)|(b)")?);
assert_eq!(Some(3), len("(a)(b)|(c)(d)")?);
assert_eq!(None, len("(a)|b")?);
assert_eq!(None, len("a|(b)")?);
assert_eq!(None, len("(b)*")?);
assert_eq!(Some(2), len("(b)+")?);

§Example: multiple patterns

This property extends to regexes with multiple patterns as well. In order for there to be a static number of capture groups in this case, every pattern must have the same static number.

use regex_automata::meta::Regex;

let len = |patterns| {
    Regex::new_many(patterns).map(|re| re.static_captures_len())
};

assert_eq!(Some(1), len(&["a", "b"])?);
assert_eq!(Some(2), len(&["(a)", "(b)"])?);
assert_eq!(Some(2), len(&["(a)|(b)", "(c)|(d)"])?);
assert_eq!(Some(3), len(&["(a)(b)|(c)(d)", "(x)(y)"])?);
assert_eq!(None, len(&["(a)", "b"])?);
assert_eq!(None, len(&["a", "(b)"])?);
assert_eq!(None, len(&["(a)", "(b)*"])?);
assert_eq!(Some(2), len(&["(a)+", "(b)+"])?);

pub fn group_info(&self) -> &GroupInfo

Return information about the capture groups in this Regex.

A GroupInfo is an immutable object that can be cheaply cloned. It is responsible for maintaining a mapping between the capture groups in the concrete syntax of zero or more regex patterns and their internal representation used by some of the regex matchers. It is also responsible for maintaining a mapping between the name of each group (if one exists) and its corresponding group index.

A GroupInfo is ultimately what is used to build a [Captures] value, which is some mutable space where group offsets are stored as a result of a search.

§Example

This shows some alternatives to [Regex::create_captures]:

use regex_automata::{
    meta::Regex,
    util::captures::Captures,
    Match, PatternID, Span,
};

let re = Regex::new(r"(?<first>[A-Z][a-z]+) (?<last>[A-Z][a-z]+)")?;

// This is equivalent to Regex::create_captures. It stores matching
// offsets for all groups in the regex.
let mut all = Captures::all(re.group_info().clone());
re.captures("Bruce Springsteen", &mut all);
assert_eq!(Some(Match::must(0, 0..17)), all.get_match());
assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first"));
assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last"));

// In this version, we only care about the implicit groups, which
// means offsets for the explicit groups will be unavailable. It can
// sometimes be faster to ask for fewer groups, since the underlying
// regex engine needs to do less work to keep track of them.
let mut matches = Captures::matches(re.group_info().clone());
re.captures("Bruce Springsteen", &mut matches);
// We still get the overall match info.
assert_eq!(Some(Match::must(0, 0..17)), matches.get_match());
// But now the explicit groups are unavailable.
assert_eq!(None, matches.get_group_by_name("first"));
assert_eq!(None, matches.get_group_by_name("last"));

// Finally, in this version, we don't ask to keep track of offsets for
// *any* groups. All we get back is whether a match occurred, and if
// so, the ID of the pattern that matched.
let mut empty = Captures::empty(re.group_info().clone());
re.captures("Bruce Springsteen", &mut empty);
// it's a match!
assert!(empty.is_match());
// for pattern ID 0
assert_eq!(Some(PatternID::ZERO), empty.pattern());
// Match offsets are unavailable.
assert_eq!(None, empty.get_match());
// And of course, explicit groups are unavailable too.
assert_eq!(None, empty.get_group_by_name("first"));
assert_eq!(None, empty.get_group_by_name("last"));

pub fn get_config(&self) -> &Config

Returns the configuration object used to build this Regex.

If no configuration object was explicitly passed, then the configuration returned represents the default.

pub fn is_accelerated(&self) -> bool

Returns true if this regex has a high chance of being “accelerated.”

The precise meaning of “accelerated” is specifically left unspecified, but the general meaning is that the search has a high likelihood of running faster than a character-at-a-time loop inside a standard regex engine.

When a regex is accelerated, it is only a probabilistic claim. That is, just because the regex is believed to be accelerated, that doesn’t mean it will definitely execute searches very fast. Similarly, if a regex is not accelerated, that is also a probabilistic claim. That is, a regex for which is_accelerated returns false could still run searches more quickly than a regex for which is_accelerated returns true.

Whether a regex is marked as accelerated or not is dependent on implementation details that may change in a semver compatible release. That is, a regex that is accelerated in an x.y.1 release might not be accelerated in an x.y.2 release.

Basically, the value of acceleration boils down to a hedge: a hodge podge of internal heuristics combine to make a probabilistic guess that this regex search may run “fast.” The value in knowing this from a caller’s perspective is that it may act as a signal that no further work should be done to accelerate a search. For example, a grep-like tool might try to do some extra work extracting literals from a regex to create its own heuristic acceleration strategies. But it might choose to defer to this crate’s acceleration strategy if one exists. This routine permits querying whether such a strategy is active for a particular regex.

§Example

use regex_automata::meta::Regex;

// A simple literal is very likely to be accelerated.
let re = Regex::new(r"foo")?;
assert!(re.is_accelerated());

// A regex with no literals is likely to not be accelerated.
let re = Regex::new(r"\w")?;
assert!(!re.is_accelerated());

pub fn memory_usage(&self) -> usize

Return the total approximate heap memory, in bytes, used by this Regex.

Note that currently, there is no high level configuration for setting a limit on the specific value returned by this routine. Instead, the following routines can be used to control heap memory at a bit of a lower level:

[Config::nfa_size_limit] controls how big any of the NFAs are allowed to be.
[Config::onepass_size_limit] controls how big the one-pass DFA is allowed to be.
[Config::hybrid_cache_capacity] controls how much memory the lazy DFA is permitted to allocate to store its transition table.
[Config::dfa_size_limit] controls how big a fully compiled DFA is allowed to be.
[Config::dfa_state_limit] controls the conditions under which the meta regex engine will even attempt to build a fully compiled DFA.

Struct SharedRegex Copy item path

Fields§

Methods from Deref<Target = MetaRegex>§

pub fn is_match<'h, I>(&self, input: I) -> bool where I: Into<Input<'h>>,

§Example

§Example: consistency with search APIs

pub fn find<'h, I>(&self, input: I) -> Option<Match> where I: Into<Input<'h>>,

§Example

pub fn captures<'h, I>(&self, input: I, caps: &mut Captures) where I: Into<Input<'h>>,

§Example

pub fn find_iter<'r, 'h, I>(&'r self, input: I) -> FindMatches<'r, 'h> where I: Into<Input<'h>>,

§Example

pub fn captures_iter<'r, 'h, I>(&'r self, input: I) -> CapturesMatches<'r, 'h> where I: Into<Input<'h>>,

§Example

pub fn split<'r, 'h, I>(&'r self, input: I) -> Split<'r, 'h> where I: Into<Input<'h>>,

§Example

§Example: more cases

pub fn splitn<'r, 'h, I>(&'r self, input: I, limit: usize) -> SplitN<'r, 'h> where I: Into<Input<'h>>,

§Example

§Examples: more cases

pub fn search(&self, input: &Input<'_>) -> Option<Match>

§Example

pub fn search_half(&self, input: &Input<'_>) -> Option<HalfMatch>

§Example

pub fn search_captures(&self, input: &Input<'_>, caps: &mut Captures)

§Example: specific pattern search

§Example: specifying the bounds of a search

pub fn search_slots( &self, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>

§Example

pub fn which_overlapping_matches( &self, input: &Input<'_>, patset: &mut PatternSet, )

§Example

pub fn search_with(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>

§Why pass a Cache explicitly?

§Example

pub fn search_half_with( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>

§Why pass a Cache explicitly?

§Example

pub fn search_captures_with( &self, cache: &mut Cache, input: &Input<'_>, caps: &mut Captures, )

§Why pass a Cache explicitly?

§Example: specific pattern search

§Example: specifying the bounds of a search

pub fn search_slots_with( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>

§Why pass a Cache explicitly?

§Example

pub fn which_overlapping_matches_with( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )

§Why pass a Cache explicitly?

§Example

pub fn create_captures(&self) -> Captures

§Example

pub fn create_cache(&self) -> Cache

§Example

pub fn pattern_len(&self) -> usize

§Example

pub fn captures_len(&self) -> usize

§Example

§Example: multiple patterns

pub fn static_captures_len(&self) -> Option<usize>

§Example

§Example: multiple patterns

pub fn group_info(&self) -> &GroupInfo

§Example

pub fn get_config(&self) -> &Config

pub fn is_accelerated(&self) -> bool

§Example

pub fn memory_usage(&self) -> usize

Trait Implementations§

impl Clone for SharedRegex

fn clone(&self) -> SharedRegex

fn clone_from(&mut self, source: &Self)

impl Debug for SharedRegex

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Deref for SharedRegex

type Target = Regex

fn deref(&self) -> &Self::Target

Auto Trait Implementations§

impl Freeze for SharedRegex

impl RefUnwindSafe for SharedRegex

impl Send for SharedRegex

impl Sync for SharedRegex

impl Unpin for SharedRegex

Struct SharedRegex

pub fn is_match<'h, I>(&self, input: I) -> bool
where I: Into<Input<'h>>,

pub fn find<'h, I>(&self, input: I) -> Option<Match>
where I: Into<Input<'h>>,

pub fn captures<'h, I>(&self, input: I, caps: &mut Captures)
where I: Into<Input<'h>>,

pub fn find_iter<'r, 'h, I>(&'r self, input: I) -> FindMatches<'r, 'h>
where I: Into<Input<'h>>,

pub fn captures_iter<'r, 'h, I>(&'r self, input: I) -> CapturesMatches<'r, 'h>
where I: Into<Input<'h>>,

pub fn split<'r, 'h, I>(&'r self, input: I) -> Split<'r, 'h>
where I: Into<Input<'h>>,

pub fn splitn<'r, 'h, I>(&'r self, input: I, limit: usize) -> SplitN<'r, 'h>
where I: Into<Input<'h>>,

§Why pass a `Cache` explicitly?

§Why pass a `Cache` explicitly?

§Why pass a `Cache` explicitly?

§Why pass a `Cache` explicitly?

§Why pass a `Cache` explicitly?

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<P, T> Receiver for P
where P: Deref<Target = T> + ?Sized, T: ?Sized,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,