class RE2::Regexp
Public Class Methods
Returns a version of +unquoted+ with all potentially meaningful regexp characters escaped. The returned string, used as a regular expression, will exactly match the original string.
@param [String] unquoted the unquoted string @return [String] the escaped string @example
RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
/*
 * Escapes every potentially meaningful regexp character in +unquoted+ and
 * returns the result as a new Ruby string.
 *
 * The input is handed to RE2::QuoteMeta together with its explicit length so
 * that a string containing embedded NUL bytes is escaped in full instead of
 * being cut off at the first NUL.
 */
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  UNUSED(self);

  /* Coerce the argument to a String before touching its buffer. */
  StringValue(unquoted);

  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted)));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}
Returns a new {RE2::Regexp} object with a compiled version of +pattern+ stored inside.
@return [RE2::Regexp]
@overload initialize(pattern)
Returns a new {RE2::Regexp} object with a compiled version of +pattern+ stored inside with the default options. @param [String] pattern the pattern to compile @return [RE2::Regexp] an RE2::Regexp with the specified pattern @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
@overload initialize(pattern, options)
Returns a new {RE2::Regexp} object with a compiled version of +pattern+ stored inside with the specified options. @param [String] pattern the pattern to compile @param [Hash] options the options with which to compile the pattern @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1 @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax @option options [Boolean] :longest_match (false) search for longest match, not first match @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR @option options [Fixnum] :max_mem approx. max memory footprint of RE2 @option options [Boolean] :literal (false) interpret string as literal, not regexp @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode) @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options @raise [NoMemoryError] if memory could not be allocated for the compiled pattern
/*
 * Compiles +pattern+ into a new RE2 object and stores it in the receiver's
 * re2_pattern struct.
 *
 * Accepts one required argument (the pattern) and one optional argument (a
 * Hash of options). Each recognised option is only applied when its key is
 * present with a non-nil value; otherwise the RE2::Options default stands.
 *
 * Raises ArgumentError if a truthy +options+ is not a Hash and NoMemoryError
 * if the RE2 object could not be allocated.
 *
 * The pattern is coerced to a String *before* the allocation expression so a
 * conversion failure cannot interrupt a half-finished `new`, and it is passed
 * with an explicit length so embedded NUL bytes do not truncate it.
 */
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
        max_mem, literal, never_nl, case_sensitive, perl_classes,
        word_boundary, one_line;
  re2_pattern *p;

  rb_scan_args(argc, argv, "11", &pattern, &options);
  Data_Get_Struct(self, re2_pattern, p);

  if (RTEST(options)) {
    if (TYPE(options) != T_HASH) {
      rb_raise(rb_eArgError, "options should be a hash");
    }

    RE2::Options re2_options;

    utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
    if (!NIL_P(utf8)) {
      re2_options.set_utf8(RTEST(utf8));
    }

    posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
    if (!NIL_P(posix_syntax)) {
      re2_options.set_posix_syntax(RTEST(posix_syntax));
    }

    longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
    if (!NIL_P(longest_match)) {
      re2_options.set_longest_match(RTEST(longest_match));
    }

    log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
    if (!NIL_P(log_errors)) {
      re2_options.set_log_errors(RTEST(log_errors));
    }

    max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
    if (!NIL_P(max_mem)) {
      re2_options.set_max_mem(NUM2INT(max_mem));
    }

    literal = rb_hash_aref(options, ID2SYM(id_literal));
    if (!NIL_P(literal)) {
      re2_options.set_literal(RTEST(literal));
    }

    never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
    if (!NIL_P(never_nl)) {
      re2_options.set_never_nl(RTEST(never_nl));
    }

    case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
    if (!NIL_P(case_sensitive)) {
      re2_options.set_case_sensitive(RTEST(case_sensitive));
    }

    perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
    if (!NIL_P(perl_classes)) {
      re2_options.set_perl_classes(RTEST(perl_classes));
    }

    word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
    if (!NIL_P(word_boundary)) {
      re2_options.set_word_boundary(RTEST(word_boundary));
    }

    one_line = rb_hash_aref(options, ID2SYM(id_one_line));
    if (!NIL_P(one_line)) {
      re2_options.set_one_line(RTEST(one_line));
    }

    /* Coerce first: this may raise, and must not do so mid-allocation. */
    StringValue(pattern);
    p->pattern = new(nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)),
        re2_options);
  } else {
    /* Coerce first: this may raise, and must not do so mid-allocation. */
    StringValue(pattern);
    p->pattern = new(nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern), RSTRING_LEN(pattern)));
  }

  if (p->pattern == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  return self;
}
Returns a version of +unquoted+ with all potentially meaningful regexp characters escaped. The returned string, used as a regular expression, will exactly match the original string.
@param [String] unquoted the unquoted string @return [String] the escaped string @example
RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?"
/*
 * Escapes every potentially meaningful regexp character in +unquoted+ and
 * returns the result as a new Ruby string.
 *
 * The input is handed to RE2::QuoteMeta together with its explicit length so
 * that a string containing embedded NUL bytes is escaped in full instead of
 * being cut off at the first NUL.
 */
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  UNUSED(self);

  /* Coerce the argument to a String before touching its buffer. */
  StringValue(unquoted);

  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted), RSTRING_LEN(unquoted)));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}
Public Instance Methods
Returns true or false to indicate a successful match. Equivalent to +re2.match(text, 0)+.
@return [Boolean] whether the match was successful
/*
 * Predicate form of match: forwards +text+ to re2_regexp_match together with
 * an explicit submatch count of 0 and returns whatever that call returns.
 */
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
Returns true or false to indicate a successful match. Equivalent to +re2.match(text, 0)+.
@return [Boolean] whether the match was successful
/*
 * Predicate form of match: forwards +text+ to re2_regexp_match together with
 * an explicit submatch count of 0 and returns whatever that call returns.
 */
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
Returns whether or not the regular expression re2
was compiled
with the case_sensitive option set to false.
@return [Boolean] the inverse of the case_sensitive option @example
re2 = RE2::Regexp.new("woo?", :case_sensitive => true) re2.case_insensitive? #=> false re2.casefold? #=> false
/*
 * Reports the inverse of re2_regexp_case_sensitive for the receiver: the
 * result is true exactly when that function does not return Qtrue.
 */
static VALUE re2_regexp_case_insensitive(VALUE self) {
  const bool is_sensitive = (re2_regexp_case_sensitive(self) == Qtrue);

  return BOOL2RUBY(!is_sensitive);
}
Returns whether or not the regular expression re2
was compiled
with the case_sensitive option set to true.
@return [Boolean] the case_sensitive option @example
re2 = RE2::Regexp.new("woo?", :case_sensitive => true) re2.case_sensitive? #=> true
/*
 * Reads the case_sensitive flag from the options of the compiled pattern
 * wrapped by +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_case_sensitive(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.case_sensitive());
}
Returns whether or not the regular expression re2
was compiled
with the case_sensitive option set to false.
@return [Boolean] the inverse of the case_sensitive option @example
re2 = RE2::Regexp.new("woo?", :case_sensitive => true) re2.case_insensitive? #=> false re2.casefold? #=> false
/*
 * Reports the inverse of re2_regexp_case_sensitive for the receiver: the
 * result is true exactly when that function does not return Qtrue.
 */
static VALUE re2_regexp_case_insensitive(VALUE self) {
  const bool is_sensitive = (re2_regexp_case_sensitive(self) == Qtrue);

  return BOOL2RUBY(!is_sensitive);
}
If the RE2 could not be created properly, returns an error string; otherwise, returns nil.
@return [String, nil] the error string or nil
/*
 * Returns nil when the wrapped pattern reports ok(); otherwise returns a new
 * Ruby string holding the pattern's error message.
 */
static VALUE re2_regexp_error(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  /* Nothing to report for a pattern that compiled. */
  if (wrapped->pattern->ok()) {
    return Qnil;
  }

  const string &message = wrapped->pattern->error();
  return rb_str_new(message.data(), message.size());
}
If the RE2 could not be created properly, returns the offending portion of the regexp; otherwise, returns nil.
@return [String, nil] the offending portion of the regexp or nil
/*
 * Returns nil when the wrapped pattern reports ok(); otherwise returns the
 * pattern's error_arg() as a Ruby string built with ENCODED_STR_NEW, labelled
 * "UTF-8" or "ISO-8859-1" according to the pattern's utf8 option.
 */
static VALUE re2_regexp_error_arg(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  /* Nothing to report for a pattern that compiled. */
  if (wrapped->pattern->ok()) {
    return Qnil;
  }

  const string &offending = wrapped->pattern->error_arg();
  return ENCODED_STR_NEW(offending.data(), offending.size(),
      wrapped->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
Returns a printable version of the regular expression re2.
@return [String] a printable version of the regular expression @example
re2 = RE2::Regexp.new("woo?") re2.inspect #=> "#<RE2::Regexp /woo?/>"
/*
 * Builds the printable form "#<RE2::Regexp /PATTERN/>" for the receiver and
 * returns it as a Ruby string via ENCODED_STR_NEW, labelled "UTF-8" or
 * "ISO-8859-1" according to the pattern's utf8 option.
 */
static VALUE re2_regexp_inspect(VALUE self) {
  re2_pattern *wrapped;
  ostringstream buffer;

  Data_Get_Struct(self, re2_pattern, wrapped);

  buffer << "#<RE2::Regexp /" << wrapped->pattern->pattern() << "/>";

  /* Materialise the stream contents once and reuse them for data + length. */
  const string rendered = buffer.str();

  return ENCODED_STR_NEW(rendered.data(), rendered.length(),
      wrapped->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
Returns whether or not the regular expression re2
was compiled
with the literal option set to true.
@return [Boolean] the literal option @example
re2 = RE2::Regexp.new("woo?", :literal => true) re2.literal? #=> true
/*
 * Reads the literal flag from the options of the compiled pattern wrapped by
 * +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_literal(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.literal());
}
Returns whether or not the regular expression re2
was compiled
with the log_errors option set to true.
@return [Boolean] the log_errors option @example
re2 = RE2::Regexp.new("woo?", :log_errors => true) re2.log_errors? #=> true
/*
 * Reads the log_errors flag from the options of the compiled pattern wrapped
 * by +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_log_errors(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.log_errors());
}
Returns whether or not the regular expression re2
was compiled
with the longest_match option set to true.
@return [Boolean] the longest_match option @example
re2 = RE2::Regexp.new("woo?", :longest_match => true) re2.longest_match? #=> true
/*
 * Reads the longest_match flag from the options of the compiled pattern
 * wrapped by +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_longest_match(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.longest_match());
}
Matches the pattern against the given +text+ and returns either a boolean (if no submatches are required) or a {RE2::MatchData} instance.
@return [Boolean, RE2::MatchData]
@overload match(text)
Returns an {RE2::MatchData} containing the matching pattern and all subpatterns resulting from looking for the regexp in +text+. @param [String] text the text to search @return [RE2::MatchData] the matches @raise [NoMemoryError] if there was not enough memory to allocate the matches @example r = RE2::Regexp.new('w(o)(o)') r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
@overload match(text, 0)
Returns either true or false indicating whether a successful match was made. @param [String] text the text to search @return [Boolean] whether the match was successful @raise [NoMemoryError] if there was not enough memory to allocate the matches @example r = RE2::Regexp.new('w(o)(o)') r.match('woo', 0) #=> true r.match('bob', 0) #=> false
@overload match(text, number_of_matches)
See +match(text)+ but with a specific number of matches returned (padded with nils if necessary). @param [String] text the text to search @param [Fixnum] number_of_matches the number of matches to return @return [RE2::MatchData] the matches @raise [NoMemoryError] if there was not enough memory to allocate the matches @example r = RE2::Regexp.new('w(o)(o)') r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o"> r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
/*
 * Matches the receiver's pattern, unanchored, against +text+.
 *
 * Takes one required argument (the text) and one optional argument (the
 * number of submatches wanted). When the count is omitted it defaults to the
 * pattern's NumberOfCapturingGroups().
 *
 * - With a count of 0 only a boolean is returned.
 * - Otherwise a new RE2::MatchData is populated with count + 1 StringPieces
 *   (the extra one holds the whole match) and returned on success; nil is
 *   returned when the pattern does not match.
 *
 * Raises ArgumentError for a negative explicit count, which would otherwise
 * reach the array allocation as a non-positive size, and NoMemoryError when
 * the StringPiece array cannot be allocated.
 */
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
  int n;
  bool matched;
  re2_pattern *p;
  re2_matchdata *m;
  VALUE text, number_of_matches, matchdata;

  rb_scan_args(argc, argv, "11", &text, &number_of_matches);

  /* Ensure text is a string. */
  text = StringValue(text);

  Data_Get_Struct(self, re2_pattern, p);

  if (RTEST(number_of_matches)) {
    n = NUM2INT(number_of_matches);

    /* A caller-supplied negative count is never meaningful. */
    if (n < 0) {
      rb_raise(rb_eArgError, "number of matches should be >= 0");
    }
  } else {
    n = p->pattern->NumberOfCapturingGroups();
  }

  if (n == 0) {
    matched = match(p->pattern, StringValuePtr(text), 0,
                    static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
    return BOOL2RUBY(matched);
  } else {

    /* Because match returns the whole match as well. */
    n += 1;

    matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
    Data_Get_Struct(matchdata, re2_matchdata, m);
    m->matches = new(nothrow) re2::StringPiece[n];
    m->regexp = self;

    /* Keep a private frozen copy so the StringPieces keep pointing at
     * stable bytes even if the caller later mutates its string. */
    m->text = rb_str_dup(text);
    rb_str_freeze(m->text);

    if (m->matches == 0) {
      rb_raise(rb_eNoMemError, "not enough memory to allocate StringPieces for matches");
    }

    m->number_of_matches = n;

    matched = match(p->pattern, StringValuePtr(m->text), 0,
                    static_cast<int>(RSTRING_LEN(m->text)),
                    RE2::UNANCHORED, m->matches, n);

    if (matched) {
      return matchdata;
    } else {
      return Qnil;
    }
  }
}
Returns true or false to indicate a successful match. Equivalent to +re2.match(text, 0)+.
@return [Boolean] whether the match was successful
/*
 * Predicate form of match: forwards +text+ to re2_regexp_match together with
 * an explicit submatch count of 0 and returns whatever that call returns.
 */
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
Returns the #max_mem setting for the regular expression re2.
@return [Fixnum] the #max_mem option @example
re2 = RE2::Regexp.new("woo?", :max_mem => 1024) re2.max_mem #=> 1024
/*
 * Returns the max_mem option of the compiled pattern wrapped by +self+ as a
 * Ruby Integer.
 *
 * INT2NUM is used rather than INT2FIX because INT2FIX performs no range
 * check: a max_mem value outside the Fixnum range would otherwise be turned
 * into a corrupt VALUE instead of being promoted to a Bignum.
 */
static VALUE re2_regexp_max_mem(VALUE self) {
  re2_pattern *p;
  Data_Get_Struct(self, re2_pattern, p);

  return INT2NUM(p->pattern->options().max_mem());
}
Returns a hash of names to capturing indices of groups.
@return [Hash] a hash of names to capturing indices
/*
 * Builds a Ruby Hash mapping each named capturing group of the wrapped
 * pattern to its capture index.
 *
 * Keys are created with ENCODED_STR_NEW, labelled "UTF-8" or "ISO-8859-1"
 * according to the pattern's utf8 option; values are the indices as Fixnums.
 *
 * The group map is bound by const reference and walked with a
 * const_iterator so that it is only read, never copied.
 */
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
  VALUE capturing_groups;
  re2_pattern *p;

  Data_Get_Struct(self, re2_pattern, p);

  const map<string, int> &groups = p->pattern->NamedCapturingGroups();
  capturing_groups = rb_hash_new();

  for (map<string, int>::const_iterator group = groups.begin();
       group != groups.end(); ++group) {
    rb_hash_aset(capturing_groups,
        ENCODED_STR_NEW(group->first.data(), group->first.size(),
          p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
        INT2FIX(group->second));
  }

  return capturing_groups;
}
Returns whether or not the regular expression re2
was compiled
with the never_nl option set to true.
@return [Boolean] the never_nl option @example
re2 = RE2::Regexp.new("woo?", :never_nl => true) re2.never_nl? #=> true
/*
 * Reads the never_nl flag from the options of the compiled pattern wrapped by
 * +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_never_nl(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.never_nl());
}
Returns the number of capturing subpatterns, or -1 if the regexp wasn't valid on construction. The overall match ($0) does not count: if the regexp is “(a)(b)”, returns 2.
@return [Fixnum] the number of capturing subpatterns
/*
 * Returns the wrapped pattern's NumberOfCapturingGroups() as a Fixnum.
 */
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const int group_count = wrapped->pattern->NumberOfCapturingGroups();
  return INT2FIX(group_count);
}
Returns whether or not the regular expression re2 was compiled successfully.
@return [Boolean] whether or not compilation was successful @example
re2 = RE2::Regexp.new("woo?") re2.ok? #=> true
/*
 * Returns, through BOOL2RUBY, whether the compiled pattern wrapped by +self+
 * reports ok().
 */
static VALUE re2_regexp_ok(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const bool compiled = wrapped->pattern->ok();
  return BOOL2RUBY(compiled);
}
Returns whether or not the regular expression re2
was compiled
with the one_line option set to true.
@return [Boolean] the one_line option @example
re2 = RE2::Regexp.new("woo?", :one_line => true) re2.one_line? #=> true
/*
 * Reads the one_line flag from the options of the compiled pattern wrapped by
 * +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_one_line(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.one_line());
}
Returns a hash of the options currently set for re2.
@return [Hash] the options
/*
 * Returns a frozen Ruby Hash describing every option of the compiled pattern
 * wrapped by +self+, keyed by the option's symbol.
 *
 * All values are booleans except :max_mem, which is an Integer. INT2NUM is
 * used for it rather than INT2FIX because INT2FIX performs no range check and
 * would produce a corrupt VALUE for a value outside the Fixnum range.
 */
static VALUE re2_regexp_options(VALUE self) {
  VALUE options;
  re2_pattern *p;

  Data_Get_Struct(self, re2_pattern, p);

  const RE2::Options &opts = p->pattern->options();
  options = rb_hash_new();

  rb_hash_aset(options, ID2SYM(id_utf8), BOOL2RUBY(opts.utf8()));
  rb_hash_aset(options, ID2SYM(id_posix_syntax), BOOL2RUBY(opts.posix_syntax()));
  rb_hash_aset(options, ID2SYM(id_longest_match), BOOL2RUBY(opts.longest_match()));
  rb_hash_aset(options, ID2SYM(id_log_errors), BOOL2RUBY(opts.log_errors()));
  rb_hash_aset(options, ID2SYM(id_max_mem), INT2NUM(opts.max_mem()));
  rb_hash_aset(options, ID2SYM(id_literal), BOOL2RUBY(opts.literal()));
  rb_hash_aset(options, ID2SYM(id_never_nl), BOOL2RUBY(opts.never_nl()));
  rb_hash_aset(options, ID2SYM(id_case_sensitive), BOOL2RUBY(opts.case_sensitive()));
  rb_hash_aset(options, ID2SYM(id_perl_classes), BOOL2RUBY(opts.perl_classes()));
  rb_hash_aset(options, ID2SYM(id_word_boundary), BOOL2RUBY(opts.word_boundary()));
  rb_hash_aset(options, ID2SYM(id_one_line), BOOL2RUBY(opts.one_line()));

  /* This is a read-only hash after all... */
  rb_obj_freeze(options);

  return options;
}
Returns a string version of the regular expression re2
.
@return [String] a string version of the regular expression @example
re2 = RE2::Regexp.new("woo?") re2.to_s #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string built with
 * ENCODED_STR_NEW, labelled "UTF-8" or "ISO-8859-1" according to the
 * pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const string &source = wrapped->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapped->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
Returns whether or not the regular expression re2
was compiled
with the perl_classes option set to true.
@return [Boolean] the perl_classes option @example
re2 = RE2::Regexp.new("woo?", :perl_classes => true) re2.perl_classes? #=> true
/*
 * Reads the perl_classes flag from the options of the compiled pattern
 * wrapped by +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_perl_classes(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.perl_classes());
}
Returns whether or not the regular expression re2
was compiled
with the posix_syntax option set to true.
@return [Boolean] the posix_syntax option @example
re2 = RE2::Regexp.new("woo?", :posix_syntax => true) re2.posix_syntax? #=> true
/*
 * Reads the posix_syntax flag from the options of the compiled pattern
 * wrapped by +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_posix_syntax(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.posix_syntax());
}
Returns the program size, a very approximate measure of a regexp's “cost”. Larger numbers are more expensive than smaller numbers.
@return [Fixnum] the regexp “cost”
/*
 * Returns the wrapped pattern's ProgramSize() as a Fixnum.
 */
static VALUE re2_regexp_program_size(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const int cost = wrapped->pattern->ProgramSize();
  return INT2FIX(cost);
}
Returns a {RE2::Scanner} for scanning the given text incrementally.
@example
c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
/*
 * Creates a new RE2::Scanner over +text+ for the receiver.
 *
 * The scanner records the regexp, the text, the pattern's number of
 * capturing groups and a cleared eof flag, and owns a heap-allocated
 * StringPiece viewing the text.
 *
 * Raises NoMemoryError if that StringPiece cannot be allocated, matching how
 * the other allocations in this class report failure instead of leaving a
 * null input pointer behind.
 */
static VALUE re2_regexp_scan(VALUE self, VALUE text) {
  re2_pattern *p;
  re2_scanner *c;
  VALUE scanner;

  Data_Get_Struct(self, re2_pattern, p);
  scanner = rb_class_new_instance(0, 0, re2_cScanner);
  Data_Get_Struct(scanner, re2_scanner, c);

  /* StringValuePtr may replace +text+ with its String conversion, so it
   * must run before +text+ is stored on the scanner. */
  c->input = new(nothrow) re2::StringPiece(StringValuePtr(text));
  c->regexp = self;
  c->text = text;

  if (c->input == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate StringPiece for input");
  }

  c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
  c->eof = false;

  return scanner;
}
Returns a string version of the regular expression re2
.
@return [String] a string version of the regular expression @example
re2 = RE2::Regexp.new("woo?") re2.to_s #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string built with
 * ENCODED_STR_NEW, labelled "UTF-8" or "ISO-8859-1" according to the
 * pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const string &source = wrapped->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapped->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
Returns a string version of the regular expression re2
.
@return [String] a string version of the regular expression @example
re2 = RE2::Regexp.new("woo?") re2.to_s #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string built with
 * ENCODED_STR_NEW, labelled "UTF-8" or "ISO-8859-1" according to the
 * pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const string &source = wrapped->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapped->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
Returns a string version of the regular expression re2
.
@return [String] a string version of the regular expression @example
re2 = RE2::Regexp.new("woo?") re2.to_s #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string built with
 * ENCODED_STR_NEW, labelled "UTF-8" or "ISO-8859-1" according to the
 * pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const string &source = wrapped->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapped->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
Returns whether or not the regular expression re2
was compiled
with the utf8 option set to true.
@return [Boolean] the utf8 option @example
re2 = RE2::Regexp.new("woo?", :utf8 => true) re2.utf8? #=> true
/*
 * Reads the utf8 flag from the options of the compiled pattern wrapped by
 * +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_utf8(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.utf8());
}
Returns whether or not the regular expression re2
was compiled
with the word_boundary option set to true.
@return [Boolean] the word_boundary option @example
re2 = RE2::Regexp.new("woo?", :word_boundary => true) re2.word_boundary? #=> true
/*
 * Reads the word_boundary flag from the options of the compiled pattern
 * wrapped by +self+ and returns it through BOOL2RUBY.
 */
static VALUE re2_regexp_word_boundary(VALUE self) {
  re2_pattern *wrapped;
  Data_Get_Struct(self, re2_pattern, wrapped);

  const RE2::Options &opts = wrapped->pattern->options();
  return BOOL2RUBY(opts.word_boundary());
}