class RE2::Regexp

Public Class Methods

escape(p1) click to toggle source

Returns a version of the given string with all potentially meaningful regexp characters escaped. The returned string, used as a regular expression, will exactly match the original string.

@param [String] unquoted the unquoted string @return [String] the escaped string @example

RE2::Regexp.escape("1.5-2.0?")    #=> "1\.5\-2\.0\?"
/*
 * Escapes every character of +unquoted+ that could be meaningful regexp
 * syntax and returns the result as a new Ruby string.
 *
 * The argument is coerced with StringValue and handed to RE2::QuoteMeta
 * together with its explicit byte length, so input containing embedded NUL
 * bytes is quoted in full instead of being cut off at the first NUL (which
 * is what happens when only a C string pointer is passed).
 */
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  UNUSED(self);

  /* Ensure unquoted is a string. */
  unquoted = StringValue(unquoted);

  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted),
                       static_cast<int>(RSTRING_LEN(unquoted))));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}
new(p1, p2 = v2) click to toggle source

Returns a new {RE2::Regexp} object with a compiled version of pattern stored inside.

@return [RE2::Regexp]

@overload initialize(pattern)

Returns a new {RE2::Regexp} object with a compiled version of
+pattern+ stored inside with the default options.

@param [String] pattern the pattern to compile
@return [RE2::Regexp] an RE2::Regexp with the specified pattern
@raise [NoMemoryError] if memory could not be allocated for the compiled
                       pattern

@overload initialize(pattern, options)

Returns a new {RE2::Regexp} object with a compiled version of
+pattern+ stored inside with the specified options.

@param [String] pattern the pattern to compile
@param [Hash] options the options with which to compile the pattern
@option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1
@option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax
@option options [Boolean] :longest_match (false) search for longest match, not first match
@option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR
@option options [Fixnum] :max_mem approx. max memory footprint of RE2
@option options [Boolean] :literal (false) interpret string as literal, not regexp
@option options [Boolean] :never_nl (false) never match \n, even if it is in regexp
@option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode)
@option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode
@option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode
@option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode
@return [RE2::Regexp] an RE2::Regexp with the specified pattern and options
@raise [NoMemoryError] if memory could not be allocated for the compiled pattern
/*
 * Compiles +pattern+ into an RE2 object and stores it in the re2_pattern
 * wrapped by +self+.
 *
 * Takes one required argument (the pattern) and one optional argument (an
 * options Hash). Every recognised key that is present with a non-nil value
 * is copied onto an RE2::Options instance; keys that are absent or nil
 * leave that option untouched. A truthy second argument that is not a Hash
 * raises ArgumentError, and NoMemoryError is raised when the RE2 object
 * could not be allocated.
 *
 * The pattern is given to RE2 with its explicit byte length so that a
 * pattern containing embedded NUL bytes is compiled in full rather than
 * being truncated at the first NUL.
 */
static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) {
  VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors,
        max_mem, literal, never_nl, case_sensitive, perl_classes,
        word_boundary, one_line;
  re2_pattern *p;

  rb_scan_args(argc, argv, "11", &pattern, &options);
  Data_Get_Struct(self, re2_pattern, p);

  if (RTEST(options)) {
    if (TYPE(options) != T_HASH) {
      rb_raise(rb_eArgError, "options should be a hash");
    }

    RE2::Options re2_options;

    /* For each option: only override RE2's value when the key was given. */
    utf8 = rb_hash_aref(options, ID2SYM(id_utf8));
    if (!NIL_P(utf8)) {
      re2_options.set_utf8(RTEST(utf8));
    }

    posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax));
    if (!NIL_P(posix_syntax)) {
      re2_options.set_posix_syntax(RTEST(posix_syntax));
    }

    longest_match = rb_hash_aref(options, ID2SYM(id_longest_match));
    if (!NIL_P(longest_match)) {
      re2_options.set_longest_match(RTEST(longest_match));
    }

    log_errors = rb_hash_aref(options, ID2SYM(id_log_errors));
    if (!NIL_P(log_errors)) {
      re2_options.set_log_errors(RTEST(log_errors));
    }

    max_mem = rb_hash_aref(options, ID2SYM(id_max_mem));
    if (!NIL_P(max_mem)) {
      re2_options.set_max_mem(NUM2INT(max_mem));
    }

    literal = rb_hash_aref(options, ID2SYM(id_literal));
    if (!NIL_P(literal)) {
      re2_options.set_literal(RTEST(literal));
    }

    never_nl = rb_hash_aref(options, ID2SYM(id_never_nl));
    if (!NIL_P(never_nl)) {
      re2_options.set_never_nl(RTEST(never_nl));
    }

    case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive));
    if (!NIL_P(case_sensitive)) {
      re2_options.set_case_sensitive(RTEST(case_sensitive));
    }

    perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes));
    if (!NIL_P(perl_classes)) {
      re2_options.set_perl_classes(RTEST(perl_classes));
    }

    word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary));
    if (!NIL_P(word_boundary)) {
      re2_options.set_word_boundary(RTEST(word_boundary));
    }

    one_line = rb_hash_aref(options, ID2SYM(id_one_line));
    if (!NIL_P(one_line)) {
      re2_options.set_one_line(RTEST(one_line));
    }

    /* Ensure pattern is a string, then compile it with its full length. */
    pattern = StringValue(pattern);
    p->pattern = new(nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern),
                         static_cast<int>(RSTRING_LEN(pattern))),
        re2_options);
  } else {
    /* Ensure pattern is a string, then compile it with its full length. */
    pattern = StringValue(pattern);
    p->pattern = new(nothrow) RE2(
        re2::StringPiece(RSTRING_PTR(pattern),
                         static_cast<int>(RSTRING_LEN(pattern))));
  }

  /* new(nothrow) reports allocation failure with a null pointer. */
  if (p->pattern == 0) {
    rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
  }

  return self;
}
quote(p1) click to toggle source

Returns a version of the given string with all potentially meaningful regexp characters escaped. The returned string, used as a regular expression, will exactly match the original string.

@param [String] unquoted the unquoted string @return [String] the escaped string @example

RE2::Regexp.quote("1.5-2.0?")    #=> "1\.5\-2\.0\?"
/*
 * Escapes every character of +unquoted+ that could be meaningful regexp
 * syntax and returns the result as a new Ruby string.
 *
 * The argument is coerced with StringValue and handed to RE2::QuoteMeta
 * together with its explicit byte length, so input containing embedded NUL
 * bytes is quoted in full instead of being cut off at the first NUL (which
 * is what happens when only a C string pointer is passed).
 */
static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) {
  UNUSED(self);

  /* Ensure unquoted is a string. */
  unquoted = StringValue(unquoted);

  string quoted_string = RE2::QuoteMeta(
      re2::StringPiece(RSTRING_PTR(unquoted),
                       static_cast<int>(RSTRING_LEN(unquoted))));

  return rb_str_new(quoted_string.data(), quoted_string.size());
}

Public Instance Methods

===(p1) click to toggle source

Returns true or false to indicate a successful match. Equivalent to +re2.match(text, 0)+.

@return [Boolean] whether the match was successful

/*
 * Boolean-only match: forwards +text+ to re2_regexp_match with the number
 * of requested matches fixed at 0 and returns whatever that call returns.
 */
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
  /* Same argument vector a Ruby caller would build for match(text, 0). */
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
=~(p1) click to toggle source

Returns true or false to indicate a successful match. Equivalent to +re2.match(text, 0)+.

@return [Boolean] whether the match was successful

/*
 * Boolean-only match: forwards +text+ to re2_regexp_match with the number
 * of requested matches fixed at 0 and returns whatever that call returns.
 */
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
  /* Same argument vector a Ruby caller would build for match(text, 0). */
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
case_insensitive?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the case_sensitive option set to false.

@return [Boolean] the inverse of the case_sensitive option @example

re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
re2.case_insensitive?    #=> false
re2.casefold?    #=> false
/*
 * Inverse of re2_regexp_case_sensitive: yields a true value exactly when
 * that function returns anything other than Qtrue for +self+.
 */
static VALUE re2_regexp_case_insensitive(VALUE self) {
  const VALUE sensitive = re2_regexp_case_sensitive(self);

  return BOOL2RUBY(sensitive != Qtrue);
}
case_sensitive?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the case_sensitive option set to true.

@return [Boolean] the case_sensitive option @example

re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
re2.case_sensitive?    #=> true
/*
 * Reads the case_sensitive flag from the options of the compiled pattern
 * wrapped by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_case_sensitive(VALUE self) {
  re2_pattern *wrapper;
  bool sensitive;

  Data_Get_Struct(self, re2_pattern, wrapper);
  sensitive = wrapper->pattern->options().case_sensitive();

  return BOOL2RUBY(sensitive);
}
casefold?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the case_sensitive option set to false.

@return [Boolean] the inverse of the case_sensitive option @example

re2 = RE2::Regexp.new("woo?", :case_sensitive => true)
re2.case_insensitive?    #=> false
re2.casefold?    #=> false
/*
 * Inverse of re2_regexp_case_sensitive: yields a true value exactly when
 * that function returns anything other than Qtrue for +self+.
 */
static VALUE re2_regexp_case_insensitive(VALUE self) {
  const VALUE sensitive = re2_regexp_case_sensitive(self);

  return BOOL2RUBY(sensitive != Qtrue);
}
error() click to toggle source

If the RE2 could not be created properly, returns an error string; otherwise, returns nil.

@return [String, nil] the error string or nil

/*
 * Returns nil when the wrapped pattern reports ok(); otherwise returns a
 * new Ruby string holding the pattern's error() text.
 */
static VALUE re2_regexp_error(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  /* A pattern that compiled cleanly has no error to report. */
  if (wrapper->pattern->ok()) {
    return Qnil;
  }

  const string &message = wrapper->pattern->error();
  return rb_str_new(message.data(), message.size());
}
error_arg() click to toggle source

If the RE2 could not be created properly, returns the offending portion of the regexp; otherwise, returns nil.

@return [String, nil] the offending portion of the regexp or nil

/*
 * Returns nil when the wrapped pattern reports ok(); otherwise returns the
 * pattern's error_arg() text as a Ruby string tagged "UTF-8" or
 * "ISO-8859-1" depending on the pattern's utf8 option.
 */
static VALUE re2_regexp_error_arg(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  /* A pattern that compiled cleanly has no offending fragment. */
  if (wrapper->pattern->ok()) {
    return Qnil;
  }

  const string &offending = wrapper->pattern->error_arg();
  return ENCODED_STR_NEW(offending.data(), offending.size(),
      wrapper->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
inspect() click to toggle source

Returns a printable version of the regular expression re2.

@return [String] a printable version of the regular expression @example

re2 = RE2::Regexp.new("woo?")
re2.inspect    #=> "#<RE2::Regexp /woo?/>"
/*
 * Builds the text "#<RE2::Regexp /PATTERN/>" (PATTERN being the wrapped
 * pattern's source) and returns it as a Ruby string tagged "UTF-8" or
 * "ISO-8859-1" depending on the pattern's utf8 option.
 */
static VALUE re2_regexp_inspect(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  /* Assemble prefix, pattern source and suffix in a single buffer. */
  string description("#<RE2::Regexp /");
  description += wrapper->pattern->pattern();
  description += "/>";

  return ENCODED_STR_NEW(description.data(), description.length(),
      wrapper->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
literal?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the literal option set to true.

@return [Boolean] the literal option @example

re2 = RE2::Regexp.new("woo?", :literal => true)
re2.literal?    #=> true
/*
 * Reads the literal flag from the options of the compiled pattern wrapped
 * by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_literal(VALUE self) {
  re2_pattern *wrapper;
  bool is_literal;

  Data_Get_Struct(self, re2_pattern, wrapper);
  is_literal = wrapper->pattern->options().literal();

  return BOOL2RUBY(is_literal);
}
log_errors?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the log_errors option set to true.

@return [Boolean] the log_errors option @example

re2 = RE2::Regexp.new("woo?", :log_errors => true)
re2.log_errors?    #=> true
/*
 * Reads the log_errors flag from the options of the compiled pattern
 * wrapped by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_log_errors(VALUE self) {
  re2_pattern *wrapper;
  bool logs_errors;

  Data_Get_Struct(self, re2_pattern, wrapper);
  logs_errors = wrapper->pattern->options().log_errors();

  return BOOL2RUBY(logs_errors);
}
longest_match?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the longest_match option set to true.

@return [Boolean] the longest_match option @example

re2 = RE2::Regexp.new("woo?", :longest_match => true)
re2.longest_match?    #=> true
/*
 * Reads the longest_match flag from the options of the compiled pattern
 * wrapped by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_longest_match(VALUE self) {
  re2_pattern *wrapper;
  bool prefers_longest;

  Data_Get_Struct(self, re2_pattern, wrapper);
  prefers_longest = wrapper->pattern->options().longest_match();

  return BOOL2RUBY(prefers_longest);
}
match(p1, p2 = v2) click to toggle source

Match the pattern against the given text and return either a boolean (if no submatches are required) or a {RE2::MatchData} instance.

@return [Boolean, RE2::MatchData]

@overload match(text)

Returns an {RE2::MatchData} containing the matching
pattern and all subpatterns resulting from looking for
the regexp in +text+.

@param [String] text the text to search
@return [RE2::MatchData] the matches
@raise [NoMemoryError] if there was not enough memory to allocate the matches
@example
  r = RE2::Regexp.new('w(o)(o)')
  r.match('woo')    #=> #<RE2::MatchData "woo" 1:"o" 2:"o">

@overload match(text, 0)

Returns either true or false indicating whether a
successful match was made.

@param [String] text the text to search
@return [Boolean] whether the match was successful
@raise [NoMemoryError] if there was not enough memory to allocate the matches
@example
  r = RE2::Regexp.new('w(o)(o)')
  r.match('woo', 0) #=> true
  r.match('bob', 0) #=> false

@overload match(text, number_of_matches)

See +match(text)+ but with a specific number of
matches returned (padded with nils if necessary).

@param [String] text the text to search
@param [Fixnum] number_of_matches the number of matches to return
@return [RE2::MatchData] the matches
@raise [NoMemoryError] if there was not enough memory to allocate the matches
@example
  r = RE2::Regexp.new('w(o)(o)')
  r.match('woo', 1) #=> #<RE2::MatchData "woo" 1:"o">
  r.match('woo', 3) #=> #<RE2::MatchData "woo" 1:"o" 2:"o" 3:nil>
/*
 * Matches the wrapped pattern against +text+ (unanchored).
 *
 * Takes one required argument (the text) and one optional argument (the
 * number of submatches wanted). When the second argument is absent, nil or
 * false, the pattern's NumberOfCapturingGroups() is used instead.
 *
 * - With a count of 0 only success is reported, via BOOL2RUBY.
 * - Otherwise an RE2::MatchData is allocated with room for the whole match
 *   plus the requested submatches; it is returned on success and nil is
 *   returned when nothing matched.
 *
 * Raises ArgumentError for a negative caller-supplied count (which would
 * otherwise reach the array allocation as a zero or negative size) and
 * NoMemoryError when the StringPiece array cannot be allocated.
 */
static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) {
  int n;
  bool matched;
  re2_pattern *p;
  re2_matchdata *m;
  VALUE text, number_of_matches, matchdata;

  rb_scan_args(argc, argv, "11", &text, &number_of_matches);

  /* Ensure text is a string. */
  text = StringValue(text);

  Data_Get_Struct(self, re2_pattern, p);

  if (RTEST(number_of_matches)) {
    n = NUM2INT(number_of_matches);

    /* Only caller-supplied counts are validated; the default below is
     * taken from the pattern as-is. */
    if (n < 0) {
      rb_raise(rb_eArgError, "number of matches should be >= 0");
    }
  } else {
    n = p->pattern->NumberOfCapturingGroups();
  }

  if (n == 0) {
    matched = match(p->pattern, StringValuePtr(text), 0,
        static_cast<int>(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0);
    return BOOL2RUBY(matched);
  } else {

    /* Because match returns the whole match as well. */
    n += 1;

    matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
    Data_Get_Struct(matchdata, re2_matchdata, m);
    m->matches = new(nothrow) re2::StringPiece[n];
    m->regexp = self;

    /* Match against a frozen duplicate so the StringPieces stored in the
     * MatchData point into text that is not modified through the caller's
     * string. */
    m->text = rb_str_dup(text);
    rb_str_freeze(m->text);

    /* new(nothrow) reports allocation failure with a null pointer. */
    if (m->matches == 0) {
      rb_raise(rb_eNoMemError,
               "not enough memory to allocate StringPieces for matches");
    }

    m->number_of_matches = n;

    matched = match(p->pattern, StringValuePtr(m->text), 0,
                    static_cast<int>(RSTRING_LEN(m->text)),
                    RE2::UNANCHORED, m->matches, n);

    if (matched) {
      return matchdata;
    } else {
      return Qnil;
    }
  }
}
match?(p1) click to toggle source

Returns true or false to indicate a successful match. Equivalent to +re2.match(text, 0)+.

@return [Boolean] whether the match was successful

/*
 * Boolean-only match: forwards +text+ to re2_regexp_match with the number
 * of requested matches fixed at 0 and returns whatever that call returns.
 */
static VALUE re2_regexp_match_query(VALUE self, VALUE text) {
  /* Same argument vector a Ruby caller would build for match(text, 0). */
  VALUE match_args[2] = { text, INT2FIX(0) };

  return re2_regexp_match(2, match_args, self);
}
max_mem() click to toggle source

Returns the #max_mem setting for the regular expression re2.

@return [Fixnum] the #max_mem option @example

re2 = RE2::Regexp.new("woo?", :max_mem => 1024)
re2.max_mem    #=> 1024
/*
 * Returns the max_mem value stored in the options of the compiled pattern
 * wrapped by +self+, converted with INT2FIX.
 */
static VALUE re2_regexp_max_mem(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  const RE2::Options &compiled_options = wrapper->pattern->options();
  return INT2FIX(compiled_options.max_mem());
}
named_capturing_groups() click to toggle source

Returns a hash of names to capturing indices of groups.

@return [Hash] a hash of names to capturing indices

/*
 * Builds a new Ruby Hash that maps each named capturing group of the
 * wrapped pattern to its capture index.
 *
 * Keys are Ruby strings tagged "UTF-8" or "ISO-8859-1" depending on the
 * pattern's utf8 option; values are the indices converted with INT2FIX.
 *
 * The name map is walked through a const reference rather than copied
 * into a local map first, so a call no longer duplicates every name.
 */
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
  VALUE capturing_groups;
  re2_pattern *p;

  Data_Get_Struct(self, re2_pattern, p);

  const map<string, int> &groups = p->pattern->NamedCapturingGroups();
  capturing_groups = rb_hash_new();

  for (map<string, int>::const_iterator group = groups.begin();
       group != groups.end(); ++group) {
    rb_hash_aset(capturing_groups,
        ENCODED_STR_NEW(group->first.data(), group->first.size(),
          p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
        INT2FIX(group->second));
  }

  return capturing_groups;
}
never_nl?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the never_nl option set to true.

@return [Boolean] the never_nl option @example

re2 = RE2::Regexp.new("woo?", :never_nl => true)
re2.never_nl?    #=> true
/*
 * Reads the never_nl flag from the options of the compiled pattern wrapped
 * by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_never_nl(VALUE self) {
  re2_pattern *wrapper;
  bool skips_newline;

  Data_Get_Struct(self, re2_pattern, wrapper);
  skips_newline = wrapper->pattern->options().never_nl();

  return BOOL2RUBY(skips_newline);
}
number_of_capturing_groups() click to toggle source

Returns the number of capturing subpatterns, or -1 if the regexp wasn't valid on construction. The overall match ($0) does not count: if the regexp is “(a)(b)”, returns 2.

@return [Fixnum] the number of capturing subpatterns

/*
 * Returns the wrapped pattern's NumberOfCapturingGroups() converted with
 * INT2FIX.
 */
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
  re2_pattern *wrapper;
  int group_count;

  Data_Get_Struct(self, re2_pattern, wrapper);
  group_count = wrapper->pattern->NumberOfCapturingGroups();

  return INT2FIX(group_count);
}
ok?() click to toggle source

Returns whether or not the regular expression re2 was compiled successfully.

@return [Boolean] whether or not compilation was successful @example

re2 = RE2::Regexp.new("woo?")
re2.ok?    #=> true
/*
 * Converts the wrapped pattern's ok() status with BOOL2RUBY.
 */
static VALUE re2_regexp_ok(VALUE self) {
  re2_pattern *wrapper;
  bool compiled_ok;

  Data_Get_Struct(self, re2_pattern, wrapper);
  compiled_ok = wrapper->pattern->ok();

  return BOOL2RUBY(compiled_ok);
}
one_line?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the one_line option set to true.

@return [Boolean] the one_line option @example

re2 = RE2::Regexp.new("woo?", :one_line => true)
re2.one_line?    #=> true
/*
 * Reads the one_line flag from the options of the compiled pattern wrapped
 * by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_one_line(VALUE self) {
  re2_pattern *wrapper;
  bool is_one_line;

  Data_Get_Struct(self, re2_pattern, wrapper);
  is_one_line = wrapper->pattern->options().one_line();

  return BOOL2RUBY(is_one_line);
}
options() click to toggle source

Returns a hash of the options currently set for re2.

@return [Hash] the options

/*
 * Builds a frozen Ruby Hash describing the options of the compiled pattern
 * wrapped by +self+.
 *
 * Each key is the option's Symbol; boolean options are converted with
 * BOOL2RUBY and max_mem with INT2FIX. The Hash is frozen before being
 * returned.
 */
static VALUE re2_regexp_options(VALUE self) {
  re2_pattern *wrapper;
  VALUE summary;

  Data_Get_Struct(self, re2_pattern, wrapper);

  const RE2::Options &opts = wrapper->pattern->options();
  summary = rb_hash_new();

  rb_hash_aset(summary, ID2SYM(id_utf8), BOOL2RUBY(opts.utf8()));
  rb_hash_aset(summary, ID2SYM(id_posix_syntax),
      BOOL2RUBY(opts.posix_syntax()));
  rb_hash_aset(summary, ID2SYM(id_longest_match),
      BOOL2RUBY(opts.longest_match()));
  rb_hash_aset(summary, ID2SYM(id_log_errors),
      BOOL2RUBY(opts.log_errors()));
  rb_hash_aset(summary, ID2SYM(id_max_mem), INT2FIX(opts.max_mem()));
  rb_hash_aset(summary, ID2SYM(id_literal), BOOL2RUBY(opts.literal()));
  rb_hash_aset(summary, ID2SYM(id_never_nl), BOOL2RUBY(opts.never_nl()));
  rb_hash_aset(summary, ID2SYM(id_case_sensitive),
      BOOL2RUBY(opts.case_sensitive()));
  rb_hash_aset(summary, ID2SYM(id_perl_classes),
      BOOL2RUBY(opts.perl_classes()));
  rb_hash_aset(summary, ID2SYM(id_word_boundary),
      BOOL2RUBY(opts.word_boundary()));
  rb_hash_aset(summary, ID2SYM(id_one_line), BOOL2RUBY(opts.one_line()));

  /* The hash is only a snapshot, so hand it back read-only. */
  rb_obj_freeze(summary);

  return summary;
}
pattern() click to toggle source

Returns a string version of the regular expression re2.

@return [String] a string version of the regular expression @example

re2 = RE2::Regexp.new("woo?")
re2.pattern    #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string tagged
 * "UTF-8" or "ISO-8859-1" depending on the pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  const string &source = wrapper->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapper->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
perl_classes?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the perl_classes option set to true.

@return [Boolean] the perl_classes option @example

re2 = RE2::Regexp.new("woo?", :perl_classes => true)
re2.perl_classes?    #=> true
/*
 * Reads the perl_classes flag from the options of the compiled pattern
 * wrapped by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_perl_classes(VALUE self) {
  re2_pattern *wrapper;
  bool allows_perl_classes;

  Data_Get_Struct(self, re2_pattern, wrapper);
  allows_perl_classes = wrapper->pattern->options().perl_classes();

  return BOOL2RUBY(allows_perl_classes);
}
posix_syntax?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the posix_syntax option set to true.

@return [Boolean] the posix_syntax option @example

re2 = RE2::Regexp.new("woo?", :posix_syntax => true)
re2.posix_syntax?    #=> true
/*
 * Reads the posix_syntax flag from the options of the compiled pattern
 * wrapped by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_posix_syntax(VALUE self) {
  re2_pattern *wrapper;
  bool uses_posix_syntax;

  Data_Get_Struct(self, re2_pattern, wrapper);
  uses_posix_syntax = wrapper->pattern->options().posix_syntax();

  return BOOL2RUBY(uses_posix_syntax);
}
program_size() click to toggle source

Returns the program size, a very approximate measure of a regexp's “cost”. Larger numbers are more expensive than smaller numbers.

@return [Fixnum] the regexp “cost”

/*
 * Returns the wrapped pattern's ProgramSize() converted with INT2FIX.
 */
static VALUE re2_regexp_program_size(VALUE self) {
  re2_pattern *wrapper;
  int cost;

  Data_Get_Struct(self, re2_pattern, wrapper);
  cost = wrapper->pattern->ProgramSize();

  return INT2FIX(cost);
}
scan(p1) click to toggle source

Returns a {RE2::Scanner} for scanning the given text incrementally.

@example

c = RE2::Regexp.new('(\w+)').scan("Foo bar baz")
/*
 * Creates an RE2::Scanner over +text+ for the pattern wrapped by +self+.
 *
 * The scanner stores a frozen duplicate of +text+ (the same approach
 * re2_regexp_match takes) and its input StringPiece points into that
 * duplicate with an explicit byte length. This keeps the scanner's input
 * from being modified through the caller's string and keeps text that
 * contains embedded NUL bytes from being truncated.
 *
 * Raises NoMemoryError when the StringPiece cannot be allocated, in line
 * with the other allocations in this file.
 */
static VALUE re2_regexp_scan(VALUE self, VALUE text) {
  re2_pattern *p;
  re2_scanner *c;
  VALUE scanner, frozen_text;

  /* Ensure text is a string. */
  text = StringValue(text);

  /* Take the private copy before the scanner exists so that no Ruby
   * allocation happens while the scanner's fields are still unset. */
  frozen_text = rb_str_dup(text);
  rb_str_freeze(frozen_text);

  Data_Get_Struct(self, re2_pattern, p);
  scanner = rb_class_new_instance(0, 0, re2_cScanner);
  Data_Get_Struct(scanner, re2_scanner, c);

  c->regexp = self;
  c->text = frozen_text;
  c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups();
  c->eof = false;
  c->input = new(nothrow) re2::StringPiece(RSTRING_PTR(c->text),
      static_cast<int>(RSTRING_LEN(c->text)));

  /* new(nothrow) reports allocation failure with a null pointer. */
  if (c->input == 0) {
    rb_raise(rb_eNoMemError,
             "not enough memory to allocate StringPiece for input");
  }

  return scanner;
}
source() click to toggle source

Returns a string version of the regular expression re2.

@return [String] a string version of the regular expression @example

re2 = RE2::Regexp.new("woo?")
re2.source    #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string tagged
 * "UTF-8" or "ISO-8859-1" depending on the pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  const string &source = wrapper->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapper->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
to_s() click to toggle source

Returns a string version of the regular expression re2.

@return [String] a string version of the regular expression @example

re2 = RE2::Regexp.new("woo?")
re2.to_s    #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string tagged
 * "UTF-8" or "ISO-8859-1" depending on the pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  const string &source = wrapper->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapper->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
to_str() click to toggle source

Returns a string version of the regular expression re2.

@return [String] a string version of the regular expression @example

re2 = RE2::Regexp.new("woo?")
re2.to_str    #=> "woo?"
/*
 * Returns the source text of the wrapped pattern as a Ruby string tagged
 * "UTF-8" or "ISO-8859-1" depending on the pattern's utf8 option.
 */
static VALUE re2_regexp_to_s(VALUE self) {
  re2_pattern *wrapper;

  Data_Get_Struct(self, re2_pattern, wrapper);

  const string &source = wrapper->pattern->pattern();
  return ENCODED_STR_NEW(source.data(), source.size(),
      wrapper->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
}
utf8?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the utf8 option set to true.

@return [Boolean] the utf8 option @example

re2 = RE2::Regexp.new("woo?", :utf8 => true)
re2.utf8?    #=> true
/*
 * Reads the utf8 flag from the options of the compiled pattern wrapped by
 * +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_utf8(VALUE self) {
  re2_pattern *wrapper;
  bool is_utf8;

  Data_Get_Struct(self, re2_pattern, wrapper);
  is_utf8 = wrapper->pattern->options().utf8();

  return BOOL2RUBY(is_utf8);
}
word_boundary?() click to toggle source

Returns whether or not the regular expression re2 was compiled with the word_boundary option set to true.

@return [Boolean] the word_boundary option @example

re2 = RE2::Regexp.new("woo?", :word_boundary => true)
re2.word_boundary?    #=> true
/*
 * Reads the word_boundary flag from the options of the compiled pattern
 * wrapped by +self+ and converts it with BOOL2RUBY.
 */
static VALUE re2_regexp_word_boundary(VALUE self) {
  re2_pattern *wrapper;
  bool allows_word_boundary;

  Data_Get_Struct(self, re2_pattern, wrapper);
  allows_word_boundary = wrapper->pattern->options().word_boundary();

  return BOOL2RUBY(allows_word_boundary);
}