regexp.h

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (C) 2012 John W. Eaton
00004 Copyright (C) 2005-2012 David Bateman
00005 
00006 This file is part of Octave.
00007 
00008 Octave is free software; you can redistribute it and/or modify it
00009 under the terms of the GNU General Public License as published by the
00010 Free Software Foundation; either version 3 of the License, or (at your
00011 option) any later version.
00012 
00013 Octave is distributed in the hope that it will be useful, but WITHOUT
00014 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00015 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00016 for more details.
00017 
00018 You should have received a copy of the GNU General Public License
00019 along with Octave; see the file COPYING.  If not, see
00020 <http://www.gnu.org/licenses/>.
00021 
00022 */
00023 
00024 #if !defined (octave_regexp_match_h)
00025 #define octave_regexp_match_h 1
00026 
00027 #include <list>
00028 #include <sstream>
00029 #include <string>
00030 
00031 #include "Array.h"
00032 #include "Matrix.h"
00033 #include "base-list.h"
00034 #include "str-vec.h"
00035 
00036 class regexp
00037 {
00038 public:
00039 
00040   class opts;
00041   class match_data;
00042 
00043   regexp (const std::string& pat = "",
00044           const regexp::opts& opt = regexp::opts (),
00045           const std::string& w = "regexp")
00046     : pattern (pat), options (opt), data (0), named_pats (),
00047       nnames (0), named_idx (), who (w)
00048   {
00049     compile_internal ();
00050   }
00051 
00052   regexp (const regexp& rx)
00053     : pattern (rx.pattern), data (rx.data), named_pats (rx.named_pats),
00054       nnames (rx.nnames), named_idx (rx.named_idx)
00055   { }
00056 
00057   regexp& operator = (const regexp& rx)
00058   {
00059     if (this != &rx)
00060       {
00061         pattern = rx.pattern;
00062         data = rx.data;
00063         named_pats = rx.named_pats;
00064         nnames = rx.nnames;
00065         named_idx = rx.named_idx;
00066       }
00067 
00068     return *this;
00069   }
00070 
00071   ~regexp (void) { free (); }
00072 
00073   void compile (const std::string& pat,
00074                 const regexp::opts& opt = regexp::opts ())
00075   {
00076     pattern = pat;
00077     options = opt;
00078     compile_internal ();
00079   }
00080 
00081   match_data match (const std::string& buffer);
00082 
00083   bool is_match (const std::string& buffer);
00084 
00085   Array<bool> is_match (const string_vector& buffer);
00086 
00087   std::string replace (const std::string& buffer,
00088                        const std::string& replacement);
00089 
00090   struct opts
00091   {
00092   public:
00093 
00094     opts (void)
00095       : x_case_insensitive (false), x_dotexceptnewline (false),
00096         x_freespacing (false), x_lineanchors (false), x_once (false) { }
00097 
00098     opts (const opts& o)
00099       : x_case_insensitive (o.x_case_insensitive),
00100         x_dotexceptnewline (o.x_dotexceptnewline),
00101         x_freespacing (o.x_freespacing),
00102         x_lineanchors (o.x_lineanchors),
00103         x_once (o.x_once)
00104     { }
00105 
00106     opts& operator = (const opts& o)
00107     {
00108       if (this != &o)
00109         {
00110           x_case_insensitive = o.x_case_insensitive;
00111           x_dotexceptnewline = o.x_dotexceptnewline;
00112           x_freespacing = o.x_freespacing;
00113           x_lineanchors = o.x_lineanchors;
00114           x_once = o.x_once;
00115         }
00116 
00117       return *this;
00118     }
00119 
00120     ~opts (void) { }
00121 
00122     void case_insensitive (bool val) { x_case_insensitive = val; }
00123     void dotexceptnewline (bool val) { x_dotexceptnewline = val; }
00124     void freespacing (bool val) { x_freespacing = val; }
00125     void lineanchors (bool val) { x_lineanchors = val; }
00126     void once (bool val) { x_once = val; }
00127 
00128     bool case_insensitive (void) const { return x_case_insensitive; }
00129     bool dotexceptnewline (void) const { return x_dotexceptnewline; }
00130     bool freespacing (void) const { return x_freespacing; }
00131     bool lineanchors (void) const { return x_lineanchors; }
00132     bool once (void) const { return x_once; }
00133 
00134   private:
00135 
00136     bool x_case_insensitive;
00137     bool x_dotexceptnewline;
00138     bool x_freespacing;
00139     bool x_lineanchors;
00140     bool x_once;
00141   };
00142 
00143   class match_element
00144   {
00145   public:
00146 
00147     match_element (const string_vector& nt, const string_vector& t,
00148                    const std::string& ms, const Matrix& te,
00149                    double s, double e)
00150       : x_match_string (ms), x_named_tokens (nt), x_tokens (t),
00151         x_token_extents (te), x_start (s), x_end (e)
00152     { }
00153 
00154     match_element (const match_element &a)
00155       : x_match_string (a.x_match_string),
00156         x_named_tokens (a.x_named_tokens), x_tokens (a.x_tokens),
00157         x_token_extents (a.x_token_extents),
00158         x_start (a.x_start), x_end (a.x_end)
00159     { }
00160 
00161     std::string match_string (void) const { return x_match_string; }
00162     string_vector named_tokens (void) const { return x_named_tokens; }
00163     string_vector tokens (void) const { return x_tokens; }
00164     Matrix token_extents (void) const { return x_token_extents; }
00165     double start (void) const { return x_start; }
00166     double end (void) const { return x_end; }
00167 
00168   private:
00169 
00170     std::string x_match_string;
00171     string_vector x_named_tokens;
00172     string_vector x_tokens;
00173     Matrix x_token_extents;
00174     double x_start;
00175     double x_end;
00176   };
00177 
00178   class match_data : public octave_base_list<match_element>
00179   {
00180   public:
00181 
00182     match_data (void)
00183       : octave_base_list<match_element> (), named_pats ()
00184     { }
00185 
00186     match_data (const std::list<match_element>& l, const string_vector& np)
00187       : octave_base_list<match_element> (l), named_pats (np)
00188     { }
00189 
00190     match_data (const match_data& rx_lst)
00191       : octave_base_list<match_element> (rx_lst),
00192         named_pats (rx_lst.named_pats)
00193     { }
00194 
00195     match_data& operator = (const match_data& rx_lst)
00196     {
00197       if (this != &rx_lst)
00198         {
00199           octave_base_list<match_element>::operator = (rx_lst);
00200           named_pats = rx_lst.named_pats;
00201         }
00202 
00203       return *this;
00204     }
00205 
00206     ~match_data (void) { }
00207 
00208     string_vector named_patterns (void) { return named_pats; }
00209 
00210   private:
00211 
00212     string_vector named_pats;
00213   };
00214 
00215 private:
00216 
00217   // The pattern we've been asked to match.
00218   std::string pattern;
00219 
00220   opts options;
00221 
00222   // Internal data describing the regular expression.
00223   void *data;
00224 
00225   std::string m;
00226   string_vector named_pats;
00227   int nnames;
00228   Array<int> named_idx;
00229   std::string who;
00230 
00231   void free (void);
00232 
00233   void compile_internal (void);
00234 };
00235 
00236 inline regexp::match_data
00237 regexp_match (const std::string& pat,
00238               const std::string& buffer,
00239               const regexp::opts& opt = regexp::opts (),
00240               const std::string& who = "regexp")
00241 {
00242   regexp rx (pat, opt, who);
00243 
00244   return rx.match (buffer);
00245 }
00246 
00247 inline bool
00248 is_regexp_match (const std::string& pat,
00249                  const std::string& buffer,
00250                  const regexp::opts& opt = regexp::opts (),
00251                  const std::string& who = "regexp")
00252 {
00253   regexp rx (pat, opt, who);
00254 
00255   return rx.is_match (buffer);
00256 }
00257 
00258 inline Array<bool>
00259 is_regexp_match (const std::string& pat,
00260                  const string_vector& buffer,
00261                  const regexp::opts& opt = regexp::opts (),
00262                  const std::string& who = "regexp")
00263 {
00264   regexp rx (pat, opt, who);
00265 
00266   return rx.is_match (buffer);
00267 }
00268 
00269 inline std::string
00270 regexp_replace (const std::string& pat,
00271                 const std::string& buffer,
00272                 const std::string& replacement,
00273                 const regexp::opts& opt = regexp::opts (),
00274                 const std::string& who = "regexp")
00275 {
00276   regexp rx (pat, opt, who);
00277 
00278   return rx.replace (buffer, replacement);
00279 }
00280 
00281 #endif
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines