wibble  1.1
regexp.h
Go to the documentation of this file.
1 #ifndef WIBBLE_REGEXP_H
2 #define WIBBLE_REGEXP_H
3 
4 /*
5  * OO wrapper for regular expression functions
6  *
7  * Copyright (C) 2003--2006 Enrico Zini <enrico@debian.org>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */
23 
24 #include <wibble/exception.h>
25 #include <sys/types.h>
26 #include <regex.h>
27 
28 namespace wibble {
29 namespace exception {
30 
32 
34 { // NOTE(review): the class head (listing line 33) is absent from this listing; the constructor and destructor below show the class is named Regexp, its base class is not visible here.
35 protected:
36  int m_code; // value returned by code()
37  std::string m_message; // text returned by desc()
38 
39 public:
40  Regexp(const regex_t& re, int code, const std::string& context) // declared non-throwing; only the declaration is visible in this header
41  throw ();
42  ~Regexp() throw () {} // empty, non-throwing destructor
43 
45  virtual int code() const throw () { return m_code; } // Get the regexp error code associated to the exception.
46 
47  virtual const char* type() const throw () { return "Regexp"; } // Get a string tag identifying the exception type.
48  virtual std::string desc() const throw () { return m_message; } // Get a string describing what happened that threw the exception (returns a copy of m_message).
49 };
50 
51 }
52 
53 class Regexp // OO wrapper around the regex_t type from <regex.h>; every member function is only declared in this header.
54 {
55 protected:
56  regex_t re; // regex state, type provided by <regex.h>
57  regmatch_t* pmatch; // raw pointer to match records; presumably an array of nmatch entries owned by this object -- TODO confirm in the implementation
58  int nmatch; // presumably the match_count passed to the constructor -- TODO confirm
59  std::string lastMatch; // presumably the subject string of the most recent match(), used to build operator[] results -- TODO confirm
60 
61 public:
62  /* Note that match_count is required to be >1 to enable
63  sub-regexp capture. The maximum *INCLUDES* the whole-regexp
64  match (indexed 0). [TODO we may want to fix this to be more
65  friendly?] */
66  Regexp(const std::string& expr, int match_count = 0, int flags = 0); // defaults: match_count 0 (sub-regexp capture not enabled, per the note above) and flags 0
67  ~Regexp() throw (); // NOTE(review): a destructor is declared but no copy constructor or copy assignment is; with the raw pmatch pointer, verify that copying a Regexp is safe
68 
69  bool match(const std::string& str, int flags = 0); // returns a bool for str; flags defaults to 0 -- presumably true when the expression matches, TODO confirm
70 
71  /* Indexing is from 1 for capture matches, like perl's $0,
72  $1... 0 is whole-regexp match, not a capture. TODO
73  the range is miscalculated (an off-by-one, wrt. the
74  counterintuitive match counting). */
75  std::string operator[](int idx); // returns the text for index idx by value (index meaning as described above)
76 
77  size_t matchStart(int idx); // presumably the start offset of match idx in the matched string -- TODO confirm
78  size_t matchEnd(int idx); // presumably the end offset of match idx -- TODO confirm
79  size_t matchLength(int idx); // presumably matchEnd(idx) - matchStart(idx) -- TODO confirm
80 };
81 
82 class ERegexp : public Regexp // Regexp that always adds REG_EXTENDED to the flags it is given
83 {
84 public:
85  ERegexp(const std::string& expr, int match_count = 0, int flags = 0) // forwards expr and match_count unchanged to Regexp
86  : Regexp(expr, match_count, flags | REG_EXTENDED) {} // ORs REG_EXTENDED into the caller's flags
87 };
88 
89 class Tokenizer // Exposes substrings of str through const_iterator; how the regular expression selects them is decided in operator++, which is only declared here.
90 {
91  const std::string& str; // reference to the caller's string, not a copy: that string must outlive the Tokenizer and its iterators
93 // NOTE(review): listing line 92 is absent; the constructor below initialises a member named re that is not declared in the visible lines.
94 public:
96  { // NOTE(review): listing line 95 (the head of this nested class) is absent; its constructors show it is named const_iterator.
97  Tokenizer& tok; // the Tokenizer whose str is being iterated
98  size_t beg, end; // current token is tok.str[beg, end), as used by operator*
99  public:
100  typedef std::string value_type;
101  typedef ptrdiff_t difference_type;
102  typedef value_type *pointer;
104  typedef std::forward_iterator_tag iterator_category; // NOTE(review): listing line 103 is absent just above this typedef
105 
106  const_iterator(Tokenizer& tok) : tok(tok), beg(0), end(0) { operator++(); } // starts at offset 0 and advances once before first use
107  const_iterator(Tokenizer& tok, bool) : tok(tok), beg(tok.str.size()), end(tok.str.size()) {} // end position: beg == end == str.size(); the bool is unnamed and unused, it only selects this overload
108 
109  const_iterator& operator++(); // declared only; the advance logic is not in this header
110 
111  std::string operator*() const // returns a copy of the current token
112  {
113  return tok.str.substr(beg, end-beg);
114  }
115  bool operator==(const const_iterator& ti) const // compares offsets only, not which Tokenizer the iterators refer to
116  {
117  return beg == ti.beg && end == ti.end;
118  }
119  bool operator!=(const const_iterator& ti) const // exact negation of operator==
120  {
121  return beg != ti.beg || end != ti.end;
122  }
123  };
124 
125  Tokenizer(const std::string& str, const std::string& re, int flags) // binds str by reference and builds the re member from the pattern
126  : str(str), re(re, 1, flags) {} // 1 is passed as the second constructor argument of the re member
127 
128  const_iterator begin() { return const_iterator(*this); } // iterator positioned by one call to operator++
129  const_iterator end() { return const_iterator(*this, false); } // past-the-end iterator; the bool value itself is ignored
130 };
131 
145 class Splitter // Split a string using a regular expression to match the token separators.
146 {
148 // NOTE(review): listing line 147 is absent; the constructor below initialises a member named re that is not declared in the visible lines.
149 public:
154  // TODO: add iterator_traits
156  { // NOTE(review): listing lines 155 (head of this nested class) and 157 are absent; the constructors show the class is named const_iterator and that it has a member re initialised from a wibble::Regexp&.
158  std::string cur; // token returned by operator* and operator->
159  std::string next; // initialised with a copy of the input string; presumably the not-yet-split remainder -- TODO confirm in operator++
160 
161  public:
162  typedef std::string value_type;
163  typedef ptrdiff_t difference_type;
164  typedef value_type *pointer;
166  typedef std::forward_iterator_tag iterator_category; // NOTE(review): listing line 165 is absent just above this typedef
167 
168  const_iterator(wibble::Regexp& re, const std::string& str) : re(re), next(str) { ++*this; } // copies str into next, then advances once before first use
170 // NOTE(review): listing line 169 is absent here.
171  const_iterator& operator++(); // declared only; the splitting logic is not in this header
172 
173  const std::string& operator*() const // reference to the current token; refers to this iterator's own cur member
174  {
175  return cur;
176  }
177  const std::string* operator->() const // pointer to the current token
178  {
179  return &cur;
180  }
181  bool operator==(const const_iterator& ti) const // compares the cur and next strings by value; the Regexp is not compared
182  {
183  return cur == ti.cur && next == ti.next;
184  }
185  bool operator!=(const const_iterator& ti) const // exact negation of operator==
186  {
187  return cur != ti.cur || next != ti.next;
188  }
189  };
190 
194  Splitter(const std::string& re, int flags) // Create a splitter that uses the given regular expression to find tokens.
195  : re(re, 1, flags) {} // 1 is passed as the second constructor argument of the re member
196 
200  const_iterator begin(const std::string& str) { return const_iterator(re, str); } // Split the string and iterate the resulting tokens; the iterator is given this Splitter's re member by reference.
202 }; // NOTE(review): listing line 201 is absent just above; no end() is visible in this listing.
203 
204 }
205 
206 // vim:set ts=4 sw=4:
207 #endif
Definition: regexp.h:83
ERegexp(const std::string &expr, int match_count=0, int flags=0)
Definition: regexp.h:85
Definition: regexp.h:54
size_t matchLength(int idx)
Definition: regexp.cpp:118
std::string lastMatch
Definition: regexp.h:59
regmatch_t * pmatch
Definition: regexp.h:57
size_t matchStart(int idx)
Definition: regexp.cpp:104
regex_t re
Definition: regexp.h:56
Regexp(const std::string &expr, int match_count=0, int flags=0)
Definition: regexp.cpp:52
int nmatch
Definition: regexp.h:58
size_t matchEnd(int idx)
Definition: regexp.cpp:111
bool match(const std::string &str, int flags=0)
Definition: regexp.cpp:73
~Regexp()
Definition: regexp.cpp:65
Warning: the various iterators reuse the Regexps and therefore only one iteration of a Splitter can be done at a given time.
Definition: regexp.h:156
value_type * pointer
Definition: regexp.h:164
std::string value_type
Definition: regexp.h:162
bool operator!=(const const_iterator &ti) const
Definition: regexp.h:185
const std::string & operator*() const
Definition: regexp.h:173
const_iterator(wibble::Regexp &re, const std::string &str)
Definition: regexp.h:168
const_iterator(wibble::Regexp &re)
Definition: regexp.h:169
const std::string * operator->() const
Definition: regexp.h:177
std::forward_iterator_tag iterator_category
Definition: regexp.h:166
bool operator==(const const_iterator &ti) const
Definition: regexp.h:181
value_type & reference
Definition: regexp.h:165
ptrdiff_t difference_type
Definition: regexp.h:163
Split a string using a regular expression to match the token separators.
Definition: regexp.h:146
Splitter(const std::string &re, int flags)
Create a splitter that uses the given regular expression to find tokens.
Definition: regexp.h:194
const_iterator end()
Definition: regexp.h:201
const_iterator begin(const std::string &str)
Split the string and iterate the resulting tokens.
Definition: regexp.h:200
Definition: regexp.h:96
std::forward_iterator_tag iterator_category
Definition: regexp.h:104
std::string value_type
Definition: regexp.h:100
const_iterator(Tokenizer &tok)
Definition: regexp.h:106
value_type & reference
Definition: regexp.h:103
const_iterator(Tokenizer &tok, bool)
Definition: regexp.h:107
value_type * pointer
Definition: regexp.h:102
bool operator!=(const const_iterator &ti) const
Definition: regexp.h:119
bool operator==(const const_iterator &ti) const
Definition: regexp.h:115
std::string operator*() const
Definition: regexp.h:111
ptrdiff_t difference_type
Definition: regexp.h:101
Definition: regexp.h:90
Tokenizer(const std::string &str, const std::string &re, int flags)
Definition: regexp.h:125
const_iterator end()
Definition: regexp.h:129
const_iterator begin()
Definition: regexp.h:128
const std::vector< std::string > & context() const
Definition: exception.h:166
Base class for all exceptions.
Definition: exception.h:180
Definition: regexp.h:34
Regexp(const regex_t &re, int code, const std::string &context)
Definition: regexp.cpp:31
int m_code
Definition: regexp.h:36
std::string m_message
Definition: regexp.h:37
virtual std::string desc() const
Get a string describing what happened that threw the exception.
Definition: regexp.h:48
virtual int code() const
Get the regexp error code associated to the exception.
Definition: regexp.h:45
virtual const char * type() const
Get a string tag identifying the exception type.
Definition: regexp.h:47
~Regexp()
Definition: regexp.h:42
ListIterator< List > end(List)
Definition: list.h:425
size_t size(const std::string &file)
File size.
Definition: fs.cpp:287
Definition: amorph.h:17