NuriaProject Framework  0.1
The NuriaProject Framework
tokenizer.hpp
1 /* Copyright (c) 2014-2015, The Nuria Project
2  * The NuriaProject Framework is free software: you can redistribute it and/or
3  * modify it under the terms of the GNU Lesser General Public License as
4  * published by the Free Software Foundation, either version 3 of the License,
5  * or (at your option) any later version.
6  *
7  * The NuriaProject Framework is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU Lesser General Public License for more details.
11  *
12  * You should have received a copy of the GNU Lesser General Public License
13  * along with The NuriaProject Framework.
14  * If not, see <http://www.gnu.org/licenses/>.
15  */
16 
17 #ifndef NURIA_TOKENIZER_HPP
18 #define NURIA_TOKENIZER_HPP
19 
20 #include "essentials.hpp"
21 #include <QSharedData>
22 #include <functional>
23 #include <QVariant>
24 #include <QObject>
25 #include <regex>
26 
27 namespace Nuria {
28 
29 class TokenizerRulesPrivate; // Private data type of TokenizerRules; only forward-declared in this header.
30 class TokenizerPrivate; // Private data type of Tokenizer; only forward-declared in this header.
31 class Tokenizer; // Declared early because TokenizerRules::TokenAction and the friend declaration refer to it.
32 
40 /**
41  * Token as returned by Nuria::Tokenizer.
42  *
43  * Every constructor argument is optional: \a tokenId defaults to -1,
44  * \a row and \a column default to 0 and \a value to a default-constructed
45  * QVariant. The mem-initializers are listed in member declaration order.
46  */
47 struct NURIA_CORE_EXPORT Token {
48  Token (int tokenId = -1, int row = 0, int column = 0,
49  const QVariant &value = QVariant ())
50  : tokenId (tokenId), row (row), column (column), value (value)
51  { }
52 
53  /** Id of the token; -1 when constructed without arguments. */
54  int tokenId;
55 
56  /** Row stored for this token; 0 when not given. */
57  int row;
58 
59  /** Column stored for this token; 0 when not given. */
60  int column;
61 
62  /** Value carried by the token; a default-constructed QVariant when not given. */
63  QVariant value;
64 
65  /** Ordering operator; its criterion is defined out of line and not visible in this header. */
66  bool operator< (const Token &right) const;
67 
68 };
69 
92 /**
93  * Storage of rules used by Nuria::Tokenizer.
94  */
95 class NURIA_CORE_EXPORT TokenizerRules {
96 public:
97 
98  /** Whitespace handling modes; what each mode does is implemented outside this header. */
99  enum WhitespaceMode {
100 
101  /** Default mode (value 0); used when no mode is passed to the constructor. */
102  AutoHandleWhitespace = 0,
103 
104  /** Alternative mode; presumably leaves whitespace to the rules themselves - TODO confirm. */
105  ManualWhitespaceHandling
106 
107  };
108 
109  /** Callback receiving the token and the tokenizer; the meaning of the bool result is not visible here. */
110  typedef std::function< bool(Token &, Tokenizer *) > TokenAction;
111 
112  /** Constructs a rule set with whitespace mode \a mode (AutoHandleWhitespace by default). */
113  TokenizerRules (WhitespaceMode mode = AutoHandleWhitespace);
114 
115  /** Copy constructor. */
116  TokenizerRules (const TokenizerRules &other);
117 
118  /** Copy-assignment operator. */
119  TokenizerRules &operator= (const TokenizerRules &other);
120 
121  /** Destructor. */
122  ~TokenizerRules ();
123 
126  /** Adds a string token: associates \a terminal with \a tokenId. */
127  void addStringToken (int tokenId, const QByteArray &terminal);
128 
135  /** Adds a regex token from pattern text; the regex grammar used is not visible here. */
136  void addRegexToken (int tokenId, const QByteArray &regularExpression);
137 
140  /** Overload taking an already constructed std::regex. */
141  void addRegexToken (int tokenId, const std::regex &regularExpression);
142 
160  /** Sets \a action as the TokenAction for \a tokenId. */
161  void setTokenAction (int tokenId, TokenAction action);
162 
163  /** Returns the whitespace mode of this rule set. */
164  WhitespaceMode whitespaceMode () const;
165 
166  /** Sets the whitespace mode of this rule set to \a mode. */
167  void setWhitespaceMode (WhitespaceMode mode);
168 
169 private:
170  friend class Tokenizer;
171  QSharedDataPointer< TokenizerRulesPrivate > d; // Private data, held through QSharedDataPointer.
172 };
173 
231 class NURIA_CORE_EXPORT Tokenizer : public QObject { // General-purpose run-time tokenizer.
232  Q_OBJECT
233 public:
234 
236  Tokenizer (QObject *parent = nullptr); // Constructor; takes an optional QObject parent (nullptr by default).
237 
239  ~Tokenizer () override; // Destructor, overriding the inherited one.
240 
242  TokenizerRules &defaultTokenizerRules (); // Returns a mutable reference to the default rule set.
243 
245  void setDefaultTokenizerRules (const TokenizerRules &ruleSet); // Sets the default rule set from ruleSet.
246 
248  void addTokenizerRules (const QString &name, const TokenizerRules &ruleSet); // Adds ruleSet under name; behaviour for an already used name is not visible here.
249 
251  TokenizerRules tokenizerRules (const QString &name) const; // Returns a rule set by value for name; result for an unknown name is not visible here.
252 
259  void removeTokenizerRules (const QString &name); // Removes the rule set called name.
260 
265  void setCurrentTokenizerRules (const QString &name); // Selects the current rule set by name.
266 
268  const TokenizerRules &currentTokenizerRules () const; // Returns a const reference to the current rule set.
269 
274  void tokenize (const QByteArray &data); // Takes data as tokenizer input; whether scanning happens here or in nextToken() is not visible.
275 
279  QByteArray tokenizeData () const; // Returns a QByteArray - presumably the data given to tokenize(); TODO confirm.
280 
295  Token nextToken (); // Returns the next Token; what is returned at the end or on error is not visible here.
296 
300  bool atEnd () const; // Reports whether the tokenizer is at the end.
301 
303  bool hasError () const; // Reports whether an error is set.
304 
306  int errorColumn () const; // Column of the error.
307 
309  int errorRow () const; // Row of the error.
310 
312  int errorPosition () const; // Position of the error; presumably a byte offset into the data - TODO confirm.
313 
315  int currentColumn () const; // Current column.
316 
318  int currentRow () const; // Current row.
319 
321  int currentPosition () const; // Current position; presumably a byte offset into the data - TODO confirm.
322 
328  void setPosition (int position, int column, int row); // Sets position, column and row; note column precedes row here, while Token's constructor takes row before column.
329 
330 private:
331 
332  void advanceLocation (char c); // Internal helper taking one character; presumably updates row/column tracking - TODO confirm.
333  void skipWhitespace (); // Internal helper; presumably tied to the rule set's WhitespaceMode - TODO confirm.
334  bool readTokens (); // Internal helper; the meaning of the bool result is not visible here.
335  bool readAndHandleTokens (); // Internal helper; the meaning of the bool result is not visible here.
336  bool checkStringToken (); // Internal helper; argument-less companion of the overload below.
337  bool checkStringToken (const QByteArray &token, int tok); // Internal helper taking a token string and a token id.
338  bool checkRegexToken (); // Internal helper; argument-less companion of the overload below.
339  bool checkRegexToken (const std::regex &regex, int tok); // Internal helper taking a regex and a token id.
340 
341  TokenizerPrivate *d_ptr; // Pointer to private data; TokenizerPrivate is only forward-declared in this header.
342 
343 };
344 
345 }
346 
347 NURIA_CORE_EXPORT QDebug operator<< (QDebug debug, const Nuria::Token &token); // QDebug stream operator for Nuria::Token. NOTE(review): <QDebug> is not included directly here; presumably provided via essentials.hpp - confirm.
348 
349 #endif // NURIA_TOKENIZER_HPP
std::function< bool(Token &, Tokenizer *) > TokenAction
Definition: tokenizer.hpp:110
int column
Definition: tokenizer.hpp:60
int tokenId
Definition: tokenizer.hpp:54
WhitespaceMode
Definition: tokenizer.hpp:99
QVariant value
Definition: tokenizer.hpp:63
Nuria (namespace)
Definition: abstractsessionmanager.hpp:24
Nuria::TokenizerRules
Storage of rules used by Nuria::Tokenizer.
Definition: tokenizer.hpp:95
int row
Definition: tokenizer.hpp:57
Nuria::Token
Token as returned by Nuria::Tokenizer.
Definition: tokenizer.hpp:47
Nuria::Tokenizer
General-purpose run-time tokenizer.
Definition: tokenizer.hpp:231