GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
lex.h
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 1993-2013 John W. Eaton
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 #if !defined (octave_lex_h)
24 #define octave_lex_h 1
25 
26 #include <deque>
27 #include <limits>
28 #include <list>
29 #include <set>
30 #include <stack>
31 
32 #include "comment-list.h"
33 #include "input.h"
34 #include "token.h"
35 
36 // Is the given string a keyword?
37 extern bool is_keyword (const std::string& s);
38 
39 // For communication between the lexer and parser.
40 
41 class
43 {
44 public:
45 
46  // Track symbol table information when parsing functions.
47 
49  {
50  public:
51 
52  symbol_table_context (void) : frame_stack () { }
53 
54  void clear (void)
55  {
56  while (! frame_stack.empty ())
57  frame_stack.pop ();
58  }
59 
60  bool empty (void) const { return frame_stack.empty (); }
61 
62  void pop (void)
63  {
64  if (empty ())
66 
67  frame_stack.pop ();
68  }
69 
71  {
72  frame_stack.push (scope);
73  }
74 
75  symbol_table::scope_id curr_scope (void) const
76  {
77  return empty () ? symbol_table::current_scope () : frame_stack.top ();
78  }
79 
80  private:
81 
82  std::stack<symbol_table::scope_id> frame_stack;
83  };
84 
85  // Track nesting of square brackets, curly braces, and parentheses.
86 
88  {
89  private:
90 
92  {
93  BRACKET = 1,
94  BRACE = 2,
95  PAREN = 3,
96  ANON_FCN_BODY = 4
97  };
98 
99  public:
100 
101  bbp_nesting_level (void) : context () { }
102 
104 
105  bbp_nesting_level& operator = (const bbp_nesting_level& nl)
106  {
107  if (&nl != this)
108  context = nl.context;
109 
110  return *this;
111  }
112 
114 
115  void reset (void)
116  {
117  while (! context.empty ())
118  context.pop ();
119  }
120 
121  void bracket (void) { context.push (BRACKET); }
122 
123  bool is_bracket (void)
124  {
125  return ! context.empty () && context.top () == BRACKET;
126  }
127 
128  void brace (void) { context.push (BRACE); }
129 
130  bool is_brace (void)
131  {
132  return ! context.empty () && context.top () == BRACE;
133  }
134 
135  void paren (void) { context.push (PAREN); }
136 
137  bool is_paren (void)
138  {
139  return ! context.empty () && context.top () == PAREN;
140  }
141 
142  void anon_fcn_body (void) { context.push (ANON_FCN_BODY); }
143 
144  bool is_anon_fcn_body (void)
145  {
146  return ! context.empty () && context.top () == ANON_FCN_BODY;
147  }
148 
149  bool is_bracket_or_brace (void)
150  {
151  return (! context.empty ()
152  && (context.top () == BRACKET || context.top () == BRACE));
153  }
154 
155  bool none (void) { return context.empty (); }
156 
157  void remove (void)
158  {
159  if (! context.empty ())
160  context.pop ();
161  }
162 
163  void clear (void)
164  {
165  while (! context.empty ())
166  context.pop ();
167  }
168 
169  private:
170 
171  std::stack<int> context;
172  };
173 
175  {
176  public:
177 
178  // Store an "unlimited" number of tokens.
180  : buffer (), sz (sz_arg)
181  { }
182 
183  void push (token *tok)
184  {
185  if (buffer.size () == sz)
186  pop ();
187 
188  buffer.push_front (tok);
189  }
190 
191  void pop (void)
192  {
193  if (! empty ())
194  {
195  delete buffer.back ();
196  buffer.pop_back ();
197  }
198  }
199 
200  // Direct access.
201  token *at (size_t n)
202  {
203  return empty () ? 0 : buffer.at (n);
204  }
205 
206  const token *at (size_t n) const
207  {
208  return empty () ? 0 : buffer.at (n);
209  }
210 
211  // Most recently pushed.
212  token *front (void)
213  {
214  return empty () ? 0 : buffer.front ();
215  }
216 
217  const token *front (void) const
218  {
219  return empty () ? 0 : buffer.front ();
220  }
221 
222  token *back (void)
223  {
224  return empty () ? 0 : buffer.back ();
225  }
226 
227  const token *back (void) const
228  {
229  return empty () ? 0 : buffer.back ();
230  }
231 
232  // Number of elements currently in the buffer, max of sz.
233  size_t size (void) const { return buffer.size (); }
234 
235  bool empty (void) const { return buffer.empty (); }
236 
237  void clear (void)
238  {
239  while (! empty ())
240  pop ();
241  }
242 
243  private:
244 
245  std::deque<token *> buffer;
246 
247  size_t sz;
248 
249  // No copying!
250 
251  token_cache (const token_cache&);
252 
253  token_cache& operator = (const token_cache&);
254  };
255 
257  : end_of_input (false), at_beginning_of_statement (true),
258  looking_at_anon_fcn_args (false), looking_at_return_list (false),
259  looking_at_parameter_list (false), looking_at_decl_list (false),
260  looking_at_initializer_expression (false),
261  looking_at_matrix_or_assign_lhs (false),
262  looking_for_object_index (false),
263  looking_at_indirect_ref (false), parsing_class_method (false),
264  maybe_classdef_get_set_method (false), parsing_classdef (false),
265  quote_is_transpose (false), force_script (false),
266  reading_fcn_file (false), reading_script_file (false),
267  reading_classdef_file (false),
268  input_line_number (1), current_input_column (1),
269  bracketflag (0), braceflag (0),
270  looping (0), defining_func (0), looking_at_function_handle (0),
271  block_comment_nesting_level (0), token_count (0),
272  current_input_line (), comment_text (), help_text (),
273  string_text (), string_line (0), string_column (0),
274  fcn_file_name (), fcn_file_full_name (), looking_at_object_index (),
275  parsed_function_name (), pending_local_variables (),
276  symtab_context (), nesting_level (), tokens ()
277  {
278  init ();
279  }
280 
281  ~lexical_feedback (void);
282 
283  void init (void);
284 
285  void reset (void);
286 
287  int previous_token_value (void) const;
288 
289  bool previous_token_value_is (int tok_val) const;
290 
291  void mark_previous_token_trailing_space (void);
292 
293  bool space_follows_previous_token (void) const;
294 
295  bool previous_token_is_binop (void) const;
296 
297  bool previous_token_is_keyword (void) const;
298 
299  bool previous_token_may_be_command (void) const;
300 
301  void maybe_mark_previous_token_as_variable (void);
302 
303  void mark_as_variable (const std::string& nm);
304  void mark_as_variables (const std::list<std::string>& lst);
305 
306  // true means that we have encountered eof on the input stream.
308 
309  // true means we are at the beginning of a statement, where a
310  // command name is possible.
312 
313  // true means we are parsing an anonymous function argument list.
315 
316  // true means we're parsing the return list for a function.
318 
319  // true means we're parsing the parameter list for a function.
321 
322  // true means we're parsing a declaration list (global or
323  // persistent).
325 
326  // true means we are looking at the initializer expression for a
327  // parameter list element.
329 
330  // true means we're parsing a matrix or the left hand side of
331  // multi-value assignment statement.
333 
334  // object index not possible until we've seen something.
336 
337  // true means we're looking at an indirect reference to a
338  // structure element.
340 
341  // true means we are parsing a class method in function or classdef file.
343 
344  // true means we are parsing a class method declaration line in a
345  // classdef file and can accept a property get or set method name.
346  // for example, "get.propertyname" is recognized as a function name.
348 
349  // true means we are parsing a classdef file
351 
352  // return transpose or start a string?
354 
355  // TRUE means treat the current file as a script even if the first
356  // token is "function" or "classdef".
358 
359  // TRUE means we're parsing a function file.
361 
362  // TRUE means we're parsing a script file.
364 
365  // TRUE means we're parsing a classdef file.
367 
368  // the current input line number.
370 
371  // the column of the current token.
373 
374  // square bracket level count.
376 
377  // curly brace level count.
379 
380  // nonzero means we're in the middle of defining a loop.
381  int looping;
382 
383  // nonzero means we're in the middle of defining a function.
385 
386  // nonzero means we are parsing a function handle.
388 
389  // nesting level for block comments.
391 
392  // Count of tokens recognized by this lexer since initialized or
393  // since the last reset.
394  size_t token_count;
395 
396  // The current line of input.
397  std::string current_input_line;
398 
399  // The current comment text.
400  std::string comment_text;
401 
402  // The current help text.
403  std::string help_text;
404 
405  // The current character string text.
406  std::string string_text;
407 
408  // The position of the beginning of the current character string.
411 
412  // Simple name of function file we are reading.
413  std::string fcn_file_name;
414 
415  // Full name of file we are reading.
416  std::string fcn_file_full_name;
417 
418  // if the front of the list is true, the closest paren, brace, or
419  // bracket nesting is an index for an object.
420  std::list<bool> looking_at_object_index;
421 
422  // if the top of the stack is true, then we've already seen the name
423  // of the current function. should only matter if
424  // current_function_level > 0
425  std::stack<bool> parsed_function_name;
426 
427  // set of identifiers that might be local variable names.
428  std::set<std::string> pending_local_variables;
429 
430  // Track current symbol table scope and context.
432 
433  // is the closest nesting level a square bracket, squiggly brace,
434  // a paren, or an anonymous function body?
436 
437  // Tokens generated by the lexer.
439 
440 private:
441 
442  // No copying!
443 
445 
446  lexical_feedback& operator = (const lexical_feedback&);
447 };
448 
449 // octave_base_lexer inherits from lexical_feedback because we will
450 // eventually have several different constructors and it is easier to
451 // initialize if everything is grouped in a parent class rather than
452 // listing all the members in the octave_base_lexer class.
453 
454 class
456 {
457 public:
458 
459  // Handle buffering of input for lexer.
460 
462  {
463  public:
464 
466  : buffer (), pos (0), chars_left (0), eof (false)
467  { }
468 
469  void fill (const std::string& input, bool eof_arg);
470 
471  // Copy at most max_size characters to buf.
472  int copy_chunk (char *buf, size_t max_size);
473 
474  bool empty (void) const { return chars_left == 0; }
475 
476  bool at_eof (void) const { return eof; }
477 
478  private:
479 
480  std::string buffer;
481  const char *pos;
482  size_t chars_left;
483  bool eof;
484  };
485 
486  // Collect comment text.
487 
488  class
490  {
491  public:
492 
493  comment_buffer (void) : comment_list (0) { }
494 
495  ~comment_buffer (void) { delete comment_list; }
496 
497  void append (const std::string& s, octave_comment_elt::comment_type t)
498  {
499  if (! comment_list)
500  comment_list = new octave_comment_list ();
501 
502  comment_list->append (s, t);
503  }
504 
505  // Caller is expected to delete the returned value.
506 
507  octave_comment_list *get_comment (void)
508  {
509  octave_comment_list *retval = comment_list;
510 
511  comment_list = 0;
512 
513  return retval;
514  }
515 
516  void reset (void)
517  {
518  delete comment_list;
519 
520  comment_list = 0;
521  }
522 
523  private:
524 
526  };
527 
529  : lexical_feedback (), scanner (0), input_buf (), comment_buf ()
530  {
531  init ();
532  }
533 
534  virtual ~octave_base_lexer (void);
535 
536  void init (void);
537 
538  virtual bool is_push_lexer (void) const { return false; }
539 
540  virtual void reset (void);
541 
542  void prep_for_file (void);
543 
544  void begin_string (int state);
545 
546  virtual int fill_flex_buffer (char *buf, unsigned int max_size) = 0;
547 
548  bool at_end_of_buffer (void) const { return input_buf.empty (); }
549 
550  bool at_end_of_file (void) const { return input_buf.at_eof (); }
551 
552  int handle_end_of_input (void);
553 
554  char *flex_yytext (void);
555 
556  int flex_yyleng (void);
557 
558  int text_yyinput (void);
559 
560  void xunput (char c, char *buf);
561 
562  void xunput (char c);
563 
564  bool looking_at_space (void);
565 
566  bool inside_any_object_index (void);
567 
568  bool is_variable (const std::string& name);
569 
570  int is_keyword_token (const std::string& s);
571 
572  bool whitespace_is_significant (void);
573 
574  void handle_number (void);
575 
576  void handle_continuation (void);
577 
578  void finish_comment (octave_comment_elt::comment_type typ);
579 
580  octave_comment_list *get_comment (void) { return comment_buf.get_comment (); }
581 
582  int handle_close_bracket (int bracket_type);
583 
584  bool looks_like_command_arg (void);
585 
586  int handle_superclass_identifier (void);
587 
588  int handle_meta_identifier (void);
589 
590  int handle_identifier (void);
591 
592  void maybe_warn_separator_insert (char sep);
593 
594  void gripe_single_quote_string (void);
595 
596  void gripe_matlab_incompatible (const std::string& msg);
597 
598  void maybe_gripe_matlab_incompatible_comment (char c);
599 
600  void gripe_matlab_incompatible_continuation (void);
601 
602  void gripe_matlab_incompatible_operator (const std::string& op);
603 
604  void push_token (token *);
605 
606  token *current_token (void);
607 
608  void display_token (int tok);
609 
610  void fatal_error (const char *msg);
611 
612  void lexer_debug (const char *pattern);
613 
614  // Internal state of the flex-generated lexer.
615  void *scanner;
616 
617  // Object that reads and buffers input.
619 
620  // Object that collects comment text.
622 
623  virtual void increment_promptflag (void) = 0;
624 
625  virtual void decrement_promptflag (void) = 0;
626 
627  virtual int promptflag (void) const = 0;
628 
629  virtual int promptflag (int) = 0;
630 
631  virtual std::string input_source (void) const { return "unknown"; }
632 
633  virtual bool input_from_terminal (void) const { return false; }
634 
635  virtual bool input_from_file (void) const { return false; }
636 
637  virtual bool input_from_eval_string (void) const { return false; }
638 
639  void push_start_state (int state);
640 
641  void pop_start_state (void);
642 
643  void clear_start_state (void);
644 
645  int start_state (void) const { return start_state_stack.top (); }
646 
647  void display_start_state (void) const;
648 
649  int handle_op (const char *pattern, int tok, bool bos = false);
650 
651  int handle_incompatible_op (const char *pattern, int tok, bool bos = false);
652 
653  bool maybe_unput_comma_before_unary_op (int tok);
654 
655  int handle_unary_op (int tok, bool bos = false);
656 
657  int handle_incompatible_unary_op (int tok, bool bos = false);
658 
659  int handle_assign_op (const char *pattern, int tok);
660 
661  int handle_incompatible_assign_op (const char *pattern, int tok);
662 
663  int handle_op_internal (int tok, bool bos, bool compat);
664 
665  int handle_token (const std::string& name, int tok);
666 
667  int handle_token (int tok, token *tok_val = 0);
668 
669  int count_token (int tok);
670 
671  int count_token_internal (int tok);
672 
673  int show_token (int tok);
674 
675 protected:
676 
677  std::stack<int> start_state_stack;
678 
679  // No copying!
680 
682 
683  octave_base_lexer& operator = (const octave_base_lexer&);
684 };
685 
686 class
688 {
689 public:
690 
692  : octave_base_lexer (), input_reader (this)
693  { }
694 
695  octave_lexer (FILE *file)
696  : octave_base_lexer (), input_reader (file, this)
697  { }
698 
699  octave_lexer (const std::string& eval_string)
700  : octave_base_lexer (), input_reader (eval_string, this)
701  { }
702 
703  void reset (void)
704  {
705  input_reader.reset ();
706 
708  }
709 
710  void increment_promptflag (void) { input_reader.increment_promptflag (); }
711 
712  void decrement_promptflag (void) { input_reader.decrement_promptflag (); }
713 
714  int promptflag (void) const { return input_reader.promptflag (); }
715 
716  int promptflag (int n) { return input_reader.promptflag (n); }
717 
718  std::string input_source (void) const
719  {
720  return input_reader.input_source ();
721  }
722 
723  bool input_from_terminal (void) const
724  {
725  return input_reader.input_from_terminal ();
726  }
727 
728  bool input_from_file (void) const
729  {
730  return input_reader.input_from_file ();
731  }
732 
733  bool input_from_eval_string (void) const
734  {
735  return input_reader.input_from_eval_string ();
736  }
737 
738  int fill_flex_buffer (char *buf, unsigned int max_size);
739 
741 
742 protected:
743 
744  // No copying!
745 
746  octave_lexer (const octave_lexer&);
747 
748  octave_lexer& operator = (const octave_lexer&);
749 };
750 
751 class
753 {
754 public:
755 
756  octave_push_lexer (const std::string& input = std::string (),
757  bool eof = false)
758  : octave_base_lexer (), pflag (1)
759  {
760  append_input (input, eof);
761  }
762 
763  bool is_push_lexer (void) const { return true; }
764 
765  void reset (void)
766  {
767  promptflag (1);
768 
770  }
771 
772  void append_input (const std::string& input, bool eof)
773  {
774  input_buf.fill (input, eof);
775  }
776 
777  void increment_promptflag (void) { pflag++; }
778 
779  void decrement_promptflag (void) { pflag--; }
780 
781  int promptflag (void) const { return pflag; }
782 
783  int promptflag (int n)
784  {
785  int retval = pflag;
786  pflag = n;
787  return retval;
788  }
789 
790  std::string input_source (void) const { return "push buffer"; }
791 
792  int fill_flex_buffer (char *buf, unsigned int max_size);
793 
794 protected:
795 
796  int pflag;
797 
798  // No copying!
799 
801 
802  octave_push_lexer& operator = (const octave_push_lexer&);
803 };
804 
805 #endif