Calculator  Step 5
parse.cpp
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <iterator>
3 #include <sstream>
4 
5 #include "calc_error.hpp"
6 #include "node.hpp"
7 #include "parse.hpp"
8 #include "variables.hpp"
9 
10 parser::parser(std::istream& input)
11 : input_(input),
12  ctype_(std::use_facet<std::ctype<char> >(input.getloc())),
13  token_(),
14  kind_()
15 {}
16 
17 std::string parser::charify(char c)
18 {
19  if (c == '\a') return R"('\a')";
20  if (c == '\b') return R"('\b')";
21  if (c == '\f') return R"('\f')";
22  if (c == '\n') return R"('\n')";
23  if (c == '\r') return R"('\r')";
24  if (c == '\t') return R"('\t')";
25  if (c == '\v') return R"('\v')";
26  if (c == '\'') return R"('\'')";
27  if (c == '\\') return R"('\\')";
28 
29  if (isprint(c))
30  return std::string{"\'"} + std::string(1,c) + "\'";
31  else {
32  std::ostringstream stream{};
33  stream << "'\\x" << std::hex;
34  stream.fill('0');
35  stream.width(2);
36  stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\'';
37  return stream.str();
38  }
39 }
40 
41 void parser::get_escape(std::string& str)
42 {
43  char c{};
44  if (not input_.get(c))
45  throw syntax_error("incomplete escape");
46  if (c == '\n')
47  return;
48 
49  if (c == 'a')
50  str += '\a';
51  else if (c == 'b')
52  str += '\b';
53  else if (c == 'n')
54  str += '\n';
55  else if (c == 'f')
56  str += '\f';
57  else if (c == 'r')
58  str += '\r';
59  else if (c == 't')
60  str += '\t';
61  else if (c == 'v')
62  str += '\v';
63  else if (c == 'x') {
64  // hexadecimal sequence
65  std::string digits{};
66  while(input_.get(c) and ctype_.is(ctype_.xdigit, c))
67  digits += c;
68  if (input_)
69  input_.unget();
70  unsigned long value{std::stoul(digits, nullptr, 16)};
71  str += static_cast<char>(static_cast<unsigned char>(value));
72  } else if (c >= '0' and c <= '7') {
73  // octal sequence
74  std::string digits{};
75  for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i)
76  digits += c;
77  if (input_ or c < '0' or c > '7')
78  input_.unget();
79  unsigned long value{std::stoul(digits, nullptr, 8)};
80  str += static_cast<char>(static_cast<unsigned char>(value));
81  } else {
82  str += c;
83  }
84 }
85 
86 void parser::get_string(std::string& result, char delimiter)
87 {
88  char c{};
89  while (input_.get(c)) {
90  if (c == delimiter)
91  return;
92  else if (c == '\\')
93  get_escape(result);
94  else if (c == '\n')
95  throw syntax_error("unterminated string");
96  else
97  result += c;
98  }
99  throw syntax_error("unterminated string");
100 }
101 
102 void parser::get_identifier(std::string& identifier)
103 {
104  identifier.clear();
105  char c;
106  if (not input_.get(c))
107  return;
108  if (not isalpha(c))
109  throw syntax_error("expected alphabetic, got " + charify(c));
110  identifier += c;
111  while (input_.get(c)) {
112  if (not isalnum(c)) {
113  input_.unget();
114  return;
115  }
116  identifier += c;
117  }
118  return;
119 }
120 
121 void parser::push_back(std::string const& token, kind k)
122 {
123  kind_ = k;
124  if (kind_ == eof)
125  token_ = "end of line";
126  else
127  token_ = token;
128 }
129 
130 parser::kind parser::get_token(std::string& token)
131 {
132  if (not token_.empty())
133  {
134  kind result(kind_);
135  token = token_;
136 
137  token_.clear();
138  kind_ = eof;
139 
140  return result;
141  }
142 
143  char c;
144  if (not (input_ >> c)) {
145  token = "end of line";
146  return eof;
147  }
148  if (isalpha(c)) {
149  input_.unget();
150  get_identifier(token);
151  return identifier;
152  }
153 
154  if (c == '\'' or c == '"') {
155  // Quoted string
156  token.clear();
157  get_string(token, c);
158  return string;
159  }
160 
161  // Get a numeric literal.
162  token.clear();
163  if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') {
164  token += c;
165  return kind(c);
166  }
167 
168  if (c < '0' or c > '9') {
169  input_.unget();
170  throw syntax_error("expected digit, got " + charify(c));
171  }
172  while (c >= '0' and c <= '9') {
173  token += c;
174  if (not input_.get(c))
175  return number;
176  }
177  if (c == '.') {
178  token += c;
179  if (not input_.get(c))
180  throw syntax_error("unterminated number: expected digit after the decimal point");
181  if (c < '0' or c > '9') {
182  input_.unget();
183  throw syntax_error("expected digit after decimal point, got " + charify(c));
184  }
185  while (c >= '0' and c <= '9') {
186  token += c;
187  if (not input_.get(c))
188  return number;
189  }
190  }
191  if (c == 'e' or c == 'E') {
192  token += c;
193  if (not input_.get(c))
194  throw syntax_error("unterminated number: expected digit in the exponent");
195  if (c == '-' or c == '+') {
196  token += c;
197  if (not input_.get(c))
198  throw syntax_error("unterminated number: expected digit after sign in the exponent");
199  }
200  if (c < '0' or c > '9') {
201  input_.unget();
202  throw syntax_error("expected digit in the exponent, got " + charify(c));
203  }
204  while (c >= '0' and c <= '9') {
205  token += c;
206  if (not input_.get(c))
207  return number;
208  }
209  }
210  input_.unget();
211  return number;
212 }
213 
214 bool parser::get_number(std::string const& token, node& result)
215 {
216  std::istringstream stream(token);
217  // If the value overflows or is otherwise invalid, return false.
218  double value;
219  if (not (stream >> value))
220  return false;
221  result = node(value);
222  return true;
223 }
224 
225 void parser::get_definition(std::string& name, identifier_list& parameters, node& definition)
226 {
227  // Define a variable.
228  kind k = get_token(name);
229  if (k != identifier)
230  throw syntax_error("expected IDENTIFIER, got " + name);
231 
232  std::string token;
233  k = get_token(token);
234  if (k == '(') {
235  get_namelist(std::back_inserter(parameters));
236  k = get_token(token);
237  }
238 
239  if (k != '=')
240  throw syntax_error("expected = in definition, got " + token);
241 
242  if (not get_expr(definition))
243  throw syntax_error("expected exprssion in assignment");
244 }
245 
246 bool parser::get_statement(std::ostream& output)
247 {
248  std::string token;
249  kind k(get_token(token));
250  if (k == eof)
251  return false;
252 
253  if (k == identifier and token == "def") {
254  node definition;
255  identifier_list parameters;
256  get_definition(token, parameters, definition);
257  set_function(token, node(parameters, definition));
258  return true;
259  }
260 
261  if (k == identifier and token == "quit")
262  std::exit(0);
263 
264  if (k == identifier and token == "save") {
265  std::string filename;
266  if (get_token(filename) != string)
267  throw syntax_error("expected FILENAME after save, got " + token);
268  save_library(filename);
269  output << "Library saved to " << filename << '\n';
270  }
271 
272  if (k == identifier and token == "load") {
273  std::string filename;
274  if (get_token(filename) != string)
275  throw syntax_error("expected FILENAME after load, got " + token);
276  load_library(filename);
277  output << "Library loaded from " << filename << '\n';
278  }
279  // Otherwise, the statement must be an expression.
280  push_back(token, k);
281  node n;
282  if (not get_expr(n))
283  return false;
284  else {
285  // Evaluate the expression and print the result.
286  output << n.evaluate() << '\n';
287  return true;
288  }
289 }
290 
291 bool parser::get_expr(node& result)
292 {
293  return get_add_expr(result);
294 }
295 
297 {
298  if (not get_mul_expr(result))
299  return false;
300  std::string token;
301  while (kind k = get_token(token)) {
302  if (k != '+' and k != '-') {
303  push_back(token, k);
304  return true;
305  } else {
306  node right;
307  if (not get_mul_expr(right))
308  throw syntax_error{"unterminated expression. Expected a multiplicative-expression after " + token};
309  result = node(result, k, right);
310  }
311  }
312  return true;
313 }
314 
316 {
317  if (not get_unary(result))
318  return false;
319  std::string token{};
320  while (kind k = get_token(token)) {
321  if (k != '*' and k != '/') {
322  push_back(token, k);
323  return true;
324  } else {
325  node right{};
326  if (not get_unary(right))
327  throw syntax_error{"unterminated expression. Expected a unary-expression after " + token};
328  result = node(result, k, right);
329  }
330  }
331  return true;
332 }
333 
334 bool parser::get_unary(node& result)
335 {
336  std::string token{};
337  kind k = get_token(token);
338  if (k == eof)
339  return false;
340  if (k == '-') {
341  if (not get_primary(result))
342  throw syntax_error{"expected primary after unary " + token + ", got end of line"};
343  result = node(k, result);
344  return true;
345  } else if (k == '+') {
346  if (not get_primary(result))
347  throw syntax_error{"expected primary after unary +, got end of line"};
348  return true;
349  } else {
350  push_back(token, k);
351  return get_primary(result);
352  }
353 }
354 
356 {
357  result.clear();
358  std::string token{};
359  while (kind k = get_token(token)) {
360  if (k == ')')
361  return;
362  push_back(token, k);
363  node expr{};
364  if (not get_expr(expr))
365  throw syntax_error{"unexpected end of line in function argument"};
366  result.push_back(expr);
367  k = get_token(token);
368  if (k == ')')
369  return;
370  else if (k != ',')
371  throw syntax_error{"expected comma in argument list, got " + token};
372  }
373  throw syntax_error{"unexpected end of line in function argument list"};
374 }
375 
377 {
378  std::string token{};
379  kind k = get_token(token);
380  if (k == eof)
381  return false;
382 
383  if (k == '(') {
384  // Parenthesized expression
385  if (not get_expr(result))
386  throw syntax_error{"expected expression, got end of line"};
387  k = get_token(token);
388  if (k != ')')
389  throw syntax_error{"expected ')', got " + token};
390  else
391  return true;
392  }
393 
394  if (k == number) {
395  // Numeric literal
396  if (not get_number(token, result))
397  throw syntax_error{"Invalid numeric literal: " + token};
398  return true;
399  }
400 
401  if (k == identifier) {
402  // Identifier: variable or function call
403  std::string next{};
404  k = get_token(next);
405  if (k == '(') {
406  // function call
407  node_list arguments{};
408  get_expr_list(arguments);
409  result = node{std::move(token), std::move(arguments)};
410  } else {
411  static const node_list no_arguments;
412  // Variable reference or function call with no arguments
413  push_back(next, k);
414  result = node{std::move(token), no_arguments};
415  }
416  return true;
417  }
418  throw syntax_error{"expected a primary, got " + token};
419 }
420 
421 void parse_loop(std::istream& input, std::ostream& output)
422 {
423  std::string line{};
424  // No portable way to test whether the console is an interactive terminal
425  // vs. a non-interactive file. If you have a system-specific way to test,
426  // output the prompt only for the interactive case.
427  for (output << "> "; std::getline(input, line); output << "> ") {
428  std::istringstream input{std::move(line)};
429  parser p(input);
430  try {
431  while (p.get_statement(output)) {
432  /* empty */
433  }
434  } catch(calc_error const& ex) {
435  output << ex.what() << '\n';
436  } catch(std::exception const& ex) {
437  output << "exception: " << ex.what() << '\n';
438  }
439  }
440 }
void load_library(std::string const &filename)
Definition: variables.cpp:100
void get_string(std::string &result, char delimiter)
Definition: parse.cpp:86
void parse_loop(std::istream &input, std::ostream &output)
Definition: parse.cpp:421
bool isprint(char c) const
Definition: parse.hpp:191
bool get_expr(node &result)
Definition: parse.cpp:291
void save_library(std::string const &filename)
Definition: variables.cpp:79
parser(std::istream &input)
Definition: parse.cpp:10
void get_identifier(std::string &identifier)
Definition: parse.cpp:102
std::string charify(char c)
Definition: parse.cpp:17
Definition: node.hpp:26
void get_definition(std::string &name, identifier_list &parameters, node &definition)
Definition: parse.cpp:225
void push_back(std::string const &token, kind k)
Definition: parse.cpp:121
bool get_statement(std::ostream &output)
Definition: parse.cpp:246
Definition: parse.hpp:25
bool get_primary(node &result)
Definition: parse.cpp:376
std::vector< std::string > identifier_list
A sequence of identifiers (e.g., parameter names).
Definition: node.hpp:19
OutputIterator get_namelist(OutputIterator output)
Definition: parse.hpp:207
bool get_unary(node &result)
Definition: parse.cpp:334
bool isalpha(char c) const
Definition: parse.hpp:176
kind get_token(std::string &token)
Definition: parse.cpp:130
std::string token_
One token push-back.
Definition: parse.hpp:195
void set_function(std::string const &name, node value)
Definition: variables.cpp:74
std::istream & input_
Share the input stream.
Definition: parse.hpp:193
kind kind_
The kind of token that was pushed back.
Definition: parse.hpp:196
double evaluate() const
Definition: node.cpp:60
kind
Definition: parse.hpp:31
std::vector< node > node_list
A sequence of nodes.
Definition: node.hpp:13
bool get_number(std::string const &token, node &result)
Definition: parse.cpp:214
std::ctype< char > const & ctype_
Cache the ctype facet for checking character categories.
Definition: parse.hpp:194
bool get_mul_expr(node &result)
Definition: parse.cpp:315
bool get_add_expr(node &result)
Definition: parse.cpp:296
void get_expr_list(node_list &result)
Definition: parse.cpp:355
bool isalnum(char c) const
Definition: parse.hpp:181
void get_escape(std::string &str)
Definition: parse.cpp:41