#include <parse.hpp>
Public Types | |
| enum | kind { eof, identifier, integer, floating_point, string, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=', comma = ',' } |
Public Member Functions | |
| parser (std::istream &input) | |
| bool | get_statement (std::ostream &output) |
Private Member Functions | |
| std::string | charify (char c) |
| bool | get_float (std::string const &token, node &result) |
| bool | get_integer (std::string const &token, node &result) |
| bool | get_expr (node &result) |
| bool | get_add_expr (node &result) |
| bool | get_mul_expr (node &result) |
| bool | get_primary (node &result) |
| bool | get_unary (node &result) |
| void | get_definition (std::string &name, identifier_list &parameters, node &definition) |
| kind | get_token (std::string &token) |
| void | get_identifier (std::string &identifier) |
| void | get_expr_list (node_list &result) |
| template<class OutputIterator> | |
| OutputIterator | get_namelist (OutputIterator output) |
| void | get_escape (std::string &str) |
| void | get_string (std::string &result, char delimiter) |
| void | push_back (std::string const &token, kind k) |
| bool | isalpha (char c) const |
| bool | isalnum (char c) const |
| bool | isdigit (char c) const |
| bool | isprint (char c) const |
Private Attributes | |
| std::istream & | input_ |
| Share the input stream. | |
| std::ctype< char > const & | ctype_ |
| Cache the ctype facet for checking character categories. | |
| std::string | token_ |
| One token push-back. | |
| kind | kind_ |
| The kind of token that was pushed back. | |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 26 of file parse.hpp.
| enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
| eof | |
| identifier | |
| integer | |
| floating_point | |
| string | |
| plus | |
| minus | |
| times | |
| slash | |
| lparen | |
| rparen | |
| equal | |
| comma |
Definition at line 32 of file parse.hpp.
00032 { eof, identifier, integer, floating_point, string, 00033 plus='+', minus='-', times='*', slash='/', lparen = '(', rparen=')', equal='=', 00034 comma=','};
| parser::parser | ( | std::istream & | input | ) |
| bool parser::get_statement | ( | std::ostream & | output | ) |
Read and parse one statement. If the statement is an assignment or function definition, store the variable or function. If the statement is an expression, evaluate it and print the result to output.
STATEMENT ::= DEFINITION | QUIT | EXPR | SAVE | LOAD
SAVE ::= "save" FILENAME
LOAD ::= "load" FILENAME
FILENAME ::= quoted-string
| output | The output stream. |
| parse_error | for various syntax and other errors |
Definition at line 262 of file parse.cpp.
References eof, node::evaluate(), get_definition(), get_expr(), get_token(), identifier, load_library(), push_back(), save_library(), and set_function().
Referenced by parse_loop().
00263 { 00264 std::string token; 00265 kind k(get_token(token)); 00266 if (k == eof) 00267 return false; 00268 00269 if (k == identifier and token == "def") { 00270 node definition; 00271 identifier_list parameters; 00272 get_definition(token, parameters, definition); 00273 set_function(token, node(parameters, definition)); 00274 return true; 00275 } 00276 00277 if (k == identifier and token == "quit") 00278 std::exit(0); 00279 00280 if (k == identifier and token == "save") { 00281 std::string filename; 00282 if (get_token(filename) != string) 00283 throw syntax_error("expected FILENAME after save, got " + token); 00284 save_library(filename); 00285 output << "Library saved to " << filename << '\n'; 00286 } 00287 00288 if (k == identifier and token == "load") { 00289 std::string filename; 00290 if (get_token(filename) != string) 00291 throw syntax_error("expected FILENAME after load, got " + token); 00292 load_library(filename); 00293 output << "Library loaded from " << filename << '\n'; 00294 } 00295 // Otherwise, the statement must be an expression. 00296 push_back(token, k); 00297 node n; 00298 if (not get_expr(n)) 00299 return false; 00300 else { 00301 // Evaluate the expression and print the result. 00302 output << n.evaluate() << '\n'; 00303 return true; 00304 } 00305 }
| std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
| c | The character |
c is readable. Definition at line 17 of file parse.cpp.
References isprint().
Referenced by get_identifier(), and get_token().
00018 { 00019 if (c == '\a') return "\'\\a\'"; 00020 if (c == '\b') return "\'\\b\'"; 00021 if (c == '\f') return "\'\\f\'"; 00022 if (c == '\n') return "\'\\n\'"; 00023 if (c == '\r') return "\'\\r\'"; 00024 if (c == '\t') return "\'\\t\'"; 00025 if (c == '\v') return "\'\\v\'"; 00026 if (c == '\'') return "\'\\'\'"; 00027 if (c == '\\') return "\'\\\\\'"; 00028 00029 if (isprint(c)) 00030 return std::string("\'") + c + '\''; 00031 else { 00032 std::ostringstream stream; 00033 stream << "'\\x" << std::hex; 00034 stream.fill('0'); 00035 stream.width(2); 00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00037 return stream.str(); 00038 } 00039 }
| bool parser::get_float | ( | std::string const & | token, | |
| node & | result | |||
| ) | [private] |
Parse a floating-point number.
| token | The token to parse | |
| result | Store the number here |
Returns true if token is a valid number, or false for an error. Definition at line 230 of file parse.cpp.
Referenced by get_primary().
00231 { 00232 std::istringstream stream(token); 00233 // If the value overflows or is otherwise invalid, return false. 00234 double value; 00235 if (not (stream >> value)) 00236 return false; 00237 result = node(number(value)); 00238 return true; 00239 }
| bool parser::get_integer | ( | std::string const & | token, | |
| node & | result | |||
| ) | [private] |
Parse an integer.
| token | The token to parse | |
| result | Store the number here |
Returns true if token is a valid number, or false for an error. Definition at line 219 of file parse.cpp.
Referenced by get_primary().
00220 { 00221 std::istringstream stream(token); 00222 // If the value overflows or is otherwise invalid, return false. 00223 long value; 00224 if (not (stream >> value)) 00225 return false; 00226 result = node(number(value)); 00227 return true; 00228 }
| bool parser::get_expr | ( | node & | result | ) | [private] |
Parse an expression
| result | Store the result here |
Definition at line 307 of file parse.cpp.
References get_add_expr().
Referenced by get_definition(), get_expr_list(), get_primary(), and get_statement().
00308 { 00309 return get_add_expr(result); 00310 }
| bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
| result | Store the result here |
Definition at line 312 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00313 { 00314 if (not get_mul_expr(result)) 00315 return false; 00316 std::string token; 00317 while (kind k = get_token(token)) { 00318 if (k != '+' and k != '-') { 00319 push_back(token, k); 00320 return true; 00321 } else { 00322 node right; 00323 if (not get_mul_expr(right)) 00324 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token); 00325 result = node(result, k, right); 00326 } 00327 } 00328 return true; 00329 }
| bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
| result | Store the result here |
Definition at line 331 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00332 { 00333 if (not get_unary(result)) 00334 return false; 00335 std::string token; 00336 while (kind k = get_token(token)) { 00337 if (k != '*' and k != '/') { 00338 push_back(token, k); 00339 return true; 00340 } else { 00341 node right; 00342 if (not get_unary(right)) 00343 throw syntax_error("unterminated expression. Expected a unary-expression after " + token); 00344 result = node(result, k, right); 00345 } 00346 } 00347 return true; 00348 }
| bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')' | FUNCTION_CALL
FUNCTION_CALL ::= IDENTIFIER '(' OPT_EXPR_LIST ')'
OPT_EXPR_LIST ::= empty | EXPR_LIST
EXPR_LIST ::= EXPR | EXPR_LIST ',' EXPR
| result | Store the result here |
Definition at line 392 of file parse.cpp.
References eof, floating_point, get_expr(), get_expr_list(), get_float(), get_integer(), get_token(), identifier, integer, and push_back().
Referenced by get_unary().
00393 { 00394 std::string token; 00395 kind k = get_token(token); 00396 if (k == eof) 00397 return false; 00398 00399 if (k == '(') { 00400 // Parenthesized expression 00401 if (not get_expr(result)) 00402 throw syntax_error("expected expression, got end of line"); 00403 k = get_token(token); 00404 if (k != ')') 00405 throw syntax_error("expected ')', got " + token); 00406 else 00407 return true; 00408 } 00409 00410 if (k == integer) { 00411 // Integer literal 00412 if (not get_integer(token, result)) 00413 throw syntax_error("Invalid integer literal: " + token); 00414 return true; 00415 } 00416 00417 if (k == floating_point) { 00418 // Integer literal 00419 if (not get_float(token, result)) 00420 throw syntax_error("Invalid integer literal: " + token); 00421 return true; 00422 } 00423 00424 if (k == identifier) { 00425 // Identifier: variable or function call 00426 std::string next; 00427 k = get_token(next); 00428 if (k == '(') { 00429 // function call 00430 node_list arguments; 00431 get_expr_list(arguments); 00432 result = node(token, arguments); 00433 } else { 00434 static const node_list no_arguments; 00435 // Variable reference or function call with no arguments 00436 push_back(next, k); 00437 result = node(token); 00438 } 00439 return true; 00440 } 00441 throw syntax_error("expected a primary, got " + token); 00442 }
| bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
| result | Store the result here |
Definition at line 350 of file parse.cpp.
References eof, get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00351 { 00352 std::string token; 00353 kind k = get_token(token); 00354 if (k == eof) 00355 return false; 00356 if (k == '-') { 00357 if (not get_primary(result)) 00358 throw syntax_error("expected primary after unary " + token + ", got end of line"); 00359 result = node(k, result); 00360 return true; 00361 } else if (k == '+') { 00362 if (not get_primary(result)) 00363 throw syntax_error("expected primary after unary +, got end of line"); 00364 return true; 00365 } else { 00366 push_back(token, k); 00367 return get_primary(result); 00368 } 00369 }
| void parser::get_definition | ( | std::string & | name, | |
| identifier_list & | parameters, | |||
| node & | definition | |||
| ) | [private] |
Parse a function or variable definition. A variable is just like a function that takes no parameters.
DEFINITION ::= DEF IDENTIFIER OPT_PARAMETERS '=' EXPR
OPT_PARAMETERS ::= empty | '(' OPT_IDENTIFIER_LIST ')'
OPT_IDENTIFIER_LIST ::= empty | IDENTIFIER_LIST
IDENTIFIER_LIST ::= IDENTIFIER | IDENTIFIER_LIST ',' IDENTIFIER
| [out] | name | Store the variable or function name here |
| [out] | parameters | Store the list of parameter names here |
| [out] | definition | Store the definition expression here |
Definition at line 241 of file parse.cpp.
References get_expr(), get_namelist(), get_token(), and identifier.
Referenced by get_statement().
00242 { 00243 // Define a variable. 00244 kind k = get_token(name); 00245 if (k != identifier) 00246 throw syntax_error("expected IDENTIFIER, got " + name); 00247 00248 std::string token; 00249 k = get_token(token); 00250 if (k == '(') { 00251 get_namelist(std::back_inserter(parameters)); 00252 k = get_token(token); 00253 } 00254 00255 if (k != '=') 00256 throw syntax_error("expected = in definition, got " + token); 00257 00258 if (not get_expr(definition)) 00259 throw syntax_error("expected exprssion in assignment"); 00260 }
| parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL | STRING
IDENTIFIER ::= ALPHA (ALPHA | DIGIT)*
NUMBER ::= INTEGER | FLOATING_POINT
INTEGER ::= DIGIT+
FLOATING_POINT ::= DIGIT+ '.' DIGIT+ ('E' SIGN? DIGIT+)? | DIGIT+ ('.' DIGIT+)? 'E' SIGN? DIGIT+
SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '=' | ','
| token | Store the text of the token here. |
Definition at line 130 of file parse.cpp.
References charify(), eof, floating_point, get_identifier(), get_string(), identifier, input_, integer, isalpha(), kind_, string, and token_.
Referenced by get_add_expr(), get_definition(), get_expr_list(), get_mul_expr(), get_namelist(), get_primary(), get_statement(), and get_unary().
00131 { 00132 if (not token_.empty()) 00133 { 00134 kind result(kind_); 00135 token = token_; 00136 00137 token_.clear(); 00138 kind_ = eof; 00139 00140 return result; 00141 } 00142 00143 char c; 00144 if (not (input_ >> c)) { 00145 token = "end of line"; 00146 return eof; 00147 } 00148 if (isalpha(c)) { 00149 input_.unget(); 00150 get_identifier(token); 00151 return identifier; 00152 } 00153 00154 if (c == '\'' or c == '"') { 00155 // Quoted string 00156 token.clear(); 00157 get_string(token, c); 00158 return string; 00159 } 00160 00161 // Get a numeric literal. 00162 token.clear(); 00163 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') { 00164 token += c; 00165 return kind(c); 00166 } 00167 00168 if (c < '0' or c > '9') { 00169 input_.unget(); 00170 throw syntax_error("expected digit, got " + charify(c)); 00171 } 00172 00173 kind k = integer; 00174 00175 while (c >= '0' and c <= '9') { 00176 token += c; 00177 if (not input_.get(c)) 00178 return k; 00179 } 00180 if (c == '.') { 00181 k = floating_point; 00182 token += c; 00183 if (not input_.get(c)) 00184 throw syntax_error("unterminated number: expected digit after the decimal point"); 00185 if (c < '0' or c > '9') { 00186 input_.unget(); 00187 throw syntax_error("expected digit after decimal point, got " + charify(c)); 00188 } 00189 while (c >= '0' and c <= '9') { 00190 token += c; 00191 if (not input_.get(c)) 00192 return k; 00193 } 00194 } 00195 if (c == 'e' or c == 'E') { 00196 k = floating_point; 00197 token += c; 00198 if (not input_.get(c)) 00199 throw syntax_error("unterminated number: expected digit in the exponent"); 00200 if (c == '-' or c == '+') { 00201 token += c; 00202 if (not input_.get(c)) 00203 throw syntax_error("unterminated number: expected digit after sign in the exponent"); 00204 } 00205 if (c < '0' or c > '9') { 00206 input_.unget(); 00207 throw syntax_error("expected digit in the exponent, got " + charify(c)); 00208 } 00209 
while (c >= '0' and c <= '9') { 00210 token += c; 00211 if (not input_.get(c)) 00212 return k; 00213 } 00214 } 00215 input_.unget(); 00216 return k; 00217 }
| void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
| identifier | Store the identifier here. |
Definition at line 102 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00103 { 00104 identifier.clear(); 00105 char c; 00106 if (not input_.get(c)) 00107 return; 00108 if (not isalpha(c)) 00109 throw syntax_error("expected alphabetic, got " + charify(c)); 00110 identifier += c; 00111 while (input_.get(c)) { 00112 if (not isalnum(c)) { 00113 input_.unget(); 00114 return; 00115 } 00116 identifier += c; 00117 } 00118 return; 00119 }
| void parser::get_expr_list | ( | node_list & | result | ) | [private] |
Parse a comma-separated expression list.
| [out] | result | Store the result here |
Definition at line 371 of file parse.cpp.
References get_expr(), get_token(), and push_back().
Referenced by get_primary().
00372 { 00373 result.clear(); 00374 std::string token; 00375 while (kind k = get_token(token)) { 00376 if (k == ')') 00377 return; 00378 push_back(token, k); 00379 node expr; 00380 if (not get_expr(expr)) 00381 throw syntax_error("unexpected end of line in function argument"); 00382 result.push_back(expr); 00383 k = get_token(token); 00384 if (k == ')') 00385 return; 00386 else if (k != ',') 00387 throw syntax_error("expected comma in argument list, got " + token); 00388 } 00389 throw syntax_error("unexpected end of line in function argument list"); 00390 }
| OutputIterator parser::get_namelist | ( | OutputIterator | output | ) | [inline, private] |
Parse a list of parameter names. Names are identifiers, separated by commas. The list can be empty. This is a template so the container type is unimportant. Any output iterator will do.
| [out] | output | Store the identifiers here |
Returns output after storing all the identifiers. Definition at line 216 of file parse.hpp.
References get_token(), and identifier.
Referenced by get_definition().
00217 { 00218 std::string token; 00219 while (kind k = get_token(token)) { 00220 if (k == ')') 00221 return output; 00222 else if (k != identifier) 00223 throw syntax_error("expected function parameter, got " + token); 00224 else { 00225 *output = token; 00226 ++output; 00227 00228 k = get_token(token); 00229 if (k == ')') 00230 return output; 00231 if (k != ',') 00232 throw syntax_error("expected comma in function paramter list, got " + token); 00233 } 00234 } 00235 throw syntax_error("unexpected end of line in function parameter list"); 00236 }
| void parser::get_escape | ( | std::string & | str | ) | [private] |
Interpret a backslash escape sequence. The caller must have read the backslash already.
| [out] | str | Write the string equivalent of the escape sequence at the end of this string |
Definition at line 41 of file parse.cpp.
References ctype_, and input_.
Referenced by get_string().
00042 { 00043 char c; 00044 if (not input_.get(c)) 00045 throw syntax_error("incomplete escape"); 00046 if (c == '\n') 00047 return; 00048 00049 if (c == 'a') 00050 str += '\a'; 00051 else if (c == 'b') 00052 str += '\b'; 00053 else if (c == 'n') 00054 str += '\n'; 00055 else if (c == 'f') 00056 str += '\f'; 00057 else if (c == 'r') 00058 str += '\r'; 00059 else if (c == 't') 00060 str += '\t'; 00061 else if (c == 'v') 00062 str += '\v'; 00063 else if (c == 'x') { 00064 // hexadecimal sequence 00065 std::string digits; 00066 while(input_.get(c) and ctype_.is(ctype_.xdigit, c)) 00067 digits += c; 00068 if (input_) 00069 input_.unget(); 00070 unsigned long value(std::strtoul(digits.c_str(), 0, 16)); 00071 str += static_cast<char>(static_cast<unsigned char>(value)); 00072 } else if (c >= '0' and c <= '7') { 00073 // octal sequence 00074 std::string digits; 00075 for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i) 00076 digits += c; 00077 if (input_ or c < '0' or c > '7') 00078 input_.unget(); 00079 unsigned long value(std::strtoul(digits.c_str(), 0, 8)); 00080 str += static_cast<char>(static_cast<unsigned char>(value)); 00081 } else { 00082 str += c; 00083 } 00084 }
| void parser::get_string | ( | std::string & | result, | |
| char | delimiter | |||
| ) | [private] |
Parse a quoted string. The caller passes the quote character in the delimiter argument.
| [out] | result | Store the token here. |
| [in] | delimiter | The quote character (' or ") |
Definition at line 86 of file parse.cpp.
References get_escape(), and input_.
Referenced by get_token().
00087 { 00088 char c; 00089 while (input_.get(c)) { 00090 if (c == delimiter) 00091 return; 00092 else if (c == '\\') 00093 get_escape(result); 00094 else if (c == '\n') 00095 throw syntax_error("unterminated string"); 00096 else 00097 result += c; 00098 } 00099 throw syntax_error("unterminated string"); 00100 }
| void parser::push_back | ( | std::string const & | token, | |
| kind | k | |||
| ) | [private] |
Push back a token. The next call to get_token() will return the pushed-back token.
| token | The token to push back. | |
| k | The kind of token being pushed back |
Definition at line 121 of file parse.cpp.
References eof, kind_, and token_.
Referenced by get_add_expr(), get_expr_list(), get_mul_expr(), get_primary(), get_statement(), and get_unary().
00122 { 00123 kind_ = k; 00124 if (kind_ == eof) 00125 token_ = "end of line"; 00126 else 00127 token_ = token; 00128 }
| bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
| c | The character to test. |
Definition at line 185 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
| bool parser::isalnum | ( | char | c | ) | const [inline, private] |
| bool parser::isdigit | ( | char | c | ) | const [inline, private] |
| bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 202 of file parse.hpp.
Referenced by get_escape(), get_identifier(), get_string(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 204 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 205 of file parse.hpp.
Referenced by get_token(), and push_back().
1.5.3