#include <parse.hpp>
Public Types | |
| enum | kind { eof, identifier, number, string, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=', comma = ',' } |
Public Member Functions | |
| parser (std::istream &input) | |
| bool | get_statement (std::ostream &output) |
Private Member Functions | |
| std::string | charify (char c) |
| bool | get_number (std::string const &token, node &result) |
| bool | get_expr (node &result) |
| bool | get_add_expr (node &result) |
| bool | get_mul_expr (node &result) |
| bool | get_primary (node &result) |
| bool | get_unary (node &result) |
| void | get_definition (std::string &name, identifier_list &parameters, node &definition) |
| kind | get_token (std::string &token) |
| void | get_identifier (std::string &identifier) |
| void | get_expr_list (node_list &result) |
| template<class OutputIterator> | |
| OutputIterator | get_namelist (OutputIterator output) |
| void | get_escape (std::string &str) |
| void | get_string (std::string &result, char delimiter) |
| void | push_back (std::string const &token, kind k) |
| bool | isalpha (char c) const |
| bool | isalnum (char c) const |
| bool | isdigit (char c) const |
| bool | isprint (char c) const |
Private Attributes | |
| std::istream & | input_ |
| Share the input stream. | |
| std::ctype< char > const & | ctype_ |
| Cache the ctype facet for checking character categories. | |
| std::string | token_ |
| One token push-back. | |
| kind | kind_ |
| The kind of token that was pushed back. | |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 25 of file parse.hpp.
| enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
Definition at line 31 of file parse.hpp.
00031 { eof, identifier, number, string, 00032 plus='+', minus='-', times='*', slash='/', lparen = '(', rparen=')', equal='=', 00033 comma=','};
| parser::parser | ( | std::istream & | input | ) |
| bool parser::get_statement | ( | std::ostream & | output | ) |
Read one statement and act on it. If the statement is an assignment or function definition, store the variable or function. If the statement is an expression, evaluate it and print the result to output.
STATEMENT ::= DEFINITION | QUIT | EXPR | SAVE | LOAD
SAVE ::= "save" FILENAME
LOAD ::= "load" FILENAME
FILENAME ::= quoted-string
| output | The output stream. |
| parse_error | for various syntax and other errors |
Definition at line 246 of file parse.cpp.
References eof, node::evaluate(), get_definition(), get_expr(), get_token(), identifier, load_library(), push_back(), save_library(), and set_function().
Referenced by parse_loop().
00247 { 00248 std::string token; 00249 kind k(get_token(token)); 00250 if (k == eof) 00251 return false; 00252 00253 if (k == identifier and token == "def") { 00254 node definition; 00255 identifier_list parameters; 00256 get_definition(token, parameters, definition); 00257 set_function(token, node(parameters, definition)); 00258 return true; 00259 } 00260 00261 if (k == identifier and token == "quit") 00262 std::exit(0); 00263 00264 if (k == identifier and token == "save") { 00265 std::string filename; 00266 if (get_token(filename) != string) 00267 throw syntax_error("expected FILENAME after save, got " + token); 00268 save_library(filename); 00269 output << "Library saved to " << filename << '\n'; 00270 } 00271 00272 if (k == identifier and token == "load") { 00273 std::string filename; 00274 if (get_token(filename) != string) 00275 throw syntax_error("expected FILENAME after load, got " + token); 00276 load_library(filename); 00277 output << "Library loaded from " << filename << '\n'; 00278 } 00279 // Otherwise, the statement must be an expression. 00280 push_back(token, k); 00281 node n; 00282 if (not get_expr(n)) 00283 return false; 00284 else { 00285 // Evaluate the expression and print the result. 00286 output << n.evaluate() << '\n'; 00287 return true; 00288 } 00289 }
| std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
| c | The character |
c is readable. Definition at line 17 of file parse.cpp.
References isprint().
Referenced by get_identifier(), and get_token().
00018 { 00019 if (c == '\a') return "\'\\a\'"; 00020 if (c == '\b') return "\'\\b\'"; 00021 if (c == '\f') return "\'\\f\'"; 00022 if (c == '\n') return "\'\\n\'"; 00023 if (c == '\r') return "\'\\r\'"; 00024 if (c == '\t') return "\'\\t\'"; 00025 if (c == '\v') return "\'\\v\'"; 00026 if (c == '\'') return "\'\\'\'"; 00027 if (c == '\\') return "\'\\\\\'"; 00028 00029 if (isprint(c)) 00030 return std::string("\'") + c + '\''; 00031 else { 00032 std::ostringstream stream; 00033 stream << "'\\x" << std::hex; 00034 stream.fill('0'); 00035 stream.width(2); 00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00037 return stream.str(); 00038 } 00039 }
| bool parser::get_number | ( | std::string const & | token, | |
| node & | result | |||
| ) | [private] |
Parse a floating number.
| token | The token to parse | |
| result | Store the number here |
Returns true if token is a valid number or false for an error. Definition at line 214 of file parse.cpp.
Referenced by get_primary().
00215 { 00216 std::istringstream stream(token); 00217 // If the value overflows or is otherwise invalid, return false. 00218 double value; 00219 if (not (stream >> value)) 00220 return false; 00221 result = node(value); 00222 return true; 00223 }
| bool parser::get_expr | ( | node & | result | ) | [private] |
Parse an expression
| result | Store the result here |
Definition at line 291 of file parse.cpp.
References get_add_expr().
Referenced by get_definition(), get_expr_list(), get_primary(), and get_statement().
00292 { 00293 return get_add_expr(result); 00294 }
| bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
| result | Store the result here |
Definition at line 296 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00297 { 00298 if (not get_mul_expr(result)) 00299 return false; 00300 std::string token; 00301 while (kind k = get_token(token)) { 00302 if (k != '+' and k != '-') { 00303 push_back(token, k); 00304 return true; 00305 } else { 00306 node right; 00307 if (not get_mul_expr(right)) 00308 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token); 00309 result = node(result, k, right); 00310 } 00311 } 00312 return true; 00313 }
| bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
| result | Store the result here |
Definition at line 315 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00316 { 00317 if (not get_unary(result)) 00318 return false; 00319 std::string token; 00320 while (kind k = get_token(token)) { 00321 if (k != '*' and k != '/') { 00322 push_back(token, k); 00323 return true; 00324 } else { 00325 node right; 00326 if (not get_unary(right)) 00327 throw syntax_error("unterminated expression. Expected a unary-expression after " + token); 00328 result = node(result, k, right); 00329 } 00330 } 00331 return true; 00332 }
| bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')' | FUNCTION_CALL
FUNCTION_CALL ::= IDENTIFIER '(' OPT_EXPR_LIST ')'
OPT_EXPR_LIST ::= empty | EXPR_LIST
EXPR_LIST ::= EXPR | EXPR_LIST ',' EXPR
| result | Store the result here |
Definition at line 376 of file parse.cpp.
References eof, get_expr(), get_expr_list(), get_number(), get_token(), identifier, number, and push_back().
Referenced by get_unary().
00377 { 00378 std::string token; 00379 kind k = get_token(token); 00380 if (k == eof) 00381 return false; 00382 00383 if (k == '(') { 00384 // Parenthesized expression 00385 if (not get_expr(result)) 00386 throw syntax_error("expected expression, got end of line"); 00387 k = get_token(token); 00388 if (k != ')') 00389 throw syntax_error("expected ')', got " + token); 00390 else 00391 return true; 00392 } 00393 00394 if (k == number) { 00395 // Numeric literal 00396 if (not get_number(token, result)) 00397 throw syntax_error("Invalid numeric literal: " + token); 00398 return true; 00399 } 00400 00401 if (k == identifier) { 00402 // Identifier: variable or function call 00403 std::string next; 00404 k = get_token(next); 00405 if (k == '(') { 00406 // function call 00407 node_list arguments; 00408 get_expr_list(arguments); 00409 result = node(token, arguments); 00410 } else { 00411 static const node_list no_arguments; 00412 // Variable reference or function call with no arguments 00413 push_back(next, k); 00414 result = node(token); 00415 } 00416 return true; 00417 } 00418 throw syntax_error("expected a primary, got " + token); 00419 }
| bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
| result | Store the result here |
Definition at line 334 of file parse.cpp.
References eof, get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00335 { 00336 std::string token; 00337 kind k = get_token(token); 00338 if (k == eof) 00339 return false; 00340 if (k == '-') { 00341 if (not get_primary(result)) 00342 throw syntax_error("expected primary after unary " + token + ", got end of line"); 00343 result = node(k, result); 00344 return true; 00345 } else if (k == '+') { 00346 if (not get_primary(result)) 00347 throw syntax_error("expected primary after unary +, got end of line"); 00348 return true; 00349 } else { 00350 push_back(token, k); 00351 return get_primary(result); 00352 } 00353 }
| void parser::get_definition | ( | std::string & | name, | |
| identifier_list & | parameters, | |||
| node & | definition | |||
| ) | [private] |
Parse a function or variable definition. A variable is just like a function that takes no parameters.
DEFINITION ::= DEF IDENTIFIER OPT_PARAMETERS '=' EXPR
OPT_PARAMETERS ::= empty | '(' OPT_IDENTIFIER_LIST ')'
OPT_IDENTIFIER_LIST ::= empty | IDENTIFIER_LIST
IDENTIFIER_LIST ::= IDENTIFIER | IDENTIFIER_LIST ',' IDENTIFIER
| [out] | name | Store the variable or function name here |
| [out] | parameters | Store the list of parameter names here |
| [out] | definition | Store the definition expression here |
Definition at line 225 of file parse.cpp.
References get_expr(), get_namelist(), get_token(), and identifier.
Referenced by get_statement().
00226 { 00227 // Define a variable. 00228 kind k = get_token(name); 00229 if (k != identifier) 00230 throw syntax_error("expected IDENTIFIER, got " + name); 00231 00232 std::string token; 00233 k = get_token(token); 00234 if (k == '(') { 00235 get_namelist(std::back_inserter(parameters)); 00236 k = get_token(token); 00237 } 00238 00239 if (k != '=') 00240 throw syntax_error("expected = in definition, got " + token); 00241 00242 if (not get_expr(definition)) 00243 throw syntax_error("expected exprssion in assignment"); 00244 }
| parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL | STRING
IDENTIFIER ::= ALPHA (ALPHA | DIGIT)*
NUMBER ::= DIGIT+ ('.' DIGIT+)? ('E' SIGN? DIGIT+)?
SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '=' | ','
| token | Store the text of the token here. |
Definition at line 130 of file parse.cpp.
References charify(), eof, get_identifier(), get_string(), identifier, input_, isalpha(), kind_, number, string, and token_.
Referenced by get_add_expr(), get_definition(), get_expr_list(), get_mul_expr(), get_namelist(), get_primary(), get_statement(), and get_unary().
00131 { 00132 if (not token_.empty()) 00133 { 00134 kind result(kind_); 00135 token = token_; 00136 00137 token_.clear(); 00138 kind_ = eof; 00139 00140 return result; 00141 } 00142 00143 char c; 00144 if (not (input_ >> c)) { 00145 token = "end of line"; 00146 return eof; 00147 } 00148 if (isalpha(c)) { 00149 input_.unget(); 00150 get_identifier(token); 00151 return identifier; 00152 } 00153 00154 if (c == '\'' or c == '"') { 00155 // Quoted string 00156 token.clear(); 00157 get_string(token, c); 00158 return string; 00159 } 00160 00161 // Get a numeric literal. 00162 token.clear(); 00163 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') { 00164 token += c; 00165 return kind(c); 00166 } 00167 00168 if (c < '0' or c > '9') { 00169 input_.unget(); 00170 throw syntax_error("expected digit, got " + charify(c)); 00171 } 00172 while (c >= '0' and c <= '9') { 00173 token += c; 00174 if (not input_.get(c)) 00175 return number; 00176 } 00177 if (c == '.') { 00178 token += c; 00179 if (not input_.get(c)) 00180 throw syntax_error("unterminated number: expected digit after the decimal point"); 00181 if (c < '0' or c > '9') { 00182 input_.unget(); 00183 throw syntax_error("expected digit after decimal point, got " + charify(c)); 00184 } 00185 while (c >= '0' and c <= '9') { 00186 token += c; 00187 if (not input_.get(c)) 00188 return number; 00189 } 00190 } 00191 if (c == 'e' or c == 'E') { 00192 token += c; 00193 if (not input_.get(c)) 00194 throw syntax_error("unterminated number: expected digit in the exponent"); 00195 if (c == '-' or c == '+') { 00196 token += c; 00197 if (not input_.get(c)) 00198 throw syntax_error("unterminated number: expected digit after sign in the exponent"); 00199 } 00200 if (c < '0' or c > '9') { 00201 input_.unget(); 00202 throw syntax_error("expected digit in the exponent, got " + charify(c)); 00203 } 00204 while (c >= '0' and c <= '9') { 00205 token += c; 00206 if (not input_.get(c)) 
00207 return number; 00208 } 00209 } 00210 input_.unget(); 00211 return number; 00212 }
| void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
| identifier | Store the identifier here. |
Definition at line 102 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00103 { 00104 identifier.clear(); 00105 char c; 00106 if (not input_.get(c)) 00107 return; 00108 if (not isalpha(c)) 00109 throw syntax_error("expected alphabetic, got " + charify(c)); 00110 identifier += c; 00111 while (input_.get(c)) { 00112 if (not isalnum(c)) { 00113 input_.unget(); 00114 return; 00115 } 00116 identifier += c; 00117 } 00118 return; 00119 }
| void parser::get_expr_list | ( | node_list & | result | ) | [private] |
Parse a comma-separated expression list.
| [out] | result | Store the result here |
Definition at line 355 of file parse.cpp.
References get_expr(), get_token(), and push_back().
Referenced by get_primary().
00356 { 00357 result.clear(); 00358 std::string token; 00359 while (kind k = get_token(token)) { 00360 if (k == ')') 00361 return; 00362 push_back(token, k); 00363 node expr; 00364 if (not get_expr(expr)) 00365 throw syntax_error("unexpected end of line in function argument"); 00366 result.push_back(expr); 00367 k = get_token(token); 00368 if (k == ')') 00369 return; 00370 else if (k != ',') 00371 throw syntax_error("expected comma in argument list, got " + token); 00372 } 00373 throw syntax_error("unexpected end of line in function argument list"); 00374 }
| OutputIterator parser::get_namelist | ( | OutputIterator | output | ) | [inline, private] |
Parse a list of parameter names. Names are identifiers, separated by commas. The list can be empty. This is a template so the container type is unimportant. Any output iterator will do.
| [out] | output | Store the identifiers here |
Returns output after storing all the identifiers. Definition at line 207 of file parse.hpp.
References get_token(), and identifier.
Referenced by get_definition().
00208 { 00209 std::string token; 00210 while (kind k = get_token(token)) { 00211 if (k == ')') 00212 return output; 00213 else if (k != identifier) 00214 throw syntax_error("expected function parameter, got " + token); 00215 else { 00216 *output = token; 00217 ++output; 00218 00219 k = get_token(token); 00220 if (k == ')') 00221 return output; 00222 if (k != ',') 00223 throw syntax_error("expected comma in function paramter list, got " + token); 00224 } 00225 } 00226 throw syntax_error("unexpected end of line in function parameter list"); 00227 }
| void parser::get_escape | ( | std::string & | str | ) | [private] |
Interpret a backslash escape sequence. The caller must have read the backslash already.
| [out] | str | Write the string equivalent of the escape sequence at the end of this string |
Definition at line 41 of file parse.cpp.
References ctype_, and input_.
Referenced by get_string().
00042 { 00043 char c; 00044 if (not input_.get(c)) 00045 throw syntax_error("incomplete escape"); 00046 if (c == '\n') 00047 return; 00048 00049 if (c == 'a') 00050 str += '\a'; 00051 else if (c == 'b') 00052 str += '\b'; 00053 else if (c == 'n') 00054 str += '\n'; 00055 else if (c == 'f') 00056 str += '\f'; 00057 else if (c == 'r') 00058 str += '\r'; 00059 else if (c == 't') 00060 str += '\t'; 00061 else if (c == 'v') 00062 str += '\v'; 00063 else if (c == 'x') { 00064 // hexadecimal sequence 00065 std::string digits; 00066 while(input_.get(c) and ctype_.is(ctype_.xdigit, c)) 00067 digits += c; 00068 if (input_) 00069 input_.unget(); 00070 unsigned long value(std::strtoul(digits.c_str(), 0, 16)); 00071 str += static_cast<char>(static_cast<unsigned char>(value)); 00072 } else if (c >= '0' and c <= '7') { 00073 // octal sequence 00074 std::string digits; 00075 for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i) 00076 digits += c; 00077 if (input_ or c < '0' or c > '7') 00078 input_.unget(); 00079 unsigned long value(std::strtoul(digits.c_str(), 0, 8)); 00080 str += static_cast<char>(static_cast<unsigned char>(value)); 00081 } else { 00082 str += c; 00083 } 00084 }
| void parser::get_string | ( | std::string & | result, | |
| char | delimiter | |||
| ) | [private] |
Parse a quoted string. The caller passes the quote character in the delimiter argument.
| [out] | result | Store the token here. |
| [in] | delimiter | The quote character (' or ") |
Definition at line 86 of file parse.cpp.
References get_escape(), and input_.
Referenced by get_token().
00087 { 00088 char c; 00089 while (input_.get(c)) { 00090 if (c == delimiter) 00091 return; 00092 else if (c == '\\') 00093 get_escape(result); 00094 else if (c == '\n') 00095 throw syntax_error("unterminated string"); 00096 else 00097 result += c; 00098 } 00099 throw syntax_error("unterminated string"); 00100 }
| void parser::push_back | ( | std::string const & | token, | |
| kind | k | |||
| ) | [private] |
Push back a token. The next call to get_token() will return the pushed-back token.
| token | The token to push back. | |
| k | The kind of token being pushed back |
Definition at line 121 of file parse.cpp.
References eof, kind_, and token_.
Referenced by get_add_expr(), get_expr_list(), get_mul_expr(), get_primary(), get_statement(), and get_unary().
00122 { 00123 kind_ = k; 00124 if (kind_ == eof) 00125 token_ = "end of line"; 00126 else 00127 token_ = token; 00128 }
| bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
| c | The character to test. |
Definition at line 176 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
| bool parser::isalnum | ( | char | c | ) | const [inline, private] |
| bool parser::isdigit | ( | char | c | ) | const [inline, private] |
| bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 193 of file parse.hpp.
Referenced by get_escape(), get_identifier(), get_string(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 195 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 196 of file parse.hpp.
Referenced by get_token(), and push_back().
1.5.3