#include <parse.hpp>
Public Types | |
| enum | kind { eof, identifier, number, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=', comma = ',' } |
Public Member Functions | |
| parser (std::istream &input) | |
| bool | get_statement (std::ostream &output) |
Private Member Functions | |
| std::string | charify (char c) |
| bool | get_number (std::string const &token, node &result) |
| bool | get_expr (node &result) |
| bool | get_add_expr (node &result) |
| bool | get_mul_expr (node &result) |
| bool | get_primary (node &result) |
| bool | get_unary (node &result) |
| void | get_definition (std::string &name, identifier_list &parameters, node &definition) |
| kind | get_token (std::string &token) |
| void | get_identifier (std::string &identifier) |
| void | get_expr_list (node_list &result) |
| template<class OutputIterator> | |
| OutputIterator | get_namelist (OutputIterator output) |
| void | push_back (std::string const &token, kind k) |
| bool | isalpha (char c) const |
| bool | isalnum (char c) const |
| bool | isdigit (char c) const |
| bool | isprint (char c) const |
Private Attributes | |
| std::istream & | input_ |
| Share the input stream. | |
| std::ctype< char > const & | ctype_ |
| Cache the ctype facet for checking character categories. | |
| std::string | token_ |
| One token push-back. | |
| kind | kind_ |
| The kind of token that was pushed back. | |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 25 of file parse.hpp.
| enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
Definition at line 31 of file parse.hpp.
00031 { eof, identifier, number, 00032 plus='+', minus='-', times='*', slash='/', lparen = '(', rparen=')', equal='=', 00033 comma=','};
| parser::parser | ( | std::istream & | input | ) |
| bool parser::get_statement | ( | std::ostream & | output | ) |
Read one statement and parse it. If the statement is an assignment or function definition, store the variable or function. If the statement is quit, exit the program. If the statement is an expression, evaluate it and print the result to output.
STATEMENT ::= DEFINITION | QUIT | EXPR
| output | The output stream. |
| parse_error | for various syntax and other errors |
Definition at line 178 of file parse.cpp.
References eof, node::evaluate(), get_definition(), get_expr(), get_token(), identifier, push_back(), and set_function().
Referenced by parse_loop().
00179 { 00180 std::string token; 00181 kind k(get_token(token)); 00182 if (k == eof) 00183 return false; 00184 00185 if (k == identifier and token == "def") { 00186 node definition; 00187 identifier_list parameters; 00188 get_definition(token, parameters, definition); 00189 set_function(token, node(parameters, definition)); 00190 return true; 00191 } 00192 00193 if (k == identifier and token == "quit") 00194 std::exit(0); 00195 00196 // Otherwise, the statement must be an expression. 00197 push_back(token, k); 00198 node n; 00199 if (not get_expr(n)) 00200 return false; 00201 else { 00202 // Evaluate the expression and print the result. 00203 output << n.evaluate() << '\n'; 00204 return true; 00205 } 00206 }
| std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
| c | The character |
c is readable. Definition at line 17 of file parse.cpp.
References isprint().
Referenced by get_identifier(), and get_token().
00018 { 00019 if (c == '\a') return "\'\\a\'"; 00020 if (c == '\b') return "\'\\b\'"; 00021 if (c == '\f') return "\'\\f\'"; 00022 if (c == '\n') return "\'\\n\'"; 00023 if (c == '\r') return "\'\\r\'"; 00024 if (c == '\t') return "\'\\t\'"; 00025 if (c == '\v') return "\'\\v\'"; 00026 if (c == '\'') return "\'\\'\'"; 00027 if (c == '\\') return "\'\\\\\'"; 00028 00029 if (isprint(c)) 00030 return std::string("\'") + c + '\''; 00031 else { 00032 std::ostringstream stream; 00033 stream << "'\\x" << std::hex; 00034 stream.fill('0'); 00035 stream.width(2); 00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00037 return stream.str(); 00038 } 00039 }
| bool parser::get_number | ( | std::string const & | token, | |
| node & | result | |||
| ) | [private] |
Parse a floating-point number.
| token | The token to parse | |
| result | Store the number here |
Returns true if token is a valid number or false for an error. Definition at line 146 of file parse.cpp.
Referenced by get_primary().
00147 { 00148 std::istringstream stream(token); 00149 // If the value overflows or is otherwise invalid, return false. 00150 double value; 00151 if (not (stream >> value)) 00152 return false; 00153 result = node(value); 00154 return true; 00155 }
| bool parser::get_expr | ( | node & | result | ) | [private] |
Parse an expression
| result | Store the result here |
Definition at line 208 of file parse.cpp.
References get_add_expr().
Referenced by get_definition(), get_expr_list(), get_primary(), and get_statement().
00209 { 00210 return get_add_expr(result); 00211 }
| bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
| result | Store the result here |
Definition at line 213 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00214 { 00215 if (not get_mul_expr(result)) 00216 return false; 00217 std::string token; 00218 while (kind k = get_token(token)) { 00219 if (k != '+' and k != '-') { 00220 push_back(token, k); 00221 return true; 00222 } else { 00223 node right; 00224 if (not get_mul_expr(right)) 00225 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token); 00226 result = node(result, k, right); 00227 } 00228 } 00229 return true; 00230 }
| bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
| result | Store the result here |
Definition at line 232 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00233 { 00234 if (not get_unary(result)) 00235 return false; 00236 std::string token; 00237 while (kind k = get_token(token)) { 00238 if (k != '*' and k != '/') { 00239 push_back(token, k); 00240 return true; 00241 } else { 00242 node right; 00243 if (not get_unary(right)) 00244 throw syntax_error("unterminated expression. Expected a unary-expression after " + token); 00245 result = node(result, k, right); 00246 } 00247 } 00248 return true; 00249 }
| bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')' | FUNCTION_CALL
FUNCTION_CALL ::= IDENTIFIER '(' OPT_EXPR_LIST ')'
OPT_EXPR_LIST ::= empty | EXPR_LIST
EXPR_LIST ::= EXPR | EXPR_LIST ',' EXPR
| result | Store the result here |
Definition at line 293 of file parse.cpp.
References eof, get_expr(), get_expr_list(), get_number(), get_token(), identifier, number, and push_back().
Referenced by get_unary().
00294 { 00295 std::string token; 00296 kind k = get_token(token); 00297 if (k == eof) 00298 return false; 00299 00300 if (k == '(') { 00301 // Parenthesized expression 00302 if (not get_expr(result)) 00303 throw syntax_error("expected expression, got end of line"); 00304 k = get_token(token); 00305 if (k != ')') 00306 throw syntax_error("expected ')', got " + token); 00307 else 00308 return true; 00309 } 00310 00311 if (k == number) { 00312 // Numeric literal 00313 if (not get_number(token, result)) 00314 throw syntax_error("Invalid numeric literal: " + token); 00315 return true; 00316 } 00317 00318 if (k == identifier) { 00319 // Identifier: variable or function call 00320 std::string next; 00321 k = get_token(next); 00322 if (k == '(') { 00323 // function call 00324 node_list arguments; 00325 get_expr_list(arguments); 00326 result = node(token, arguments); 00327 } else { 00328 static const node_list no_arguments; 00329 // Variable reference or function call with no arguments 00330 push_back(next, k); 00331 result = node(token); 00332 } 00333 return true; 00334 } 00335 throw syntax_error("expected a primary, got " + token); 00336 }
| bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
| result | Store the result here |
Definition at line 251 of file parse.cpp.
References eof, get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00252 { 00253 std::string token; 00254 kind k = get_token(token); 00255 if (k == eof) 00256 return false; 00257 if (k == '-') { 00258 if (not get_primary(result)) 00259 throw syntax_error("expected primary after unary " + token + ", got end of line"); 00260 result = node(k, result); 00261 return true; 00262 } else if (k == '+') { 00263 if (not get_primary(result)) 00264 throw syntax_error("expected primary after unary +, got end of line"); 00265 return true; 00266 } else { 00267 push_back(token, k); 00268 return get_primary(result); 00269 } 00270 }
| void parser::get_definition | ( | std::string & | name, | |
| identifier_list & | parameters, | |||
| node & | definition | |||
| ) | [private] |
Parse a function or variable definition. A variable is just like a function that takes no parameters.
DEFINITION ::= DEF IDENTIFIER OPT_PARAMETERS '=' EXPR
OPT_PARAMETERS ::= empty | '(' OPT_IDENTIFIER_LIST ')'
OPT_IDENTIFIER_LIST ::= empty | IDENTIFIER_LIST
IDENTIFIER_LIST ::= IDENTIFIER | IDENTIFIER_LIST ',' IDENTIFIER
| [out] | name | Store the variable or function name here |
| [out] | parameters | Store the list of parameter names here |
| [out] | definition | Store the definition expression here |
Definition at line 157 of file parse.cpp.
References get_expr(), get_namelist(), get_token(), and identifier.
Referenced by get_statement().
00158 { 00159 // Define a variable. 00160 kind k = get_token(name); 00161 if (k != identifier) 00162 throw syntax_error("expected IDENTIFIER, got " + name); 00163 00164 std::string token; 00165 k = get_token(token); 00166 if (k == '(') { 00167 get_namelist(std::back_inserter(parameters)); 00168 k = get_token(token); 00169 } 00170 00171 if (k != '=') 00172 throw syntax_error("expected = in definition, got " + token); 00173 00174 if (not get_expr(definition)) 00175 throw syntax_error("expected exprssion in assignment"); 00176 }
| parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL
IDENTIFIER ::= ALPHA (ALPHA | DIGIT)*
NUMBER ::= DIGIT+ ('.' DIGIT+)? ('E' SIGN? DIGIT+)?
SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '=' | ','
| token | Store the text of the token here. |
Definition at line 69 of file parse.cpp.
References charify(), eof, get_identifier(), identifier, input_, isalpha(), kind_, number, and token_.
Referenced by get_add_expr(), get_definition(), get_expr_list(), get_mul_expr(), get_namelist(), get_primary(), get_statement(), and get_unary().
00070 { 00071 if (not token_.empty()) 00072 { 00073 kind result(kind_); 00074 token = token_; 00075 00076 token_.clear(); 00077 kind_ = eof; 00078 00079 return result; 00080 } 00081 00082 char c; 00083 if (not (input_ >> c)) { 00084 token = "end of line"; 00085 return eof; 00086 } 00087 if (isalpha(c)) { 00088 input_.unget(); 00089 get_identifier(token); 00090 return identifier; 00091 } 00092 00093 // Get a numeric literal. 00094 token.clear(); 00095 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') { 00096 token += c; 00097 return kind(c); 00098 } 00099 00100 if (c < '0' or c > '9') { 00101 input_.unget(); 00102 throw syntax_error("expected digit, got " + charify(c)); 00103 } 00104 while (c >= '0' and c <= '9') { 00105 token += c; 00106 if (not input_.get(c)) 00107 return number; 00108 } 00109 if (c == '.') { 00110 token += c; 00111 if (not input_.get(c)) 00112 throw syntax_error("unterminated number: expected digit after the decimal point"); 00113 if (c < '0' or c > '9') { 00114 input_.unget(); 00115 throw syntax_error("expected digit after decimal point, got " + charify(c)); 00116 } 00117 while (c >= '0' and c <= '9') { 00118 token += c; 00119 if (not input_.get(c)) 00120 return number; 00121 } 00122 } 00123 if (c == 'e' or c == 'E') { 00124 token += c; 00125 if (not input_.get(c)) 00126 throw syntax_error("unterminated number: expected digit in the exponent"); 00127 if (c == '-' or c == '+') { 00128 token += c; 00129 if (not input_.get(c)) 00130 throw syntax_error("unterminated number: expected digit after sign in the exponent"); 00131 } 00132 if (c < '0' or c > '9') { 00133 input_.unget(); 00134 throw syntax_error("expected digit in the exponent, got " + charify(c)); 00135 } 00136 while (c >= '0' and c <= '9') { 00137 token += c; 00138 if (not input_.get(c)) 00139 return number; 00140 } 00141 } 00142 input_.unget(); 00143 return number; 00144 }
| void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
| identifier | Store the identifier here. |
Definition at line 41 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00042 { 00043 identifier.clear(); 00044 char c; 00045 if (not input_.get(c)) 00046 return; 00047 if (not isalpha(c)) 00048 throw syntax_error("expected alphabetic, got " + charify(c)); 00049 identifier += c; 00050 while (input_.get(c)) { 00051 if (not isalnum(c)) { 00052 input_.unget(); 00053 return; 00054 } 00055 identifier += c; 00056 } 00057 return; 00058 }
| void parser::get_expr_list | ( | node_list & | result | ) | [private] |
Parse a comma-separated expression list.
| [out] | result | Store the result here |
Definition at line 272 of file parse.cpp.
References get_expr(), get_token(), and push_back().
Referenced by get_primary().
00273 { 00274 result.clear(); 00275 std::string token; 00276 while (kind k = get_token(token)) { 00277 if (k == ')') 00278 return; 00279 push_back(token, k); 00280 node expr; 00281 if (not get_expr(expr)) 00282 throw syntax_error("unexpected end of line in function argument"); 00283 result.push_back(expr); 00284 k = get_token(token); 00285 if (k == ')') 00286 return; 00287 else if (k != ',') 00288 throw syntax_error("expected comma in argument list, got " + token); 00289 } 00290 throw syntax_error("unexpected end of line in function argument list"); 00291 }
| OutputIterator parser::get_namelist | ( | OutputIterator | output | ) | [inline, private] |
Parse a list of parameter names. Names are identifiers, separated by commas. The list can be empty. This is a template so the container type is unimportant. Any output iterator will do.
| [out] | output | Store the identifiers here |
Returns output after storing all the identifiers. Definition at line 193 of file parse.hpp.
References get_token(), and identifier.
Referenced by get_definition().
00194 { 00195 std::string token; 00196 while (kind k = get_token(token)) { 00197 if (k == ')') 00198 return output; 00199 else if (k != identifier) 00200 throw syntax_error("expected function parameter, got " + token); 00201 else { 00202 *output = token; 00203 ++output; 00204 00205 k = get_token(token); 00206 if (k == ')') 00207 return output; 00208 if (k != ',') 00209 throw syntax_error("expected comma in function paramter list, got " + token); 00210 } 00211 } 00212 throw syntax_error("unexpected end of line in function parameter list"); 00213 }
| void parser::push_back | ( | std::string const & | token, | |
| kind | k | |||
| ) | [private] |
Push back a token. The next call to get_token() will return the pushed-back token.
| token | The token to push back. | |
| k | The kind of token being pushed back |
Definition at line 60 of file parse.cpp.
References eof, kind_, and token_.
Referenced by get_add_expr(), get_expr_list(), get_mul_expr(), get_primary(), get_statement(), and get_unary().
00061 { 00062 kind_ = k; 00063 if (kind_ == eof) 00064 token_ = "end of line"; 00065 else 00066 token_ = token; 00067 }
| bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
| c | The character to test. |
Definition at line 162 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
| bool parser::isalnum | ( | char | c | ) | const [inline, private] |
| bool parser::isdigit | ( | char | c | ) | const [inline, private] |
| bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 179 of file parse.hpp.
Referenced by get_identifier(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 181 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 182 of file parse.hpp.
Referenced by get_token(), and push_back().
1.5.3