#include <parse.hpp>
Public Types | |
enum | kind { eof, identifier, number, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=', comma = ',' } |
Public Member Functions | |
parser (std::istream &input) | |
bool | get_statement (std::ostream &output) |
Private Member Functions | |
std::string | charify (char c) |
bool | get_number (std::string const &token, node &result) |
bool | get_expr (node &result) |
bool | get_add_expr (node &result) |
bool | get_mul_expr (node &result) |
bool | get_primary (node &result) |
bool | get_unary (node &result) |
void | get_definition (std::string &name, identifier_list &parameters, node &definition) |
kind | get_token (std::string &token) |
void | get_identifier (std::string &identifier) |
void | get_expr_list (node_list &result) |
template<class OutputIterator> | |
OutputIterator | get_namelist (OutputIterator output) |
void | push_back (std::string const &token, kind k) |
bool | isalpha (char c) const |
bool | isalnum (char c) const |
bool | isdigit (char c) const |
bool | isprint (char c) const |
Private Attributes | |
std::istream & | input_ |
Share the input stream. | |
std::ctype< char > const & | ctype_ |
Cache the ctype facet for checking character categories. | |
std::string | token_ |
One token push-back. | |
kind | kind_ |
The kind of token that was pushed back. |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 25 of file parse.hpp.
enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
Definition at line 31 of file parse.hpp.
00031 { eof, identifier, number, 00032 plus='+', minus='-', times='*', slash='/', lparen = '(', rparen=')', equal='=', 00033 comma=','};
parser::parser | ( | std::istream & | input | ) |
bool parser::get_statement | ( | std::ostream & | output | ) |
Read one statement and store the parse tree in result. If the statement is an assignment or function definition, store the variable or function. If the statement is an expression, print the result to output.
STATEMENT ::= DEFINITION | QUIT | EXPR
output | The output stream. |
parse_error | for various syntax and other errors |
Definition at line 178 of file parse.cpp.
References eof, node::evaluate(), get_definition(), get_expr(), get_token(), identifier, push_back(), and set_function().
Referenced by parse_loop().
00179 { 00180 std::string token; 00181 kind k(get_token(token)); 00182 if (k == eof) 00183 return false; 00184 00185 if (k == identifier and token == "def") { 00186 node definition; 00187 identifier_list parameters; 00188 get_definition(token, parameters, definition); 00189 set_function(token, node(parameters, definition)); 00190 return true; 00191 } 00192 00193 if (k == identifier and token == "quit") 00194 std::exit(0); 00195 00196 // Otherwise, the statement must be an expression. 00197 push_back(token, k); 00198 node n; 00199 if (not get_expr(n)) 00200 return false; 00201 else { 00202 // Evaluate the expression and print the result. 00203 output << n.evaluate() << '\n'; 00204 return true; 00205 } 00206 }
std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
c | The character |
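Returns a quoted character literal in which c is readable (special and non-printable characters are written as escape sequences). Definition at line 17 of file parse.cpp.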
References isprint().
Referenced by get_identifier(), and get_token().
00018 { 00019 if (c == '\a') return "\'\\a\'"; 00020 if (c == '\b') return "\'\\b\'"; 00021 if (c == '\f') return "\'\\f\'"; 00022 if (c == '\n') return "\'\\n\'"; 00023 if (c == '\r') return "\'\\r\'"; 00024 if (c == '\t') return "\'\\t\'"; 00025 if (c == '\v') return "\'\\v\'"; 00026 if (c == '\'') return "\'\\'\'"; 00027 if (c == '\\') return "\'\\\\\'"; 00028 00029 if (isprint(c)) 00030 return std::string("\'") + c + '\''; 00031 else { 00032 std::ostringstream stream; 00033 stream << "'\\x" << std::hex; 00034 stream.fill('0'); 00035 stream.width(2); 00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00037 return stream.str(); 00038 } 00039 }
bool parser::get_number | ( | std::string const & | token, | |
node & | result | |||
) | [private] |
Parse a floating-point number.
token | The token to parse | |
result | Store the number here |
Returns true if token is a valid number or false for an error. Definition at line 146 of file parse.cpp.
Referenced by get_primary().
00147 { 00148 std::istringstream stream(token); 00149 // If the value overflows or is otherwise invalid, return false. 00150 double value; 00151 if (not (stream >> value)) 00152 return false; 00153 result = node(value); 00154 return true; 00155 }
bool parser::get_expr | ( | node & | result | ) | [private] |
Parse an expression
result | Store the result here |
Definition at line 208 of file parse.cpp.
References get_add_expr().
Referenced by get_definition(), get_expr_list(), get_primary(), and get_statement().
00209 { 00210 return get_add_expr(result); 00211 }
bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
result | Store the result here |
Definition at line 213 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00214 { 00215 if (not get_mul_expr(result)) 00216 return false; 00217 std::string token; 00218 while (kind k = get_token(token)) { 00219 if (k != '+' and k != '-') { 00220 push_back(token, k); 00221 return true; 00222 } else { 00223 node right; 00224 if (not get_mul_expr(right)) 00225 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token); 00226 result = node(result, k, right); 00227 } 00228 } 00229 return true; 00230 }
bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
result | Store the result here |
Definition at line 232 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00233 { 00234 if (not get_unary(result)) 00235 return false; 00236 std::string token; 00237 while (kind k = get_token(token)) { 00238 if (k != '*' and k != '/') { 00239 push_back(token, k); 00240 return true; 00241 } else { 00242 node right; 00243 if (not get_unary(right)) 00244 throw syntax_error("unterminated expression. Expected a unary-expression after " + token); 00245 result = node(result, k, right); 00246 } 00247 } 00248 return true; 00249 }
bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')' | FUNCTION_CALL FUNCTION_CALL ::= IDENTIFIER '(' OPT_EXPR_LIST ')' OPT_EXPR_LIST ::= empty | EXPR_LIST EXPR_LIST ::= EXPR | EXPR_LIST ',' EXPR
result | Store the result here |
Definition at line 293 of file parse.cpp.
References eof, get_expr(), get_expr_list(), get_number(), get_token(), identifier, number, and push_back().
Referenced by get_unary().
00294 { 00295 std::string token; 00296 kind k = get_token(token); 00297 if (k == eof) 00298 return false; 00299 00300 if (k == '(') { 00301 // Parenthesized expression 00302 if (not get_expr(result)) 00303 throw syntax_error("expected expression, got end of line"); 00304 k = get_token(token); 00305 if (k != ')') 00306 throw syntax_error("expected ')', got " + token); 00307 else 00308 return true; 00309 } 00310 00311 if (k == number) { 00312 // Numeric literal 00313 if (not get_number(token, result)) 00314 throw syntax_error("Invalid numeric literal: " + token); 00315 return true; 00316 } 00317 00318 if (k == identifier) { 00319 // Identifier: variable or function call 00320 std::string next; 00321 k = get_token(next); 00322 if (k == '(') { 00323 // function call 00324 node_list arguments; 00325 get_expr_list(arguments); 00326 result = node(token, arguments); 00327 } else { 00328 static const node_list no_arguments; 00329 // Variable reference or function call with no arguments 00330 push_back(next, k); 00331 result = node(token); 00332 } 00333 return true; 00334 } 00335 throw syntax_error("expected a primary, got " + token); 00336 }
bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
result | Store the result here |
Definition at line 251 of file parse.cpp.
References eof, get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00252 { 00253 std::string token; 00254 kind k = get_token(token); 00255 if (k == eof) 00256 return false; 00257 if (k == '-') { 00258 if (not get_primary(result)) 00259 throw syntax_error("expected primary after unary " + token + ", got end of line"); 00260 result = node(k, result); 00261 return true; 00262 } else if (k == '+') { 00263 if (not get_primary(result)) 00264 throw syntax_error("expected primary after unary +, got end of line"); 00265 return true; 00266 } else { 00267 push_back(token, k); 00268 return get_primary(result); 00269 } 00270 }
void parser::get_definition | ( | std::string & | name, | |
identifier_list & | parameters, | |||
node & | definition | |||
) | [private] |
Parse a function or variable definition. A variable is just like a function that takes no parameters.
DEFINITION ::= DEF IDENTIFIER OPT_PARAMETERS '=' EXPR OPT_PARAMETERS ::= empty | '(' OPT_IDENTIFIER_LIST ')' OPT_IDENTIFIER_LIST ::= empty | IDENTIFIER_LIST IDENTIFIER_LIST ::= IDENTIFIER | IDENTIFIER_LIST ',' IDENTIFIER
[out] | name | Store the variable or function name here |
[out] | parameters | Store the list of parameter names here |
[out] | definition | Store the definition expression here |
Definition at line 157 of file parse.cpp.
References get_expr(), get_namelist(), get_token(), and identifier.
Referenced by get_statement().
00158 { 00159 // Define a variable. 00160 kind k = get_token(name); 00161 if (k != identifier) 00162 throw syntax_error("expected IDENTIFIER, got " + name); 00163 00164 std::string token; 00165 k = get_token(token); 00166 if (k == '(') { 00167 get_namelist(std::back_inserter(parameters)); 00168 k = get_token(token); 00169 } 00170 00171 if (k != '=') 00172 throw syntax_error("expected = in definition, got " + token); 00173 00174 if (not get_expr(definition)) 00175 throw syntax_error("expected exprssion in assignment"); 00176 }
parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL IDENTIFIER ::= ALPHA (ALPHA | DIGIT)* NUMBER ::= DIGIT+ ('.' DIGIT+)? ('E' SIGN? DIGIT+)? SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '=' | ','
token | Store the text of the token here. |
Definition at line 69 of file parse.cpp.
References charify(), eof, get_identifier(), identifier, input_, isalpha(), kind_, number, and token_.
Referenced by get_add_expr(), get_definition(), get_expr_list(), get_mul_expr(), get_namelist(), get_primary(), get_statement(), and get_unary().
00070 { 00071 if (not token_.empty()) 00072 { 00073 kind result(kind_); 00074 token = token_; 00075 00076 token_.clear(); 00077 kind_ = eof; 00078 00079 return result; 00080 } 00081 00082 char c; 00083 if (not (input_ >> c)) { 00084 token = "end of line"; 00085 return eof; 00086 } 00087 if (isalpha(c)) { 00088 input_.unget(); 00089 get_identifier(token); 00090 return identifier; 00091 } 00092 00093 // Get a numeric literal. 00094 token.clear(); 00095 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') { 00096 token += c; 00097 return kind(c); 00098 } 00099 00100 if (c < '0' or c > '9') { 00101 input_.unget(); 00102 throw syntax_error("expected digit, got " + charify(c)); 00103 } 00104 while (c >= '0' and c <= '9') { 00105 token += c; 00106 if (not input_.get(c)) 00107 return number; 00108 } 00109 if (c == '.') { 00110 token += c; 00111 if (not input_.get(c)) 00112 throw syntax_error("unterminated number: expected digit after the decimal point"); 00113 if (c < '0' or c > '9') { 00114 input_.unget(); 00115 throw syntax_error("expected digit after decimal point, got " + charify(c)); 00116 } 00117 while (c >= '0' and c <= '9') { 00118 token += c; 00119 if (not input_.get(c)) 00120 return number; 00121 } 00122 } 00123 if (c == 'e' or c == 'E') { 00124 token += c; 00125 if (not input_.get(c)) 00126 throw syntax_error("unterminated number: expected digit in the exponent"); 00127 if (c == '-' or c == '+') { 00128 token += c; 00129 if (not input_.get(c)) 00130 throw syntax_error("unterminated number: expected digit after sign in the exponent"); 00131 } 00132 if (c < '0' or c > '9') { 00133 input_.unget(); 00134 throw syntax_error("expected digit in the exponent, got " + charify(c)); 00135 } 00136 while (c >= '0' and c <= '9') { 00137 token += c; 00138 if (not input_.get(c)) 00139 return number; 00140 } 00141 } 00142 input_.unget(); 00143 return number; 00144 }
void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
identifier | Store the identifier here. |
Definition at line 41 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00042 { 00043 identifier.clear(); 00044 char c; 00045 if (not input_.get(c)) 00046 return; 00047 if (not isalpha(c)) 00048 throw syntax_error("expected alphabetic, got " + charify(c)); 00049 identifier += c; 00050 while (input_.get(c)) { 00051 if (not isalnum(c)) { 00052 input_.unget(); 00053 return; 00054 } 00055 identifier += c; 00056 } 00057 return; 00058 }
void parser::get_expr_list | ( | node_list & | result | ) | [private] |
Parse a comma-separated expression list.
[out] | result | Store the result here |
Definition at line 272 of file parse.cpp.
References get_expr(), get_token(), and push_back().
Referenced by get_primary().
00273 { 00274 result.clear(); 00275 std::string token; 00276 while (kind k = get_token(token)) { 00277 if (k == ')') 00278 return; 00279 push_back(token, k); 00280 node expr; 00281 if (not get_expr(expr)) 00282 throw syntax_error("unexpected end of line in function argument"); 00283 result.push_back(expr); 00284 k = get_token(token); 00285 if (k == ')') 00286 return; 00287 else if (k != ',') 00288 throw syntax_error("expected comma in argument list, got " + token); 00289 } 00290 throw syntax_error("unexpected end of line in function argument list"); 00291 }
OutputIterator parser::get_namelist | ( | OutputIterator | output | ) | [inline, private] |
Parse a list of parameter names. Names are identifiers, separated by commas. The list can be empty. This is a template so the container type is unimportant. Any output iterator will do.
[out] | output | Store the identifiers here |
Returns output after storing all the identifiers. Definition at line 193 of file parse.hpp.
References get_token(), and identifier.
Referenced by get_definition().
00194 { 00195 std::string token; 00196 while (kind k = get_token(token)) { 00197 if (k == ')') 00198 return output; 00199 else if (k != identifier) 00200 throw syntax_error("expected function parameter, got " + token); 00201 else { 00202 *output = token; 00203 ++output; 00204 00205 k = get_token(token); 00206 if (k == ')') 00207 return output; 00208 if (k != ',') 00209 throw syntax_error("expected comma in function paramter list, got " + token); 00210 } 00211 } 00212 throw syntax_error("unexpected end of line in function parameter list"); 00213 }
void parser::push_back | ( | std::string const & | token, | |
kind | k | |||
) | [private] |
Push back a token. The next call to get_token() will return the pushed-back token.
token | The token to push back. | |
k | The kind of token being pushed back |
Definition at line 60 of file parse.cpp.
References eof, kind_, and token_.
Referenced by get_add_expr(), get_expr_list(), get_mul_expr(), get_primary(), get_statement(), and get_unary().
00061 { 00062 kind_ = k; 00063 if (kind_ == eof) 00064 token_ = "end of line"; 00065 else 00066 token_ = token; 00067 }
bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
c | The character to test. |
Definition at line 162 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
bool parser::isalnum | ( | char | c | ) | const [inline, private] |
bool parser::isdigit | ( | char | c | ) | const [inline, private] |
bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 179 of file parse.hpp.
Referenced by get_identifier(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 181 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 182 of file parse.hpp.
Referenced by get_token(), and push_back().