#include <parse.hpp>
Public Types | |
enum | kind { eof, identifier, number, string, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=', comma = ',' } |
Public Member Functions | |
parser (std::istream &input) | |
bool | get_statement (std::ostream &output) |
Private Member Functions | |
std::string | charify (char c) |
bool | get_number (std::string const &token, node &result) |
bool | get_expr (node &result) |
bool | get_add_expr (node &result) |
bool | get_mul_expr (node &result) |
bool | get_primary (node &result) |
bool | get_unary (node &result) |
void | get_definition (std::string &name, identifier_list &parameters, node &definition) |
kind | get_token (std::string &token) |
void | get_identifier (std::string &identifier) |
void | get_expr_list (node_list &result) |
template<class OutputIterator> | |
OutputIterator | get_namelist (OutputIterator output) |
void | get_escape (std::string &str) |
void | get_string (std::string &result, char delimiter) |
void | push_back (std::string const &token, kind k) |
bool | isalpha (char c) const |
bool | isalnum (char c) const |
bool | isdigit (char c) const |
bool | isprint (char c) const |
Private Attributes | |
std::istream & | input_ |
Share the input stream. | |
std::ctype< char > const & | ctype_ |
Cache the ctype facet for checking character categories. | |
std::string | token_ |
One token push-back. | |
kind | kind_ |
The kind of token that was pushed back. |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 25 of file parse.hpp.
enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
Definition at line 31 of file parse.hpp.
00031 { eof, identifier, number, string, 00032 plus='+', minus='-', times='*', slash='/', lparen = '(', rparen=')', equal='=', 00033 comma=','};
parser::parser | ( | std::istream & | input | ) |
bool parser::get_statement | ( | std::ostream & | output | ) |
Read one statement and store the parse tree in result. If the statement is an assignment or function definition, store the variable or function. If the statement is an expression, print the result to output.
STATEMENT ::= DEFINITION | QUIT | EXPR | SAVE | LOAD
SAVE ::= "save" FILENAME
LOAD ::= "load" FILENAME
FILENAME ::= quoted-string
output | The output stream. |
parse_error | for various syntax and other errors |
Definition at line 246 of file parse.cpp.
References eof, node::evaluate(), get_definition(), get_expr(), get_token(), identifier, load_library(), push_back(), save_library(), and set_function().
Referenced by parse_loop().
00247 { 00248 std::string token; 00249 kind k(get_token(token)); 00250 if (k == eof) 00251 return false; 00252 00253 if (k == identifier and token == "def") { 00254 node definition; 00255 identifier_list parameters; 00256 get_definition(token, parameters, definition); 00257 set_function(token, node(parameters, definition)); 00258 return true; 00259 } 00260 00261 if (k == identifier and token == "quit") 00262 std::exit(0); 00263 00264 if (k == identifier and token == "save") { 00265 std::string filename; 00266 if (get_token(filename) != string) 00267 throw syntax_error("expected FILENAME after save, got " + token); 00268 save_library(filename); 00269 output << "Library saved to " << filename << '\n'; 00270 } 00271 00272 if (k == identifier and token == "load") { 00273 std::string filename; 00274 if (get_token(filename) != string) 00275 throw syntax_error("expected FILENAME after load, got " + token); 00276 load_library(filename); 00277 output << "Library loaded from " << filename << '\n'; 00278 } 00279 // Otherwise, the statement must be an expression. 00280 push_back(token, k); 00281 node n; 00282 if (not get_expr(n)) 00283 return false; 00284 else { 00285 // Evaluate the expression and print the result. 00286 output << n.evaluate() << '\n'; 00287 return true; 00288 } 00289 }
std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
c | The character |
Returns a quoted string in which c is readable. Definition at line 17 of file parse.cpp.
References isprint().
Referenced by get_identifier(), and get_token().
00018 { 00019 if (c == '\a') return "\'\\a\'"; 00020 if (c == '\b') return "\'\\b\'"; 00021 if (c == '\f') return "\'\\f\'"; 00022 if (c == '\n') return "\'\\n\'"; 00023 if (c == '\r') return "\'\\r\'"; 00024 if (c == '\t') return "\'\\t\'"; 00025 if (c == '\v') return "\'\\v\'"; 00026 if (c == '\'') return "\'\\'\'"; 00027 if (c == '\\') return "\'\\\\\'"; 00028 00029 if (isprint(c)) 00030 return std::string("\'") + c + '\''; 00031 else { 00032 std::ostringstream stream; 00033 stream << "'\\x" << std::hex; 00034 stream.fill('0'); 00035 stream.width(2); 00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00037 return stream.str(); 00038 } 00039 }
bool parser::get_number | ( | std::string const & | token, | |
node & | result | |||
) | [private] |
Parse a floating number.
token | The token to parse | |
result | Store the number here |
Returns true if token is a valid number or false for an error. Definition at line 214 of file parse.cpp.
Referenced by get_primary().
00215 { 00216 std::istringstream stream(token); 00217 // If the value overflows or is otherwise invalid, return false. 00218 double value; 00219 if (not (stream >> value)) 00220 return false; 00221 result = node(value); 00222 return true; 00223 }
bool parser::get_expr | ( | node & | result | ) | [private] |
Parse an expression
result | Store the result here |
Definition at line 291 of file parse.cpp.
References get_add_expr().
Referenced by get_definition(), get_expr_list(), get_primary(), and get_statement().
00292 { 00293 return get_add_expr(result); 00294 }
bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
result | Store the result here |
Definition at line 296 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00297 { 00298 if (not get_mul_expr(result)) 00299 return false; 00300 std::string token; 00301 while (kind k = get_token(token)) { 00302 if (k != '+' and k != '-') { 00303 push_back(token, k); 00304 return true; 00305 } else { 00306 node right; 00307 if (not get_mul_expr(right)) 00308 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token); 00309 result = node(result, k, right); 00310 } 00311 } 00312 return true; 00313 }
bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
result | Store the result here |
Definition at line 315 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00316 { 00317 if (not get_unary(result)) 00318 return false; 00319 std::string token; 00320 while (kind k = get_token(token)) { 00321 if (k != '*' and k != '/') { 00322 push_back(token, k); 00323 return true; 00324 } else { 00325 node right; 00326 if (not get_unary(right)) 00327 throw syntax_error("unterminated expression. Expected a unary-expression after " + token); 00328 result = node(result, k, right); 00329 } 00330 } 00331 return true; 00332 }
bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')' | FUNCTION_CALL
FUNCTION_CALL ::= IDENTIFIER '(' OPT_EXPR_LIST ')'
OPT_EXPR_LIST ::= empty | EXPR_LIST
EXPR_LIST ::= EXPR | EXPR_LIST ',' EXPR
result | Store the result here |
Definition at line 376 of file parse.cpp.
References eof, get_expr(), get_expr_list(), get_number(), get_token(), identifier, number, and push_back().
Referenced by get_unary().
00377 { 00378 std::string token; 00379 kind k = get_token(token); 00380 if (k == eof) 00381 return false; 00382 00383 if (k == '(') { 00384 // Parenthesized expression 00385 if (not get_expr(result)) 00386 throw syntax_error("expected expression, got end of line"); 00387 k = get_token(token); 00388 if (k != ')') 00389 throw syntax_error("expected ')', got " + token); 00390 else 00391 return true; 00392 } 00393 00394 if (k == number) { 00395 // Numeric literal 00396 if (not get_number(token, result)) 00397 throw syntax_error("Invalid numeric literal: " + token); 00398 return true; 00399 } 00400 00401 if (k == identifier) { 00402 // Identifier: variable or function call 00403 std::string next; 00404 k = get_token(next); 00405 if (k == '(') { 00406 // function call 00407 node_list arguments; 00408 get_expr_list(arguments); 00409 result = node(token, arguments); 00410 } else { 00411 static const node_list no_arguments; 00412 // Variable reference or function call with no arguments 00413 push_back(next, k); 00414 result = node(token); 00415 } 00416 return true; 00417 } 00418 throw syntax_error("expected a primary, got " + token); 00419 }
bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
result | Store the result here |
Definition at line 334 of file parse.cpp.
References eof, get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00335 { 00336 std::string token; 00337 kind k = get_token(token); 00338 if (k == eof) 00339 return false; 00340 if (k == '-') { 00341 if (not get_primary(result)) 00342 throw syntax_error("expected primary after unary " + token + ", got end of line"); 00343 result = node(k, result); 00344 return true; 00345 } else if (k == '+') { 00346 if (not get_primary(result)) 00347 throw syntax_error("expected primary after unary +, got end of line"); 00348 return true; 00349 } else { 00350 push_back(token, k); 00351 return get_primary(result); 00352 } 00353 }
void parser::get_definition | ( | std::string & | name, | |
identifier_list & | parameters, | |||
node & | definition | |||
) | [private] |
Parse a function or variable definition. A variable is just like a function that takes no parameters.
DEFINITION ::= DEF IDENTIFIER OPT_PARAMETERS '=' EXPR
OPT_PARAMETERS ::= empty | '(' OPT_IDENTIFIER_LIST ')'
OPT_IDENTIFIER_LIST ::= empty | IDENTIFIER_LIST
IDENTIFIER_LIST ::= IDENTIFIER | IDENTIFIER_LIST ',' IDENTIFIER
[out] | name | Store the variable or function name here |
[out] | parameters | Store the list of parameter names here |
[out] | definition | Store the definition expression here |
Definition at line 225 of file parse.cpp.
References get_expr(), get_namelist(), get_token(), and identifier.
Referenced by get_statement().
00226 { 00227 // Define a variable. 00228 kind k = get_token(name); 00229 if (k != identifier) 00230 throw syntax_error("expected IDENTIFIER, got " + name); 00231 00232 std::string token; 00233 k = get_token(token); 00234 if (k == '(') { 00235 get_namelist(std::back_inserter(parameters)); 00236 k = get_token(token); 00237 } 00238 00239 if (k != '=') 00240 throw syntax_error("expected = in definition, got " + token); 00241 00242 if (not get_expr(definition)) 00243 throw syntax_error("expected exprssion in assignment"); 00244 }
parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL | STRING
IDENTIFIER ::= ALPHA (ALPHA | DIGIT)*
NUMBER ::= DIGIT+ ('.' DIGIT+)? ('E' SIGN? DIGIT+)?
SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '=' | ','
token | Store the text of the token here. |
Definition at line 130 of file parse.cpp.
References charify(), eof, get_identifier(), get_string(), identifier, input_, isalpha(), kind_, number, string, and token_.
Referenced by get_add_expr(), get_definition(), get_expr_list(), get_mul_expr(), get_namelist(), get_primary(), get_statement(), and get_unary().
00131 { 00132 if (not token_.empty()) 00133 { 00134 kind result(kind_); 00135 token = token_; 00136 00137 token_.clear(); 00138 kind_ = eof; 00139 00140 return result; 00141 } 00142 00143 char c; 00144 if (not (input_ >> c)) { 00145 token = "end of line"; 00146 return eof; 00147 } 00148 if (isalpha(c)) { 00149 input_.unget(); 00150 get_identifier(token); 00151 return identifier; 00152 } 00153 00154 if (c == '\'' or c == '"') { 00155 // Quoted string 00156 token.clear(); 00157 get_string(token, c); 00158 return string; 00159 } 00160 00161 // Get a numeric literal. 00162 token.clear(); 00163 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') { 00164 token += c; 00165 return kind(c); 00166 } 00167 00168 if (c < '0' or c > '9') { 00169 input_.unget(); 00170 throw syntax_error("expected digit, got " + charify(c)); 00171 } 00172 while (c >= '0' and c <= '9') { 00173 token += c; 00174 if (not input_.get(c)) 00175 return number; 00176 } 00177 if (c == '.') { 00178 token += c; 00179 if (not input_.get(c)) 00180 throw syntax_error("unterminated number: expected digit after the decimal point"); 00181 if (c < '0' or c > '9') { 00182 input_.unget(); 00183 throw syntax_error("expected digit after decimal point, got " + charify(c)); 00184 } 00185 while (c >= '0' and c <= '9') { 00186 token += c; 00187 if (not input_.get(c)) 00188 return number; 00189 } 00190 } 00191 if (c == 'e' or c == 'E') { 00192 token += c; 00193 if (not input_.get(c)) 00194 throw syntax_error("unterminated number: expected digit in the exponent"); 00195 if (c == '-' or c == '+') { 00196 token += c; 00197 if (not input_.get(c)) 00198 throw syntax_error("unterminated number: expected digit after sign in the exponent"); 00199 } 00200 if (c < '0' or c > '9') { 00201 input_.unget(); 00202 throw syntax_error("expected digit in the exponent, got " + charify(c)); 00203 } 00204 while (c >= '0' and c <= '9') { 00205 token += c; 00206 if (not input_.get(c)) 
00207 return number; 00208 } 00209 } 00210 input_.unget(); 00211 return number; 00212 }
void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
identifier | Store the identifier here. |
Definition at line 102 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00103 { 00104 identifier.clear(); 00105 char c; 00106 if (not input_.get(c)) 00107 return; 00108 if (not isalpha(c)) 00109 throw syntax_error("expected alphabetic, got " + charify(c)); 00110 identifier += c; 00111 while (input_.get(c)) { 00112 if (not isalnum(c)) { 00113 input_.unget(); 00114 return; 00115 } 00116 identifier += c; 00117 } 00118 return; 00119 }
void parser::get_expr_list | ( | node_list & | result | ) | [private] |
Parse a comma-separated expression list.
[out] | result | Store the result here |
Definition at line 355 of file parse.cpp.
References get_expr(), get_token(), and push_back().
Referenced by get_primary().
00356 { 00357 result.clear(); 00358 std::string token; 00359 while (kind k = get_token(token)) { 00360 if (k == ')') 00361 return; 00362 push_back(token, k); 00363 node expr; 00364 if (not get_expr(expr)) 00365 throw syntax_error("unexpected end of line in function argument"); 00366 result.push_back(expr); 00367 k = get_token(token); 00368 if (k == ')') 00369 return; 00370 else if (k != ',') 00371 throw syntax_error("expected comma in argument list, got " + token); 00372 } 00373 throw syntax_error("unexpected end of line in function argument list"); 00374 }
OutputIterator parser::get_namelist | ( | OutputIterator | output | ) | [inline, private] |
Parse a list of parameter names. Names are identifiers, separated by commas. The list can be empty. This is a template so the container type is unimportant. Any output iterator will do.
[out] | output | Store the identifiers here |
Returns output after storing all the identifiers. Definition at line 207 of file parse.hpp.
References get_token(), and identifier.
Referenced by get_definition().
00208 { 00209 std::string token; 00210 while (kind k = get_token(token)) { 00211 if (k == ')') 00212 return output; 00213 else if (k != identifier) 00214 throw syntax_error("expected function parameter, got " + token); 00215 else { 00216 *output = token; 00217 ++output; 00218 00219 k = get_token(token); 00220 if (k == ')') 00221 return output; 00222 if (k != ',') 00223 throw syntax_error("expected comma in function paramter list, got " + token); 00224 } 00225 } 00226 throw syntax_error("unexpected end of line in function parameter list"); 00227 }
void parser::get_escape | ( | std::string & | str | ) | [private] |
Interpret a backslash escape sequence. The caller must have read the backslash already.
[out] | str | Write the string equivalent of the escape sequence at the end of this string |
Definition at line 41 of file parse.cpp.
References ctype_, and input_.
Referenced by get_string().
00042 { 00043 char c; 00044 if (not input_.get(c)) 00045 throw syntax_error("incomplete escape"); 00046 if (c == '\n') 00047 return; 00048 00049 if (c == 'a') 00050 str += '\a'; 00051 else if (c == 'b') 00052 str += '\b'; 00053 else if (c == 'n') 00054 str += '\n'; 00055 else if (c == 'f') 00056 str += '\f'; 00057 else if (c == 'r') 00058 str += '\r'; 00059 else if (c == 't') 00060 str += '\t'; 00061 else if (c == 'v') 00062 str += '\v'; 00063 else if (c == 'x') { 00064 // hexadecimal sequence 00065 std::string digits; 00066 while(input_.get(c) and ctype_.is(ctype_.xdigit, c)) 00067 digits += c; 00068 if (input_) 00069 input_.unget(); 00070 unsigned long value(std::strtoul(digits.c_str(), 0, 16)); 00071 str += static_cast<char>(static_cast<unsigned char>(value)); 00072 } else if (c >= '0' and c <= '7') { 00073 // octal sequence 00074 std::string digits; 00075 for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i) 00076 digits += c; 00077 if (input_ or c < '0' or c > '7') 00078 input_.unget(); 00079 unsigned long value(std::strtoul(digits.c_str(), 0, 8)); 00080 str += static_cast<char>(static_cast<unsigned char>(value)); 00081 } else { 00082 str += c; 00083 } 00084 }
void parser::get_string | ( | std::string & | result, | |
char | delimiter | |||
) | [private] |
Parse a quoted string. The caller passes the quote character in the delimiter argument.
[out] | result | Store the token here. |
[in] | delimiter | The quote character (' or " ) |
Definition at line 86 of file parse.cpp.
References get_escape(), and input_.
Referenced by get_token().
00087 { 00088 char c; 00089 while (input_.get(c)) { 00090 if (c == delimiter) 00091 return; 00092 else if (c == '\\') 00093 get_escape(result); 00094 else if (c == '\n') 00095 throw syntax_error("unterminated string"); 00096 else 00097 result += c; 00098 } 00099 throw syntax_error("unterminated string"); 00100 }
void parser::push_back | ( | std::string const & | token, | |
kind | k | |||
) | [private] |
Push back a token. The next call to get_token() will return the pushed-back token.
token | The token to push back. | |
k | The kind of token being pushed back |
Definition at line 121 of file parse.cpp.
References eof, kind_, and token_.
Referenced by get_add_expr(), get_expr_list(), get_mul_expr(), get_primary(), get_statement(), and get_unary().
00122 { 00123 kind_ = k; 00124 if (kind_ == eof) 00125 token_ = "end of line"; 00126 else 00127 token_ = token; 00128 }
bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
c | The character to test. |
Definition at line 176 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
bool parser::isalnum | ( | char | c | ) | const [inline, private] |
bool parser::isdigit | ( | char | c | ) | const [inline, private] |
bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 193 of file parse.hpp.
Referenced by get_escape(), get_identifier(), get_string(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 195 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 196 of file parse.hpp.
Referenced by get_token(), and push_back().