#include <parse.hpp>
Public Types | |
enum | kind { eof, identifier, integer, floating_point, string, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=', comma = ',' } |
Public Member Functions | |
parser (std::istream &input) | |
bool | get_statement (std::ostream &output) |
Private Member Functions | |
std::string | charify (char c) |
bool | get_float (std::string const &token, node &result) |
bool | get_integer (std::string const &token, node &result) |
bool | get_expr (node &result) |
bool | get_add_expr (node &result) |
bool | get_mul_expr (node &result) |
bool | get_primary (node &result) |
bool | get_unary (node &result) |
void | get_definition (std::string &name, identifier_list &parameters, node &definition) |
kind | get_token (std::string &token) |
void | get_identifier (std::string &identifier) |
void | get_expr_list (node_list &result) |
template<class OutputIterator> | |
OutputIterator | get_namelist (OutputIterator output) |
void | get_escape (std::string &str) |
void | get_string (std::string &result, char delimiter) |
void | push_back (std::string const &token, kind k) |
bool | isalpha (char c) const |
bool | isalnum (char c) const |
bool | isdigit (char c) const |
bool | isprint (char c) const |
Private Attributes | |
std::istream & | input_ |
Share the input stream. | |
std::ctype< char > const & | ctype_ |
Cache the ctype facet for checking character categories. | |
std::string | token_ |
One token push-back. | |
kind | kind_ |
The kind of token that was pushed back. |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 26 of file parse.hpp.
enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
eof | |
identifier | |
integer | |
floating_point | |
string | |
plus | |
minus | |
times | |
slash | |
lparen | |
rparen | |
equal | |
comma |
Definition at line 32 of file parse.hpp.
00032 { eof, identifier, integer, floating_point, string, 00033 plus='+', minus='-', times='*', slash='/', lparen = '(', rparen=')', equal='=', 00034 comma=','};
parser::parser | ( | std::istream & | input | ) |
bool parser::get_statement | ( | std::ostream & | output | ) |
Read one statement and store the parse tree in result. If the statement is an assignment or function definition, store the variable or function. If the statement is an expression, print the result to output.
STATEMENT ::= DEFINITION | QUIT | EXPR | SAVE | LOAD
SAVE ::= "save" FILENAME
LOAD ::= "load" FILENAME
FILENAME ::= quoted-string
output | The output stream. |
parse_error | for various syntax and other errors |
Definition at line 262 of file parse.cpp.
References eof, node::evaluate(), get_definition(), get_expr(), get_token(), identifier, load_library(), push_back(), save_library(), and set_function().
Referenced by parse_loop().
00263 { 00264 std::string token; 00265 kind k(get_token(token)); 00266 if (k == eof) 00267 return false; 00268 00269 if (k == identifier and token == "def") { 00270 node definition; 00271 identifier_list parameters; 00272 get_definition(token, parameters, definition); 00273 set_function(token, node(parameters, definition)); 00274 return true; 00275 } 00276 00277 if (k == identifier and token == "quit") 00278 std::exit(0); 00279 00280 if (k == identifier and token == "save") { 00281 std::string filename; 00282 if (get_token(filename) != string) 00283 throw syntax_error("expected FILENAME after save, got " + token); 00284 save_library(filename); 00285 output << "Library saved to " << filename << '\n'; 00286 } 00287 00288 if (k == identifier and token == "load") { 00289 std::string filename; 00290 if (get_token(filename) != string) 00291 throw syntax_error("expected FILENAME after load, got " + token); 00292 load_library(filename); 00293 output << "Library loaded from " << filename << '\n'; 00294 } 00295 // Otherwise, the statement must be an expression. 00296 push_back(token, k); 00297 node n; 00298 if (not get_expr(n)) 00299 return false; 00300 else { 00301 // Evaluate the expression and print the result. 00302 output << n.evaluate() << '\n'; 00303 return true; 00304 } 00305 }
std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
c | The character |
Returns a quoted character literal, escaped as needed so that c is readable. Definition at line 17 of file parse.cpp.
References isprint().
Referenced by get_identifier(), and get_token().
00018 { 00019 if (c == '\a') return "\'\\a\'"; 00020 if (c == '\b') return "\'\\b\'"; 00021 if (c == '\f') return "\'\\f\'"; 00022 if (c == '\n') return "\'\\n\'"; 00023 if (c == '\r') return "\'\\r\'"; 00024 if (c == '\t') return "\'\\t\'"; 00025 if (c == '\v') return "\'\\v\'"; 00026 if (c == '\'') return "\'\\'\'"; 00027 if (c == '\\') return "\'\\\\\'"; 00028 00029 if (isprint(c)) 00030 return std::string("\'") + c + '\''; 00031 else { 00032 std::ostringstream stream; 00033 stream << "'\\x" << std::hex; 00034 stream.fill('0'); 00035 stream.width(2); 00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00037 return stream.str(); 00038 } 00039 }
bool parser::get_float | ( | std::string const & | token, | |
node & | result | |||
) | [private] |
Parse a floating-point number.
token | The token to parse | |
result | Store the number here |
Returns true if token is a valid number or false for an error. Definition at line 230 of file parse.cpp.
Referenced by get_primary().
00231 { 00232 std::istringstream stream(token); 00233 // If the value overflows or is otherwise invalid, return false. 00234 double value; 00235 if (not (stream >> value)) 00236 return false; 00237 result = node(number(value)); 00238 return true; 00239 }
bool parser::get_integer | ( | std::string const & | token, | |
node & | result | |||
) | [private] |
Parse an integer.
token | The token to parse | |
result | Store the number here |
Returns true if token is a valid number or false for an error. Definition at line 219 of file parse.cpp.
Referenced by get_primary().
00220 { 00221 std::istringstream stream(token); 00222 // If the value overflows or is otherwise invalid, return false. 00223 long value; 00224 if (not (stream >> value)) 00225 return false; 00226 result = node(number(value)); 00227 return true; 00228 }
bool parser::get_expr | ( | node & | result | ) | [private] |
Parse an expression
result | Store the result here |
Definition at line 307 of file parse.cpp.
References get_add_expr().
Referenced by get_definition(), get_expr_list(), get_primary(), and get_statement().
00308 { 00309 return get_add_expr(result); 00310 }
bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
result | Store the result here |
Definition at line 312 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00313 { 00314 if (not get_mul_expr(result)) 00315 return false; 00316 std::string token; 00317 while (kind k = get_token(token)) { 00318 if (k != '+' and k != '-') { 00319 push_back(token, k); 00320 return true; 00321 } else { 00322 node right; 00323 if (not get_mul_expr(right)) 00324 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token); 00325 result = node(result, k, right); 00326 } 00327 } 00328 return true; 00329 }
bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
result | Store the result here |
Definition at line 331 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00332 { 00333 if (not get_unary(result)) 00334 return false; 00335 std::string token; 00336 while (kind k = get_token(token)) { 00337 if (k != '*' and k != '/') { 00338 push_back(token, k); 00339 return true; 00340 } else { 00341 node right; 00342 if (not get_unary(right)) 00343 throw syntax_error("unterminated expression. Expected a unary-expression after " + token); 00344 result = node(result, k, right); 00345 } 00346 } 00347 return true; 00348 }
bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')' | FUNCTION_CALL
FUNCTION_CALL ::= IDENTIFIER '(' OPT_EXPR_LIST ')'
OPT_EXPR_LIST ::= empty | EXPR_LIST
EXPR_LIST ::= EXPR | EXPR_LIST ',' EXPR
result | Store the result here |
Definition at line 392 of file parse.cpp.
References eof, floating_point, get_expr(), get_expr_list(), get_float(), get_integer(), get_token(), identifier, integer, and push_back().
Referenced by get_unary().
00393 { 00394 std::string token; 00395 kind k = get_token(token); 00396 if (k == eof) 00397 return false; 00398 00399 if (k == '(') { 00400 // Parenthesized expression 00401 if (not get_expr(result)) 00402 throw syntax_error("expected expression, got end of line"); 00403 k = get_token(token); 00404 if (k != ')') 00405 throw syntax_error("expected ')', got " + token); 00406 else 00407 return true; 00408 } 00409 00410 if (k == integer) { 00411 // Integer literal 00412 if (not get_integer(token, result)) 00413 throw syntax_error("Invalid integer literal: " + token); 00414 return true; 00415 } 00416 00417 if (k == floating_point) { 00418 // Integer literal 00419 if (not get_float(token, result)) 00420 throw syntax_error("Invalid integer literal: " + token); 00421 return true; 00422 } 00423 00424 if (k == identifier) { 00425 // Identifier: variable or function call 00426 std::string next; 00427 k = get_token(next); 00428 if (k == '(') { 00429 // function call 00430 node_list arguments; 00431 get_expr_list(arguments); 00432 result = node(token, arguments); 00433 } else { 00434 static const node_list no_arguments; 00435 // Variable reference or function call with no arguments 00436 push_back(next, k); 00437 result = node(token); 00438 } 00439 return true; 00440 } 00441 throw syntax_error("expected a primary, got " + token); 00442 }
bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
result | Store the result here |
Definition at line 350 of file parse.cpp.
References eof, get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00351 { 00352 std::string token; 00353 kind k = get_token(token); 00354 if (k == eof) 00355 return false; 00356 if (k == '-') { 00357 if (not get_primary(result)) 00358 throw syntax_error("expected primary after unary " + token + ", got end of line"); 00359 result = node(k, result); 00360 return true; 00361 } else if (k == '+') { 00362 if (not get_primary(result)) 00363 throw syntax_error("expected primary after unary +, got end of line"); 00364 return true; 00365 } else { 00366 push_back(token, k); 00367 return get_primary(result); 00368 } 00369 }
void parser::get_definition | ( | std::string & | name, | |
identifier_list & | parameters, | |||
node & | definition | |||
) | [private] |
Parse a function or variable definition. A variable is just like a function that takes no parameters.
DEFINITION ::= DEF IDENTIFIER OPT_PARAMETERS '=' EXPR
OPT_PARAMETERS ::= empty | '(' OPT_IDENTIFIER_LIST ')'
OPT_IDENTIFIER_LIST ::= empty | IDENTIFIER_LIST
IDENTIFIER_LIST ::= IDENTIFIER | IDENTIFIER_LIST ',' IDENTIFIER
[out] | name | Store the variable or function name here |
[out] | parameters | Store the list of parameter names here |
[out] | definition | Store the definition expression here |
Definition at line 241 of file parse.cpp.
References get_expr(), get_namelist(), get_token(), and identifier.
Referenced by get_statement().
00242 { 00243 // Define a variable. 00244 kind k = get_token(name); 00245 if (k != identifier) 00246 throw syntax_error("expected IDENTIFIER, got " + name); 00247 00248 std::string token; 00249 k = get_token(token); 00250 if (k == '(') { 00251 get_namelist(std::back_inserter(parameters)); 00252 k = get_token(token); 00253 } 00254 00255 if (k != '=') 00256 throw syntax_error("expected = in definition, got " + token); 00257 00258 if (not get_expr(definition)) 00259 throw syntax_error("expected exprssion in assignment"); 00260 }
parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL | STRING
IDENTIFIER ::= ALPHA (ALPHA | DIGIT)*
NUMBER ::= INTEGER | FLOATING_POINT
INTEGER ::= DIGIT+
FLOATING_POINT ::= DIGIT+ '.' DIGIT+ ('E' SIGN? DIGIT+)? | DIGIT+ ('.' DIGIT+)? 'E' SIGN? DIGIT+
SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '=' | ','
token | Store the text of the token here. |
Definition at line 130 of file parse.cpp.
References charify(), eof, floating_point, get_identifier(), get_string(), identifier, input_, integer, isalpha(), kind_, string, and token_.
Referenced by get_add_expr(), get_definition(), get_expr_list(), get_mul_expr(), get_namelist(), get_primary(), get_statement(), and get_unary().
00131 { 00132 if (not token_.empty()) 00133 { 00134 kind result(kind_); 00135 token = token_; 00136 00137 token_.clear(); 00138 kind_ = eof; 00139 00140 return result; 00141 } 00142 00143 char c; 00144 if (not (input_ >> c)) { 00145 token = "end of line"; 00146 return eof; 00147 } 00148 if (isalpha(c)) { 00149 input_.unget(); 00150 get_identifier(token); 00151 return identifier; 00152 } 00153 00154 if (c == '\'' or c == '"') { 00155 // Quoted string 00156 token.clear(); 00157 get_string(token, c); 00158 return string; 00159 } 00160 00161 // Get a numeric literal. 00162 token.clear(); 00163 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') { 00164 token += c; 00165 return kind(c); 00166 } 00167 00168 if (c < '0' or c > '9') { 00169 input_.unget(); 00170 throw syntax_error("expected digit, got " + charify(c)); 00171 } 00172 00173 kind k = integer; 00174 00175 while (c >= '0' and c <= '9') { 00176 token += c; 00177 if (not input_.get(c)) 00178 return k; 00179 } 00180 if (c == '.') { 00181 k = floating_point; 00182 token += c; 00183 if (not input_.get(c)) 00184 throw syntax_error("unterminated number: expected digit after the decimal point"); 00185 if (c < '0' or c > '9') { 00186 input_.unget(); 00187 throw syntax_error("expected digit after decimal point, got " + charify(c)); 00188 } 00189 while (c >= '0' and c <= '9') { 00190 token += c; 00191 if (not input_.get(c)) 00192 return k; 00193 } 00194 } 00195 if (c == 'e' or c == 'E') { 00196 k = floating_point; 00197 token += c; 00198 if (not input_.get(c)) 00199 throw syntax_error("unterminated number: expected digit in the exponent"); 00200 if (c == '-' or c == '+') { 00201 token += c; 00202 if (not input_.get(c)) 00203 throw syntax_error("unterminated number: expected digit after sign in the exponent"); 00204 } 00205 if (c < '0' or c > '9') { 00206 input_.unget(); 00207 throw syntax_error("expected digit in the exponent, got " + charify(c)); 00208 } 00209 
while (c >= '0' and c <= '9') { 00210 token += c; 00211 if (not input_.get(c)) 00212 return k; 00213 } 00214 } 00215 input_.unget(); 00216 return k; 00217 }
void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
identifier | Store the identifier here. |
Definition at line 102 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00103 { 00104 identifier.clear(); 00105 char c; 00106 if (not input_.get(c)) 00107 return; 00108 if (not isalpha(c)) 00109 throw syntax_error("expected alphabetic, got " + charify(c)); 00110 identifier += c; 00111 while (input_.get(c)) { 00112 if (not isalnum(c)) { 00113 input_.unget(); 00114 return; 00115 } 00116 identifier += c; 00117 } 00118 return; 00119 }
void parser::get_expr_list | ( | node_list & | result | ) | [private] |
Parse a comma-separated expression list.
[out] | result | Store the result here |
Definition at line 371 of file parse.cpp.
References get_expr(), get_token(), and push_back().
Referenced by get_primary().
00372 { 00373 result.clear(); 00374 std::string token; 00375 while (kind k = get_token(token)) { 00376 if (k == ')') 00377 return; 00378 push_back(token, k); 00379 node expr; 00380 if (not get_expr(expr)) 00381 throw syntax_error("unexpected end of line in function argument"); 00382 result.push_back(expr); 00383 k = get_token(token); 00384 if (k == ')') 00385 return; 00386 else if (k != ',') 00387 throw syntax_error("expected comma in argument list, got " + token); 00388 } 00389 throw syntax_error("unexpected end of line in function argument list"); 00390 }
OutputIterator parser::get_namelist | ( | OutputIterator | output | ) | [inline, private] |
Parse a list of parameter names. Names are identifiers, separated by commas. The list can be empty. This is a template so the container type is unimportant. Any output iterator will do.
[out] | output | Store the identifiers here |
Returns output after storing all the identifiers. Definition at line 216 of file parse.hpp.
References get_token(), and identifier.
Referenced by get_definition().
00217 { 00218 std::string token; 00219 while (kind k = get_token(token)) { 00220 if (k == ')') 00221 return output; 00222 else if (k != identifier) 00223 throw syntax_error("expected function parameter, got " + token); 00224 else { 00225 *output = token; 00226 ++output; 00227 00228 k = get_token(token); 00229 if (k == ')') 00230 return output; 00231 if (k != ',') 00232 throw syntax_error("expected comma in function paramter list, got " + token); 00233 } 00234 } 00235 throw syntax_error("unexpected end of line in function parameter list"); 00236 }
void parser::get_escape | ( | std::string & | str | ) | [private] |
Interpret a backslash escape sequence. The caller must have read the backslash already.
[out] | str | Write the string equivalent of the escape sequence at the end of this string |
Definition at line 41 of file parse.cpp.
References ctype_, and input_.
Referenced by get_string().
00042 { 00043 char c; 00044 if (not input_.get(c)) 00045 throw syntax_error("incomplete escape"); 00046 if (c == '\n') 00047 return; 00048 00049 if (c == 'a') 00050 str += '\a'; 00051 else if (c == 'b') 00052 str += '\b'; 00053 else if (c == 'n') 00054 str += '\n'; 00055 else if (c == 'f') 00056 str += '\f'; 00057 else if (c == 'r') 00058 str += '\r'; 00059 else if (c == 't') 00060 str += '\t'; 00061 else if (c == 'v') 00062 str += '\v'; 00063 else if (c == 'x') { 00064 // hexadecimal sequence 00065 std::string digits; 00066 while(input_.get(c) and ctype_.is(ctype_.xdigit, c)) 00067 digits += c; 00068 if (input_) 00069 input_.unget(); 00070 unsigned long value(std::strtoul(digits.c_str(), 0, 16)); 00071 str += static_cast<char>(static_cast<unsigned char>(value)); 00072 } else if (c >= '0' and c <= '7') { 00073 // octal sequence 00074 std::string digits; 00075 for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i) 00076 digits += c; 00077 if (input_ or c < '0' or c > '7') 00078 input_.unget(); 00079 unsigned long value(std::strtoul(digits.c_str(), 0, 8)); 00080 str += static_cast<char>(static_cast<unsigned char>(value)); 00081 } else { 00082 str += c; 00083 } 00084 }
void parser::get_string | ( | std::string & | result, | |
char | delimiter | |||
) | [private] |
Parse a quoted string. The caller passes the quote character in the delimiter argument.
[out] | result | Store the token here. |
[in] | delimiter | The quote character (' or " ) |
Definition at line 86 of file parse.cpp.
References get_escape(), and input_.
Referenced by get_token().
00087 { 00088 char c; 00089 while (input_.get(c)) { 00090 if (c == delimiter) 00091 return; 00092 else if (c == '\\') 00093 get_escape(result); 00094 else if (c == '\n') 00095 throw syntax_error("unterminated string"); 00096 else 00097 result += c; 00098 } 00099 throw syntax_error("unterminated string"); 00100 }
void parser::push_back | ( | std::string const & | token, | |
kind | k | |||
) | [private] |
Push back a token. The next call to get_token() will return the pushed-back token.
token | The token to push back. | |
k | The kind of token being pushed back |
Definition at line 121 of file parse.cpp.
References eof, kind_, and token_.
Referenced by get_add_expr(), get_expr_list(), get_mul_expr(), get_primary(), get_statement(), and get_unary().
00122 { 00123 kind_ = k; 00124 if (kind_ == eof) 00125 token_ = "end of line"; 00126 else 00127 token_ = token; 00128 }
bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
c | The character to test. |
Definition at line 185 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
bool parser::isalnum | ( | char | c | ) | const [inline, private] |
bool parser::isdigit | ( | char | c | ) | const [inline, private] |
bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 202 of file parse.hpp.
Referenced by get_escape(), get_identifier(), get_string(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 204 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 205 of file parse.hpp.
Referenced by get_token(), and push_back().