#include <parse.hpp>
Public Types | |
enum | kind { eof, identifier, number, plus = '+', minus = '-', times = '*', slash = '/', lparen = '(', rparen = ')', equal = '=' } |
Public Member Functions | |
parser (std::istream &input) | |
bool | get_expr (node &result) |
Private Member Functions | |
std::string | charify (char c) |
bool | get_number (std::string const &token, node &result) |
bool | get_add_expr (node &result) |
bool | get_mul_expr (node &result) |
bool | get_primary (node &result) |
bool | get_unary (node &result) |
kind | get_token (std::string &token) |
void | get_identifier (std::string &identifier) |
void | push_back (std::string const &token, kind k) |
bool | isalpha (char c) const |
bool | isalnum (char c) const |
bool | isdigit (char c) const |
bool | isprint (char c) const |
Private Attributes | |
std::istream & | input_ |
Share the input stream. | |
std::ctype< char > const & | ctype_ |
Cache the ctype facet for checking character categories. | |
std::string | token_ |
One token push-back. | |
kind | kind_ |
The kind of token that was pushed back. |
Because the recursive-descent parser can examine too many tokens from the input stream, it keeps a push-back token. Once the parser knows it has gone too far, it pushes back the most recently read token. The next call to get_token() retrieves the pushed-back token.
Only one push-back is available, which limits the complexity of the syntax.
Definition at line 30 of file parse.hpp.
enum parser::kind |
Token kind. Declare a name for each single-character token, to ensure the enumerated type can represent any operator or punctuator character.
Definition at line 36 of file parse.hpp.
/// Token kind. Every single-character token uses its own character code as
/// its value, so a symbol character can be converted directly to a kind.
enum kind {
  eof,            ///< End of input. First enumerator, so it has value 0 and tests false in a condition.
  identifier,     ///< A name (or keyword).
  number,         ///< A numeric literal.
  plus   = '+',
  minus  = '-',
  times  = '*',
  slash  = '/',
  lparen = '(',
  rparen = ')',
  equal  = '='
};
parser::parser | ( | std::istream & | input | ) |
bool parser::get_expr | ( | node & | result | ) |
Read one expression and store the parse tree in result.
result | Where to store the parse tree. |
parse_error | for various syntax and other errors |
Definition at line 143 of file parse.cpp.
References eof, get_add_expr(), get_token(), identifier, and push_back().
Referenced by get_primary(), and parse_loop().
00144 { 00145 std::string token; 00146 kind k(get_token(token)); 00147 if (k == eof) 00148 return false; 00149 00150 if (k == identifier and token == "var") { 00151 std::string name; 00152 // Define a variable. 00153 k = get_token(name); 00154 if (k != identifier) 00155 throw parse_error("syntax error: expected IDENTIFIER, but got " + name); 00156 k = get_token(token); 00157 if (k != '=') 00158 throw parse_error("syntax error: expected =, but got " + token); 00159 if (not get_add_expr(result)) 00160 throw parse_error("syntax error: expected additive-exprssion in assignment"); 00161 result = node(node(name), result); 00162 return true; 00163 } 00164 00165 if (k == identifier and token == "quit") 00166 std::exit(0); 00167 00168 push_back(token, k); 00169 if (not get_add_expr(result)) 00170 throw parse_error("syntax error: expected an additive-expression"); 00171 00172 return true; 00173 }
std::string parser::charify | ( | char | c | ) | [private] |
Convert a character to a readable form.
c | The character |
Returns a C++-style character literal (quoted and escaped) that ensures c is readable. Definition at line 13 of file parse.cpp.
References isprint().
Referenced by get_identifier(), and get_token().
00014 { 00015 if (c == '\a') return "\'\\a\'"; 00016 if (c == '\b') return "\'\\b\'"; 00017 if (c == '\f') return "\'\\f\'"; 00018 if (c == '\n') return "\'\\n\'"; 00019 if (c == '\r') return "\'\\r\'"; 00020 if (c == '\t') return "\'\\t\'"; 00021 if (c == '\v') return "\'\\v\'"; 00022 if (c == '\'') return "\'\\'\'"; 00023 if (c == '\\') return "\'\\\\\'"; 00024 00025 if (isprint(c)) 00026 return std::string("\'") + c + '\''; 00027 else { 00028 std::ostringstream stream; 00029 stream << "'\\x" << std::hex; 00030 stream.fill('0'); 00031 stream.width(2); 00032 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\''; 00033 return stream.str(); 00034 } 00035 }
bool parser::get_number | ( | std::string const & | token, | |
node & | result | |||
) | [private] |
Parse a floating-point number.
token | The token to parse | |
result | Store the number here |
Returns true if token is a valid number, or false for an error. Definition at line 129 of file parse.cpp.
Referenced by get_primary().
00130 { 00131 std::istringstream stream(token); 00132 // If the value overflows or is otherwise invalid, return false. 00133 double value; 00134 if (not (stream >> value)) 00135 return false; 00136 result = node(value); 00137 return true; 00138 }
bool parser::get_add_expr | ( | node & | result | ) | [private] |
Parse an addition expression
ADD_EXPR ::= MUL_EXPR | ADD_EXPR + MUL_EXPR | ADD_EXPR - MUL_EXPR
result | Store the result here |
Definition at line 178 of file parse.cpp.
References get_mul_expr(), get_token(), and push_back().
Referenced by get_expr().
00179 { 00180 if (not get_mul_expr(result)) 00181 return false; 00182 std::string token; 00183 while (kind k = get_token(token)) { 00184 if (k != '+' and k != '-') { 00185 push_back(token, k); 00186 return true; 00187 } else { 00188 node right; 00189 if (not get_mul_expr(right)) 00190 throw parse_error("syntax error: unterminated expression. Expected a multiplicative-expression after " + token); 00191 result = node(result, k, right); 00192 } 00193 } 00194 return true; 00195 }
bool parser::get_mul_expr | ( | node & | result | ) | [private] |
Parse a multiplicative expression.
MUL_EXPR ::= UNARY | MUL_EXPR * UNARY | MUL_EXPR / UNARY
result | Store the result here |
Definition at line 200 of file parse.cpp.
References get_token(), get_unary(), and push_back().
Referenced by get_add_expr().
00201 { 00202 if (not get_unary(result)) 00203 return false; 00204 std::string token; 00205 while (kind k = get_token(token)) { 00206 if (k != '*' and k != '/') { 00207 push_back(token, k); 00208 return true; 00209 } else { 00210 node right; 00211 if (not get_unary(right)) 00212 throw parse_error("syntax error: unterminated expression. Expected a unary-expression after " + token); 00213 result = node(result, k, right); 00214 } 00215 } 00216 return true; 00217 }
bool parser::get_primary | ( | node & | result | ) | [private] |
Parse a primary expression.
PRIMARY ::= NUMBER | IDENTIFIER | '(' EXPR ')'
result | Store the result here |
Definition at line 244 of file parse.cpp.
References eof, get_expr(), get_number(), get_token(), identifier, and number.
Referenced by get_unary().
00245 { 00246 std::string token; 00247 if (kind k = get_token(token)) { 00248 if (k == '(') { 00249 if (not get_expr(result)) 00250 return false; 00251 k = get_token(token); 00252 if (k == eof) 00253 throw parse_error("syntax error: EOF when expecting ')'"); 00254 else if (k != ')') 00255 throw parse_error("syntax error: expected ')', but got " + token); 00256 else 00257 return true; 00258 } else if (k == number) { 00259 if (not get_number(token, result)) 00260 throw parse_error("Invalid numeric literal: " + token); 00261 return true; 00262 } else if (k == identifier) { 00263 result = node(token); 00264 return true; 00265 } else { 00266 throw parse_error("syntax error: expected a primary, but got " + token); 00267 } 00268 } 00269 return false; 00270 }
bool parser::get_unary | ( | node & | result | ) | [private] |
Parse a unary expression.
UNARY ::= '-' PRIMARY | '+' PRIMARY | PRIMARY
result | Store the result here |
Definition at line 222 of file parse.cpp.
References get_primary(), get_token(), and push_back().
Referenced by get_mul_expr().
00223 { 00224 std::string token; 00225 if (kind k = get_token(token)) { 00226 if (k == '-') { 00227 if (not get_primary(result)) 00228 return false; 00229 result = node(k, result); 00230 return true; 00231 } else if (k == '+') { 00232 return get_primary(result); 00233 } else { 00234 push_back(token, k); 00235 return get_primary(result); 00236 } 00237 } 00238 return false; 00239 }
parser::kind parser::get_token | ( | std::string & | token | ) | [private] |
Parse a token. A token can be a keyword, a literal or a symbol.
TOKEN ::= IDENTIFIER | NUMBER | SYMBOL
IDENTIFIER ::= ALPHA (ALPHA | DIGIT)*
NUMBER ::= DIGIT+ ('.' DIGIT+)? ('E' SIGN? DIGIT+)?
SYMBOL ::= '+' | '-' | '*' | '/' | '%' | '(' | ')' | '='
token | Store the text of the token here. |
Definition at line 56 of file parse.cpp.
References charify(), eof, get_identifier(), identifier, input_, isalpha(), kind_, number, and token_.
Referenced by get_add_expr(), get_expr(), get_mul_expr(), get_primary(), and get_unary().
00057 { 00058 if (not token_.empty()) 00059 { 00060 token = token_; 00061 kind result(kind_); 00062 token_.clear(); 00063 kind_ = eof; 00064 return result; 00065 } 00066 00067 char c; 00068 if (not (input_ >> c)) 00069 return eof; 00070 if (isalpha(c)) { 00071 input_.unget(); 00072 get_identifier(token); 00073 return identifier; 00074 } 00075 00076 // Get a numeric literal. 00077 token.clear(); 00078 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=') { 00079 token += c; 00080 return kind(c); 00081 } 00082 00083 if (c < '0' or c > '9') { 00084 input_.unget(); 00085 throw parse_error("syntax error: expected digit, got " + charify(c)); 00086 } 00087 while (c >= '0' and c <= '9') { 00088 token += c; 00089 if (not input_.get(c)) 00090 return number; 00091 } 00092 if (c == '.') { 00093 token += c; 00094 if (not input_.get(c)) 00095 throw parse_error("unterminated number: expected digit after the decimal point"); 00096 if (c < '0' or c > '9') { 00097 input_.unget(); 00098 throw parse_error("syntax error: expected digit after decimal point, got " + charify(c)); 00099 } 00100 while (c >= '0' and c <= '9') { 00101 token += c; 00102 if (not input_.get(c)) 00103 return number; 00104 } 00105 } 00106 if (c == 'e' or c == 'E') { 00107 token += c; 00108 if (not input_.get(c)) 00109 throw parse_error("unterminated number: expected digit in the exponent"); 00110 if (c == '-' or c == '+') { 00111 token += c; 00112 if (not input_.get(c)) 00113 throw parse_error("unterminated number: expected digit after sign in the exponent"); 00114 } 00115 if (c < '0' or c > '9') { 00116 input_.unget(); 00117 throw parse_error("syntax error: expected digit in the exponent, got " + charify(c)); 00118 } 00119 while (c >= '0' and c <= '9') { 00120 token += c; 00121 if (not input_.get(c)) 00122 return number; 00123 } 00124 } 00125 input_.unget(); 00126 return number; 00127 }
void parser::get_identifier | ( | std::string & | identifier | ) | [private] |
Parse an identifier.
identifier | Store the identifier here. |
Definition at line 37 of file parse.cpp.
References charify(), input_, isalnum(), and isalpha().
Referenced by get_token().
00038 { 00039 identifier.clear(); 00040 char c; 00041 if (not input_.get(c)) 00042 return; 00043 if (not isalpha(c)) 00044 throw parse_error("syntax error: expected alphabetic, got " + charify(c)); 00045 identifier += c; 00046 while (input_.get(c)) { 00047 if (not isalnum(c)) { 00048 input_.unget(); 00049 return; 00050 } 00051 identifier += c; 00052 } 00053 return; 00054 }
void parser::push_back | ( | std::string const & | token, | |
kind | k | |||
) | [inline, private] |
Push back a token. The next call to get_token() will return the pushed-back token.
token | The token to push back. | |
k | The kind of token being pushed back |
Definition at line 118 of file parse.hpp.
Referenced by get_add_expr(), get_expr(), get_mul_expr(), and get_unary().
bool parser::isalpha | ( | char | c | ) | const [inline, private] |
Return true if c is alphabetic. Use the locale of the input stream.
c | The character to test. |
Definition at line 124 of file parse.hpp.
References ctype_.
Referenced by get_identifier(), and get_token().
bool parser::isalnum | ( | char | c | ) | const [inline, private] |
bool parser::isdigit | ( | char | c | ) | const [inline, private] |
bool parser::isprint | ( | char | c | ) | const [inline, private] |
std::istream& parser::input_ [private] |
Share the input stream.
Definition at line 141 of file parse.hpp.
Referenced by get_identifier(), and get_token().
std::ctype<char> const& parser::ctype_ [private] |
std::string parser::token_ [private] |
One token push-back.
Definition at line 143 of file parse.hpp.
Referenced by get_token(), and push_back().
kind parser::kind_ [private] |
The kind of token that was pushed back.
Definition at line 144 of file parse.hpp.
Referenced by get_token(), and push_back().