00001 #include <cstdlib>
00002 #include <iterator>
00003 #include <sstream>
00004
00005 #include "calc_error.hpp"
00006 #include "node.hpp"
00007 #include "parse.hpp"
00008 #include "variables.hpp"
00009
00010 parser::parser(std::istream& input)
00011 : input_(input),
00012 ctype_(std::use_facet<std::ctype<char> >(input.getloc())),
00013 token_(),
00014 kind_()
00015 {}
00016
00017 std::string parser::charify(char c)
00018 {
00019 if (c == '\a') return "\'\\a\'";
00020 if (c == '\b') return "\'\\b\'";
00021 if (c == '\f') return "\'\\f\'";
00022 if (c == '\n') return "\'\\n\'";
00023 if (c == '\r') return "\'\\r\'";
00024 if (c == '\t') return "\'\\t\'";
00025 if (c == '\v') return "\'\\v\'";
00026 if (c == '\'') return "\'\\'\'";
00027 if (c == '\\') return "\'\\\\\'";
00028
00029 if (isprint(c))
00030 return std::string("\'") + c + '\'';
00031 else {
00032 std::ostringstream stream;
00033 stream << "'\\x" << std::hex;
00034 stream.fill('0');
00035 stream.width(2);
00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\'';
00037 return stream.str();
00038 }
00039 }
00040
00041 void parser::get_escape(std::string& str)
00042 {
00043 char c;
00044 if (not input_.get(c))
00045 throw syntax_error("incomplete escape");
00046 if (c == '\n')
00047 return;
00048
00049 if (c == 'a')
00050 str += '\a';
00051 else if (c == 'b')
00052 str += '\b';
00053 else if (c == 'n')
00054 str += '\n';
00055 else if (c == 'f')
00056 str += '\f';
00057 else if (c == 'r')
00058 str += '\r';
00059 else if (c == 't')
00060 str += '\t';
00061 else if (c == 'v')
00062 str += '\v';
00063 else if (c == 'x') {
00064
00065 std::string digits;
00066 while(input_.get(c) and ctype_.is(ctype_.xdigit, c))
00067 digits += c;
00068 if (input_)
00069 input_.unget();
00070 unsigned long value(std::strtoul(digits.c_str(), 0, 16));
00071 str += static_cast<char>(static_cast<unsigned char>(value));
00072 } else if (c >= '0' and c <= '7') {
00073
00074 std::string digits;
00075 for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i)
00076 digits += c;
00077 if (input_ or c < '0' or c > '7')
00078 input_.unget();
00079 unsigned long value(std::strtoul(digits.c_str(), 0, 8));
00080 str += static_cast<char>(static_cast<unsigned char>(value));
00081 } else {
00082 str += c;
00083 }
00084 }
00085
00086 void parser::get_string(std::string& result, char delimiter)
00087 {
00088 char c;
00089 while (input_.get(c)) {
00090 if (c == delimiter)
00091 return;
00092 else if (c == '\\')
00093 get_escape(result);
00094 else if (c == '\n')
00095 throw syntax_error("unterminated string");
00096 else
00097 result += c;
00098 }
00099 throw syntax_error("unterminated string");
00100 }
00101
00102 void parser::get_identifier(std::string& identifier)
00103 {
00104 identifier.clear();
00105 char c;
00106 if (not input_.get(c))
00107 return;
00108 if (not isalpha(c))
00109 throw syntax_error("expected alphabetic, got " + charify(c));
00110 identifier += c;
00111 while (input_.get(c)) {
00112 if (not isalnum(c)) {
00113 input_.unget();
00114 return;
00115 }
00116 identifier += c;
00117 }
00118 return;
00119 }
00120
00121 void parser::push_back(std::string const& token, kind k)
00122 {
00123 kind_ = k;
00124 if (kind_ == eof)
00125 token_ = "end of line";
00126 else
00127 token_ = token;
00128 }
00129
00130 parser::kind parser::get_token(std::string& token)
00131 {
00132 if (not token_.empty())
00133 {
00134 kind result(kind_);
00135 token = token_;
00136
00137 token_.clear();
00138 kind_ = eof;
00139
00140 return result;
00141 }
00142
00143 char c;
00144 if (not (input_ >> c)) {
00145 token = "end of line";
00146 return eof;
00147 }
00148 if (isalpha(c)) {
00149 input_.unget();
00150 get_identifier(token);
00151 return identifier;
00152 }
00153
00154 if (c == '\'' or c == '"') {
00155
00156 token.clear();
00157 get_string(token, c);
00158 return string;
00159 }
00160
00161
00162 token.clear();
00163 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') {
00164 token += c;
00165 return kind(c);
00166 }
00167
00168 if (c < '0' or c > '9') {
00169 input_.unget();
00170 throw syntax_error("expected digit, got " + charify(c));
00171 }
00172 while (c >= '0' and c <= '9') {
00173 token += c;
00174 if (not input_.get(c))
00175 return number;
00176 }
00177 if (c == '.') {
00178 token += c;
00179 if (not input_.get(c))
00180 throw syntax_error("unterminated number: expected digit after the decimal point");
00181 if (c < '0' or c > '9') {
00182 input_.unget();
00183 throw syntax_error("expected digit after decimal point, got " + charify(c));
00184 }
00185 while (c >= '0' and c <= '9') {
00186 token += c;
00187 if (not input_.get(c))
00188 return number;
00189 }
00190 }
00191 if (c == 'e' or c == 'E') {
00192 token += c;
00193 if (not input_.get(c))
00194 throw syntax_error("unterminated number: expected digit in the exponent");
00195 if (c == '-' or c == '+') {
00196 token += c;
00197 if (not input_.get(c))
00198 throw syntax_error("unterminated number: expected digit after sign in the exponent");
00199 }
00200 if (c < '0' or c > '9') {
00201 input_.unget();
00202 throw syntax_error("expected digit in the exponent, got " + charify(c));
00203 }
00204 while (c >= '0' and c <= '9') {
00205 token += c;
00206 if (not input_.get(c))
00207 return number;
00208 }
00209 }
00210 input_.unget();
00211 return number;
00212 }
00213
00214 bool parser::get_number(std::string const& token, node& result)
00215 {
00216 std::istringstream stream(token);
00217
00218 double value;
00219 if (not (stream >> value))
00220 return false;
00221 result = node(value);
00222 return true;
00223 }
00224
00225 void parser::get_definition(std::string& name, identifier_list& parameters, node& definition)
00226 {
00227
00228 kind k = get_token(name);
00229 if (k != identifier)
00230 throw syntax_error("expected IDENTIFIER, got " + name);
00231
00232 std::string token;
00233 k = get_token(token);
00234 if (k == '(') {
00235 get_namelist(std::back_inserter(parameters));
00236 k = get_token(token);
00237 }
00238
00239 if (k != '=')
00240 throw syntax_error("expected = in definition, got " + token);
00241
00242 if (not get_expr(definition))
00243 throw syntax_error("expected exprssion in assignment");
00244 }
00245
00246 bool parser::get_statement(std::ostream& output)
00247 {
00248 std::string token;
00249 kind k(get_token(token));
00250 if (k == eof)
00251 return false;
00252
00253 if (k == identifier and token == "def") {
00254 node definition;
00255 identifier_list parameters;
00256 get_definition(token, parameters, definition);
00257 set_function(token, node(parameters, definition));
00258 return true;
00259 }
00260
00261 if (k == identifier and token == "quit")
00262 std::exit(0);
00263
00264 if (k == identifier and token == "save") {
00265 std::string filename;
00266 if (get_token(filename) != string)
00267 throw syntax_error("expected FILENAME after save, got " + token);
00268 save_library(filename);
00269 output << "Library saved to " << filename << '\n';
00270 }
00271
00272 if (k == identifier and token == "load") {
00273 std::string filename;
00274 if (get_token(filename) != string)
00275 throw syntax_error("expected FILENAME after load, got " + token);
00276 load_library(filename);
00277 output << "Library loaded from " << filename << '\n';
00278 }
00279
00280 push_back(token, k);
00281 node n;
00282 if (not get_expr(n))
00283 return false;
00284 else {
00285
00286 output << n.evaluate() << '\n';
00287 return true;
00288 }
00289 }
00290
00291 bool parser::get_expr(node& result)
00292 {
00293 return get_add_expr(result);
00294 }
00295
00296 bool parser::get_add_expr(node& result)
00297 {
00298 if (not get_mul_expr(result))
00299 return false;
00300 std::string token;
00301 while (kind k = get_token(token)) {
00302 if (k != '+' and k != '-') {
00303 push_back(token, k);
00304 return true;
00305 } else {
00306 node right;
00307 if (not get_mul_expr(right))
00308 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token);
00309 result = node(result, k, right);
00310 }
00311 }
00312 return true;
00313 }
00314
00315 bool parser::get_mul_expr(node& result)
00316 {
00317 if (not get_unary(result))
00318 return false;
00319 std::string token;
00320 while (kind k = get_token(token)) {
00321 if (k != '*' and k != '/') {
00322 push_back(token, k);
00323 return true;
00324 } else {
00325 node right;
00326 if (not get_unary(right))
00327 throw syntax_error("unterminated expression. Expected a unary-expression after " + token);
00328 result = node(result, k, right);
00329 }
00330 }
00331 return true;
00332 }
00333
00334 bool parser::get_unary(node& result)
00335 {
00336 std::string token;
00337 kind k = get_token(token);
00338 if (k == eof)
00339 return false;
00340 if (k == '-') {
00341 if (not get_primary(result))
00342 throw syntax_error("expected primary after unary " + token + ", got end of line");
00343 result = node(k, result);
00344 return true;
00345 } else if (k == '+') {
00346 if (not get_primary(result))
00347 throw syntax_error("expected primary after unary +, got end of line");
00348 return true;
00349 } else {
00350 push_back(token, k);
00351 return get_primary(result);
00352 }
00353 }
00354
00355 void parser::get_expr_list(node_list& result)
00356 {
00357 result.clear();
00358 std::string token;
00359 while (kind k = get_token(token)) {
00360 if (k == ')')
00361 return;
00362 push_back(token, k);
00363 node expr;
00364 if (not get_expr(expr))
00365 throw syntax_error("unexpected end of line in function argument");
00366 result.push_back(expr);
00367 k = get_token(token);
00368 if (k == ')')
00369 return;
00370 else if (k != ',')
00371 throw syntax_error("expected comma in argument list, got " + token);
00372 }
00373 throw syntax_error("unexpected end of line in function argument list");
00374 }
00375
00376 bool parser::get_primary(node& result)
00377 {
00378 std::string token;
00379 kind k = get_token(token);
00380 if (k == eof)
00381 return false;
00382
00383 if (k == '(') {
00384
00385 if (not get_expr(result))
00386 throw syntax_error("expected expression, got end of line");
00387 k = get_token(token);
00388 if (k != ')')
00389 throw syntax_error("expected ')', got " + token);
00390 else
00391 return true;
00392 }
00393
00394 if (k == number) {
00395
00396 if (not get_number(token, result))
00397 throw syntax_error("Invalid numeric literal: " + token);
00398 return true;
00399 }
00400
00401 if (k == identifier) {
00402
00403 std::string next;
00404 k = get_token(next);
00405 if (k == '(') {
00406
00407 node_list arguments;
00408 get_expr_list(arguments);
00409 result = node(token, arguments);
00410 } else {
00411 static const node_list no_arguments;
00412
00413 push_back(next, k);
00414 result = node(token);
00415 }
00416 return true;
00417 }
00418 throw syntax_error("expected a primary, got " + token);
00419 }
00420
00421 void parse_loop(std::istream& input, std::ostream& output)
00422 {
00423 std::string line;
00424
00425
00426
00427 for (output << "> "; std::getline(input, line); output << "> ") {
00428 std::istringstream input(line);
00429 parser p(input);
00430 try {
00431 while (p.get_statement(output)) {
00432
00433 }
00434 } catch(calc_error const& ex) {
00435 output << ex.what() << '\n';
00436 } catch(std::exception const& ex) {
00437 output << "exception: " << ex.what() << '\n';
00438 }
00439 }
00440 }