#include <cctype>
#include <cstdlib>
#include <iterator>
#include <sstream>
00004
00005 #include "calc_error.hpp"
00006 #include "node.hpp"
00007 #include "parse.hpp"
00008 #include "variables.hpp"
00009
00010 parser::parser(std::istream& input)
00011 : input_(input),
00012 ctype_(std::use_facet<std::ctype<char> >(input.getloc())),
00013 token_(),
00014 kind_()
00015 {}
00016
00017 std::string parser::charify(char c)
00018 {
00019 if (c == '\a') return "\'\\a\'";
00020 if (c == '\b') return "\'\\b\'";
00021 if (c == '\f') return "\'\\f\'";
00022 if (c == '\n') return "\'\\n\'";
00023 if (c == '\r') return "\'\\r\'";
00024 if (c == '\t') return "\'\\t\'";
00025 if (c == '\v') return "\'\\v\'";
00026 if (c == '\'') return "\'\\'\'";
00027 if (c == '\\') return "\'\\\\\'";
00028
00029 if (isprint(c))
00030 return std::string("\'") + c + '\'';
00031 else {
00032 std::ostringstream stream;
00033 stream << "'\\x" << std::hex;
00034 stream.fill('0');
00035 stream.width(2);
00036 stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\'';
00037 return stream.str();
00038 }
00039 }
00040
00041 void parser::get_escape(std::string& str)
00042 {
00043 char c;
00044 if (not input_.get(c))
00045 throw syntax_error("incomplete escape");
00046 if (c == '\n')
00047 return;
00048
00049 if (c == 'a')
00050 str += '\a';
00051 else if (c == 'b')
00052 str += '\b';
00053 else if (c == 'n')
00054 str += '\n';
00055 else if (c == 'f')
00056 str += '\f';
00057 else if (c == 'r')
00058 str += '\r';
00059 else if (c == 't')
00060 str += '\t';
00061 else if (c == 'v')
00062 str += '\v';
00063 else if (c == 'x') {
00064
00065 std::string digits;
00066 while(input_.get(c) and ctype_.is(ctype_.xdigit, c))
00067 digits += c;
00068 if (input_)
00069 input_.unget();
00070 unsigned long value(std::strtoul(digits.c_str(), 0, 16));
00071 str += static_cast<char>(static_cast<unsigned char>(value));
00072 } else if (c >= '0' and c <= '7') {
00073
00074 std::string digits;
00075 for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i)
00076 digits += c;
00077 if (input_ or c < '0' or c > '7')
00078 input_.unget();
00079 unsigned long value(std::strtoul(digits.c_str(), 0, 8));
00080 str += static_cast<char>(static_cast<unsigned char>(value));
00081 } else {
00082 str += c;
00083 }
00084 }
00085
00086 void parser::get_string(std::string& result, char delimiter)
00087 {
00088 char c;
00089 while (input_.get(c)) {
00090 if (c == delimiter)
00091 return;
00092 else if (c == '\\')
00093 get_escape(result);
00094 else if (c == '\n')
00095 throw syntax_error("unterminated string");
00096 else
00097 result += c;
00098 }
00099 throw syntax_error("unterminated string");
00100 }
00101
00102 void parser::get_identifier(std::string& identifier)
00103 {
00104 identifier.clear();
00105 char c;
00106 if (not input_.get(c))
00107 return;
00108 if (not isalpha(c))
00109 throw syntax_error("expected alphabetic, got " + charify(c));
00110 identifier += c;
00111 while (input_.get(c)) {
00112 if (not isalnum(c)) {
00113 input_.unget();
00114 return;
00115 }
00116 identifier += c;
00117 }
00118 return;
00119 }
00120
00121 void parser::push_back(std::string const& token, kind k)
00122 {
00123 kind_ = k;
00124 if (kind_ == eof)
00125 token_ = "end of line";
00126 else
00127 token_ = token;
00128 }
00129
00130 parser::kind parser::get_token(std::string& token)
00131 {
00132 if (not token_.empty())
00133 {
00134 kind result(kind_);
00135 token = token_;
00136
00137 token_.clear();
00138 kind_ = eof;
00139
00140 return result;
00141 }
00142
00143 char c;
00144 if (not (input_ >> c)) {
00145 token = "end of line";
00146 return eof;
00147 }
00148 if (isalpha(c)) {
00149 input_.unget();
00150 get_identifier(token);
00151 return identifier;
00152 }
00153
00154 if (c == '\'' or c == '"') {
00155
00156 token.clear();
00157 get_string(token, c);
00158 return string;
00159 }
00160
00161
00162 token.clear();
00163 if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') {
00164 token += c;
00165 return kind(c);
00166 }
00167
00168 if (c < '0' or c > '9') {
00169 input_.unget();
00170 throw syntax_error("expected digit, got " + charify(c));
00171 }
00172
00173 kind k = integer;
00174
00175 while (c >= '0' and c <= '9') {
00176 token += c;
00177 if (not input_.get(c))
00178 return k;
00179 }
00180 if (c == '.') {
00181 k = floating_point;
00182 token += c;
00183 if (not input_.get(c))
00184 throw syntax_error("unterminated number: expected digit after the decimal point");
00185 if (c < '0' or c > '9') {
00186 input_.unget();
00187 throw syntax_error("expected digit after decimal point, got " + charify(c));
00188 }
00189 while (c >= '0' and c <= '9') {
00190 token += c;
00191 if (not input_.get(c))
00192 return k;
00193 }
00194 }
00195 if (c == 'e' or c == 'E') {
00196 k = floating_point;
00197 token += c;
00198 if (not input_.get(c))
00199 throw syntax_error("unterminated number: expected digit in the exponent");
00200 if (c == '-' or c == '+') {
00201 token += c;
00202 if (not input_.get(c))
00203 throw syntax_error("unterminated number: expected digit after sign in the exponent");
00204 }
00205 if (c < '0' or c > '9') {
00206 input_.unget();
00207 throw syntax_error("expected digit in the exponent, got " + charify(c));
00208 }
00209 while (c >= '0' and c <= '9') {
00210 token += c;
00211 if (not input_.get(c))
00212 return k;
00213 }
00214 }
00215 input_.unget();
00216 return k;
00217 }
00218
00219 bool parser::get_integer(std::string const& token, node& result)
00220 {
00221 std::istringstream stream(token);
00222
00223 long value;
00224 if (not (stream >> value))
00225 return false;
00226 result = node(number(value));
00227 return true;
00228 }
00229
00230 bool parser::get_float(std::string const& token, node& result)
00231 {
00232 std::istringstream stream(token);
00233
00234 double value;
00235 if (not (stream >> value))
00236 return false;
00237 result = node(number(value));
00238 return true;
00239 }
00240
00241 void parser::get_definition(std::string& name, identifier_list& parameters, node& definition)
00242 {
00243
00244 kind k = get_token(name);
00245 if (k != identifier)
00246 throw syntax_error("expected IDENTIFIER, got " + name);
00247
00248 std::string token;
00249 k = get_token(token);
00250 if (k == '(') {
00251 get_namelist(std::back_inserter(parameters));
00252 k = get_token(token);
00253 }
00254
00255 if (k != '=')
00256 throw syntax_error("expected = in definition, got " + token);
00257
00258 if (not get_expr(definition))
00259 throw syntax_error("expected exprssion in assignment");
00260 }
00261
00262 bool parser::get_statement(std::ostream& output)
00263 {
00264 std::string token;
00265 kind k(get_token(token));
00266 if (k == eof)
00267 return false;
00268
00269 if (k == identifier and token == "def") {
00270 node definition;
00271 identifier_list parameters;
00272 get_definition(token, parameters, definition);
00273 set_function(token, node(parameters, definition));
00274 return true;
00275 }
00276
00277 if (k == identifier and token == "quit")
00278 std::exit(0);
00279
00280 if (k == identifier and token == "save") {
00281 std::string filename;
00282 if (get_token(filename) != string)
00283 throw syntax_error("expected FILENAME after save, got " + token);
00284 save_library(filename);
00285 output << "Library saved to " << filename << '\n';
00286 }
00287
00288 if (k == identifier and token == "load") {
00289 std::string filename;
00290 if (get_token(filename) != string)
00291 throw syntax_error("expected FILENAME after load, got " + token);
00292 load_library(filename);
00293 output << "Library loaded from " << filename << '\n';
00294 }
00295
00296 push_back(token, k);
00297 node n;
00298 if (not get_expr(n))
00299 return false;
00300 else {
00301
00302 output << n.evaluate() << '\n';
00303 return true;
00304 }
00305 }
00306
00307 bool parser::get_expr(node& result)
00308 {
00309 return get_add_expr(result);
00310 }
00311
00312 bool parser::get_add_expr(node& result)
00313 {
00314 if (not get_mul_expr(result))
00315 return false;
00316 std::string token;
00317 while (kind k = get_token(token)) {
00318 if (k != '+' and k != '-') {
00319 push_back(token, k);
00320 return true;
00321 } else {
00322 node right;
00323 if (not get_mul_expr(right))
00324 throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token);
00325 result = node(result, k, right);
00326 }
00327 }
00328 return true;
00329 }
00330
00331 bool parser::get_mul_expr(node& result)
00332 {
00333 if (not get_unary(result))
00334 return false;
00335 std::string token;
00336 while (kind k = get_token(token)) {
00337 if (k != '*' and k != '/') {
00338 push_back(token, k);
00339 return true;
00340 } else {
00341 node right;
00342 if (not get_unary(right))
00343 throw syntax_error("unterminated expression. Expected a unary-expression after " + token);
00344 result = node(result, k, right);
00345 }
00346 }
00347 return true;
00348 }
00349
00350 bool parser::get_unary(node& result)
00351 {
00352 std::string token;
00353 kind k = get_token(token);
00354 if (k == eof)
00355 return false;
00356 if (k == '-') {
00357 if (not get_primary(result))
00358 throw syntax_error("expected primary after unary " + token + ", got end of line");
00359 result = node(k, result);
00360 return true;
00361 } else if (k == '+') {
00362 if (not get_primary(result))
00363 throw syntax_error("expected primary after unary +, got end of line");
00364 return true;
00365 } else {
00366 push_back(token, k);
00367 return get_primary(result);
00368 }
00369 }
00370
00371 void parser::get_expr_list(node_list& result)
00372 {
00373 result.clear();
00374 std::string token;
00375 while (kind k = get_token(token)) {
00376 if (k == ')')
00377 return;
00378 push_back(token, k);
00379 node expr;
00380 if (not get_expr(expr))
00381 throw syntax_error("unexpected end of line in function argument");
00382 result.push_back(expr);
00383 k = get_token(token);
00384 if (k == ')')
00385 return;
00386 else if (k != ',')
00387 throw syntax_error("expected comma in argument list, got " + token);
00388 }
00389 throw syntax_error("unexpected end of line in function argument list");
00390 }
00391
00392 bool parser::get_primary(node& result)
00393 {
00394 std::string token;
00395 kind k = get_token(token);
00396 if (k == eof)
00397 return false;
00398
00399 if (k == '(') {
00400
00401 if (not get_expr(result))
00402 throw syntax_error("expected expression, got end of line");
00403 k = get_token(token);
00404 if (k != ')')
00405 throw syntax_error("expected ')', got " + token);
00406 else
00407 return true;
00408 }
00409
00410 if (k == integer) {
00411
00412 if (not get_integer(token, result))
00413 throw syntax_error("Invalid integer literal: " + token);
00414 return true;
00415 }
00416
00417 if (k == floating_point) {
00418
00419 if (not get_float(token, result))
00420 throw syntax_error("Invalid integer literal: " + token);
00421 return true;
00422 }
00423
00424 if (k == identifier) {
00425
00426 std::string next;
00427 k = get_token(next);
00428 if (k == '(') {
00429
00430 node_list arguments;
00431 get_expr_list(arguments);
00432 result = node(token, arguments);
00433 } else {
00434 static const node_list no_arguments;
00435
00436 push_back(next, k);
00437 result = node(token);
00438 }
00439 return true;
00440 }
00441 throw syntax_error("expected a primary, got " + token);
00442 }
00443
00444 void parse_loop(std::istream& input, std::ostream& output)
00445 {
00446 std::string line;
00447
00448
00449
00450 for (output << "> "; std::getline(input, line); output << "> ") {
00451 std::istringstream input(line);
00452 parser p(input);
00453 try {
00454 while (p.get_statement(output)) {
00455
00456 }
00457 } catch(calc_error const& ex) {
00458 output << ex.what() << '\n';
00459 } catch(std::exception const& ex) {
00460 output << "exception: " << ex.what() << '\n';
00461 }
00462 }
00463 }