parse.cpp

Go to the documentation of this file.
00001 #include <cstdlib>
00002 #include <iterator>
00003 #include <sstream>
00004 
00005 #include "calc_error.hpp"
00006 #include "node.hpp"
00007 #include "parse.hpp"
00008 #include "variables.hpp"
00009 
00010 parser::parser(std::istream& input)
00011 : input_(input),
00012   ctype_(std::use_facet<std::ctype<char> >(input.getloc())),
00013   token_(),
00014   kind_()
00015 {}
00016 
00017 std::string parser::charify(char c)
00018 {
00019   if (c == '\a') return "\'\\a\'";
00020   if (c == '\b') return "\'\\b\'";
00021   if (c == '\f') return "\'\\f\'";
00022   if (c == '\n') return "\'\\n\'";
00023   if (c == '\r') return "\'\\r\'";
00024   if (c == '\t') return "\'\\t\'";
00025   if (c == '\v') return "\'\\v\'";
00026   if (c == '\'') return "\'\\'\'";
00027   if (c == '\\') return "\'\\\\\'";
00028 
00029   if (isprint(c))
00030     return std::string("\'") + c + '\'';
00031   else {
00032     std::ostringstream stream;
00033     stream << "'\\x" << std::hex;
00034     stream.fill('0');
00035     stream.width(2);
00036     stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\'';
00037     return stream.str();
00038   }
00039 }
00040 
00041 void parser::get_escape(std::string& str)
00042 {
00043   char c;
00044   if (not input_.get(c))
00045     throw syntax_error("incomplete escape");
00046   if (c == '\n')
00047     return;
00048 
00049   if (c == 'a')
00050     str += '\a';
00051   else if (c == 'b')
00052     str += '\b';
00053   else if (c == 'n')
00054     str += '\n';
00055   else if (c == 'f')
00056     str += '\f';
00057   else if (c == 'r')
00058     str += '\r';
00059   else if (c == 't')
00060     str += '\t';
00061   else if (c == 'v')
00062     str += '\v';
00063   else if (c == 'x') {
00064     // hexadecimal sequence
00065     std::string digits;
00066     while(input_.get(c) and ctype_.is(ctype_.xdigit, c))
00067       digits += c;
00068     if (input_)
00069       input_.unget();
00070     unsigned long value(std::strtoul(digits.c_str(), 0, 16));
00071     str += static_cast<char>(static_cast<unsigned char>(value));
00072   } else if (c >= '0' and c <= '7') {
00073     // octal sequence
00074     std::string digits;
00075     for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i)
00076       digits += c;
00077     if (input_ or c < '0' or c > '7')
00078       input_.unget();
00079     unsigned long value(std::strtoul(digits.c_str(), 0, 8));
00080     str += static_cast<char>(static_cast<unsigned char>(value));
00081   } else {
00082     str += c;
00083   }
00084 }
00085 
00086 void parser::get_string(std::string& result, char delimiter)
00087 {
00088   char c;
00089   while (input_.get(c)) {
00090     if (c == delimiter)
00091       return;
00092     else if (c == '\\')
00093       get_escape(result);
00094     else if (c == '\n')
00095       throw syntax_error("unterminated string");
00096     else
00097       result += c;
00098   }
00099   throw syntax_error("unterminated string");
00100 }
00101 
00102 void parser::get_identifier(std::string& identifier)
00103 {
00104   identifier.clear();
00105   char c;
00106   if (not input_.get(c))
00107     return;
00108   if (not isalpha(c))
00109     throw syntax_error("expected alphabetic, got " + charify(c));
00110   identifier += c;
00111   while (input_.get(c)) {
00112     if (not isalnum(c)) {
00113       input_.unget();
00114       return;
00115     }
00116     identifier += c;
00117   }
00118   return;
00119 }
00120 
00121 void parser::push_back(std::string const& token, kind k)
00122 {
00123   kind_ = k;
00124   if (kind_ == eof)
00125     token_ = "end of line";
00126   else
00127     token_ = token;
00128 }
00129 
00130 parser::kind parser::get_token(std::string& token)
00131 {
00132   if (not token_.empty())
00133   {
00134     kind result(kind_);
00135     token = token_;
00136 
00137     token_.clear();
00138     kind_ = eof;
00139 
00140     return result;
00141   }
00142 
00143   char c;
00144   if (not (input_ >> c)) {
00145     token = "end of line";
00146     return eof;
00147   }
00148   if (isalpha(c)) {
00149     input_.unget();
00150     get_identifier(token);
00151     return identifier;
00152   }
00153 
00154   if (c == '\'' or c == '"') {
00155     // Quoted string
00156     token.clear();
00157     get_string(token, c);
00158     return string;
00159   }
00160 
00161   // Get a numeric literal.
00162   token.clear();
00163   if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') {
00164     token += c;
00165     return kind(c);
00166   }
00167 
00168   if (c < '0' or c > '9') {
00169     input_.unget();
00170     throw syntax_error("expected digit, got " + charify(c));
00171   }
00172   while (c >= '0' and c <= '9') {
00173     token += c;
00174     if (not input_.get(c))
00175       return number;
00176   }
00177   if (c == '.') {
00178     token += c;
00179     if (not input_.get(c))
00180       throw syntax_error("unterminated number: expected digit after the decimal point");
00181     if (c < '0' or c > '9') {
00182       input_.unget();
00183       throw syntax_error("expected digit after decimal point, got " + charify(c));
00184     }
00185     while (c >= '0' and c <= '9') {
00186       token += c;
00187       if (not input_.get(c))
00188         return number;
00189     }
00190   }
00191   if (c == 'e' or c == 'E') {
00192     token += c;
00193     if (not input_.get(c))
00194       throw syntax_error("unterminated number: expected digit in the exponent");
00195     if (c == '-' or c == '+') {
00196       token += c;
00197       if (not input_.get(c))
00198         throw syntax_error("unterminated number: expected digit after sign in the exponent");
00199     }
00200     if (c < '0' or c > '9') {
00201       input_.unget();
00202       throw syntax_error("expected digit in the exponent, got " + charify(c));
00203     }
00204     while (c >= '0' and c <= '9') {
00205       token += c;
00206       if (not input_.get(c))
00207         return number;
00208     }
00209   }
00210   input_.unget();
00211   return number;
00212 }
00213 
00214 bool parser::get_number(std::string const& token, node& result)
00215 {
00216   std::istringstream stream(token);
00217   // If the value overflows or is otherwise invalid, return false.
00218   double value;
00219   if (not (stream >> value))
00220     return false;
00221   result = node(value);
00222   return true;
00223 }
00224 
00225 void parser::get_definition(std::string& name, identifier_list& parameters, node& definition)
00226 {
00227   // Define a variable.
00228   kind k = get_token(name);
00229   if (k != identifier)
00230     throw syntax_error("expected IDENTIFIER, got " + name);
00231 
00232   std::string token;
00233   k = get_token(token);
00234   if (k == '(') {
00235     get_namelist(std::back_inserter(parameters));
00236     k = get_token(token);
00237   }
00238 
00239   if (k != '=')
00240     throw syntax_error("expected = in definition, got " + token);
00241 
00242   if (not get_expr(definition))
00243     throw syntax_error("expected exprssion in assignment");
00244 }
00245 
00246 bool parser::get_statement(std::ostream& output)
00247 {
00248   std::string token;
00249   kind k(get_token(token));
00250   if (k == eof)
00251     return false;
00252 
00253   if (k == identifier and token == "def") {
00254     node definition;
00255     identifier_list parameters;
00256     get_definition(token, parameters, definition);
00257     set_function(token, node(parameters, definition));
00258     return true;
00259   }
00260 
00261   if (k == identifier and token == "quit")
00262     std::exit(0);
00263 
00264   if (k == identifier and token == "save") {
00265     std::string filename;
00266     if (get_token(filename) != string)
00267       throw syntax_error("expected FILENAME after save, got " + token);
00268     save_library(filename);
00269     output << "Library saved to " << filename << '\n';
00270   }
00271 
00272   if (k == identifier and token == "load") {
00273     std::string filename;
00274     if (get_token(filename) != string)
00275       throw syntax_error("expected FILENAME after load, got " + token);
00276     load_library(filename);
00277     output << "Library loaded from " << filename << '\n';
00278   }
00279   // Otherwise, the statement must be an expression.
00280   push_back(token, k);
00281   node n;
00282   if (not get_expr(n))
00283     return false;
00284   else {
00285     // Evaluate the expression and print the result.
00286     output << n.evaluate() << '\n';
00287     return true;
00288   }
00289 }
00290 
00291 bool parser::get_expr(node& result)
00292 {
00293   return get_add_expr(result);
00294 }
00295 
00296 bool parser::get_add_expr(node& result)
00297 {
00298   if (not get_mul_expr(result))
00299     return false;
00300   std::string token;
00301   while (kind k = get_token(token)) {
00302     if (k != '+' and k != '-') {
00303       push_back(token, k);
00304       return true;
00305     } else {
00306       node right;
00307       if (not get_mul_expr(right))
00308         throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token);
00309       result = node(result, k, right);
00310     }
00311   }
00312   return true;
00313 }
00314 
00315 bool parser::get_mul_expr(node& result)
00316 {
00317   if (not get_unary(result))
00318     return false;
00319   std::string token;
00320   while (kind k = get_token(token)) {
00321     if (k != '*' and k != '/') {
00322       push_back(token, k);
00323       return true;
00324     } else {
00325       node right;
00326       if (not get_unary(right))
00327         throw syntax_error("unterminated expression. Expected a unary-expression after " + token);
00328       result = node(result, k, right);
00329     }
00330   }
00331   return true;
00332 }
00333 
00334 bool parser::get_unary(node& result)
00335 {
00336   std::string token;
00337   kind k = get_token(token);
00338   if (k == eof)
00339     return false;
00340   if (k == '-') {
00341     if (not get_primary(result))
00342       throw syntax_error("expected primary after unary " + token + ", got end of line");
00343     result = node(k, result);
00344     return true;
00345   } else if (k == '+') {
00346     if (not get_primary(result))
00347       throw syntax_error("expected primary after unary +, got end of line");
00348     return true;
00349   } else {
00350     push_back(token, k);
00351     return get_primary(result);
00352   }
00353 }
00354 
00355 void parser::get_expr_list(node_list& result)
00356 {
00357   result.clear();
00358   std::string token;
00359   while (kind k = get_token(token)) {
00360     if (k == ')')
00361       return;
00362     push_back(token, k);
00363     node expr;
00364     if (not get_expr(expr))
00365       throw syntax_error("unexpected end of line in function argument");
00366     result.push_back(expr);
00367     k = get_token(token);
00368     if (k == ')')
00369       return;
00370     else if (k != ',')
00371       throw syntax_error("expected comma in argument list, got " + token);
00372   }
00373   throw syntax_error("unexpected end of line in function argument list");
00374 }
00375 
00376 bool parser::get_primary(node& result)
00377 {
00378   std::string token;
00379   kind k = get_token(token);
00380   if (k == eof)
00381     return false;
00382 
00383   if (k == '(') {
00384     // Parenthesized expression
00385     if (not get_expr(result))
00386       throw syntax_error("expected expression, got end of line");
00387     k = get_token(token);
00388     if (k != ')')
00389       throw syntax_error("expected ')', got " + token);
00390     else
00391       return true;
00392   }
00393 
00394   if (k == number) {
00395     // Numeric literal
00396     if (not get_number(token, result))
00397       throw syntax_error("Invalid numeric literal: " + token);
00398     return true;
00399   }
00400 
00401   if (k == identifier) {
00402     // Identifier: variable or function call
00403     std::string next;
00404     k = get_token(next);
00405     if (k == '(') {
00406         // function call
00407       node_list arguments;
00408       get_expr_list(arguments);
00409       result = node(token, arguments);
00410     } else {
00411       static const node_list no_arguments;
00412       // Variable reference or function call with no arguments
00413       push_back(next, k);
00414       result = node(token);
00415     }
00416     return true;
00417   }
00418   throw syntax_error("expected a primary, got " + token);
00419 }
00420 
00421 void parse_loop(std::istream& input, std::ostream& output)
00422 {
00423   std::string line;
00424   // No portable way to test whether the console is an interactive terminal
00425   // vs. a non-interactive file. If you have a system-specific way to test,
00426   // output the prompt only for the interactive case.
00427   for (output << "> "; std::getline(input, line); output << "> ") {
00428     std::istringstream input(line);
00429     parser p(input);
00430     try {
00431       while (p.get_statement(output)) {
00432         /* empty */
00433       }
00434     } catch(calc_error const& ex) {
00435       output << ex.what() << '\n';
00436     } catch(std::exception const& ex) {
00437       output << "exception: " << ex.what() << '\n';
00438     }
00439   }
00440 }

Generated on Sun Nov 30 10:05:49 2008 for Calculator by  doxygen 1.5.3