parse.cpp

Go to the documentation of this file.
#include <cctype>
#include <cstdlib>
#include <iterator>
#include <sstream>

#include "calc_error.hpp"
#include "node.hpp"
#include "parse.hpp"
#include "variables.hpp"
00009 
00010 parser::parser(std::istream& input)
00011 : input_(input),
00012   ctype_(std::use_facet<std::ctype<char> >(input.getloc())),
00013   token_(),
00014   kind_()
00015 {}
00016 
00017 std::string parser::charify(char c)
00018 {
00019   if (c == '\a') return "\'\\a\'";
00020   if (c == '\b') return "\'\\b\'";
00021   if (c == '\f') return "\'\\f\'";
00022   if (c == '\n') return "\'\\n\'";
00023   if (c == '\r') return "\'\\r\'";
00024   if (c == '\t') return "\'\\t\'";
00025   if (c == '\v') return "\'\\v\'";
00026   if (c == '\'') return "\'\\'\'";
00027   if (c == '\\') return "\'\\\\\'";
00028 
00029   if (isprint(c))
00030     return std::string("\'") + c + '\'';
00031   else {
00032     std::ostringstream stream;
00033     stream << "'\\x" << std::hex;
00034     stream.fill('0');
00035     stream.width(2);
00036     stream << (std::char_traits<char>::to_int_type(c) & 0xFF) << '\'';
00037     return stream.str();
00038   }
00039 }
00040 
00041 void parser::get_escape(std::string& str)
00042 {
00043   char c;
00044   if (not input_.get(c))
00045     throw syntax_error("incomplete escape");
00046   if (c == '\n')
00047     return;
00048 
00049   if (c == 'a')
00050     str += '\a';
00051   else if (c == 'b')
00052     str += '\b';
00053   else if (c == 'n')
00054     str += '\n';
00055   else if (c == 'f')
00056     str += '\f';
00057   else if (c == 'r')
00058     str += '\r';
00059   else if (c == 't')
00060     str += '\t';
00061   else if (c == 'v')
00062     str += '\v';
00063   else if (c == 'x') {
00064     // hexadecimal sequence
00065     std::string digits;
00066     while(input_.get(c) and ctype_.is(ctype_.xdigit, c))
00067       digits += c;
00068     if (input_)
00069       input_.unget();
00070     unsigned long value(std::strtoul(digits.c_str(), 0, 16));
00071     str += static_cast<char>(static_cast<unsigned char>(value));
00072   } else if (c >= '0' and c <= '7') {
00073     // octal sequence
00074     std::string digits;
00075     for (int i = 0; i < 3 and input_.get(c) and c >= '0' and c <= '7'; ++i)
00076       digits += c;
00077     if (input_ or c < '0' or c > '7')
00078       input_.unget();
00079     unsigned long value(std::strtoul(digits.c_str(), 0, 8));
00080     str += static_cast<char>(static_cast<unsigned char>(value));
00081   } else {
00082     str += c;
00083   }
00084 }
00085 
00086 void parser::get_string(std::string& result, char delimiter)
00087 {
00088   char c;
00089   while (input_.get(c)) {
00090     if (c == delimiter)
00091       return;
00092     else if (c == '\\')
00093       get_escape(result);
00094     else if (c == '\n')
00095       throw syntax_error("unterminated string");
00096     else
00097       result += c;
00098   }
00099   throw syntax_error("unterminated string");
00100 }
00101 
00102 void parser::get_identifier(std::string& identifier)
00103 {
00104   identifier.clear();
00105   char c;
00106   if (not input_.get(c))
00107     return;
00108   if (not isalpha(c))
00109     throw syntax_error("expected alphabetic, got " + charify(c));
00110   identifier += c;
00111   while (input_.get(c)) {
00112     if (not isalnum(c)) {
00113       input_.unget();
00114       return;
00115     }
00116     identifier += c;
00117   }
00118   return;
00119 }
00120 
00121 void parser::push_back(std::string const& token, kind k)
00122 {
00123   kind_ = k;
00124   if (kind_ == eof)
00125     token_ = "end of line";
00126   else
00127     token_ = token;
00128 }
00129 
00130 parser::kind parser::get_token(std::string& token)
00131 {
00132   if (not token_.empty())
00133   {
00134     kind result(kind_);
00135     token = token_;
00136 
00137     token_.clear();
00138     kind_ = eof;
00139 
00140     return result;
00141   }
00142 
00143   char c;
00144   if (not (input_ >> c)) {
00145     token = "end of line";
00146     return eof;
00147   }
00148   if (isalpha(c)) {
00149     input_.unget();
00150     get_identifier(token);
00151     return identifier;
00152   }
00153 
00154   if (c == '\'' or c == '"') {
00155     // Quoted string
00156     token.clear();
00157     get_string(token, c);
00158     return string;
00159   }
00160 
00161   // Get a numeric literal.
00162   token.clear();
00163   if (c == '+' or c == '-' or c == '*' or c == '/' or c == '%' or c == '(' or c == ')' or c == '=' or c == ',') {
00164     token += c;
00165     return kind(c);
00166   }
00167 
00168   if (c < '0' or c > '9') {
00169     input_.unget();
00170     throw syntax_error("expected digit, got " + charify(c));
00171   }
00172 
00173   kind k = integer;
00174 
00175   while (c >= '0' and c <= '9') {
00176     token += c;
00177     if (not input_.get(c))
00178       return k;
00179   }
00180   if (c == '.') {
00181     k = floating_point;
00182     token += c;
00183     if (not input_.get(c))
00184       throw syntax_error("unterminated number: expected digit after the decimal point");
00185     if (c < '0' or c > '9') {
00186       input_.unget();
00187       throw syntax_error("expected digit after decimal point, got " + charify(c));
00188     }
00189     while (c >= '0' and c <= '9') {
00190       token += c;
00191       if (not input_.get(c))
00192         return k;
00193     }
00194   }
00195   if (c == 'e' or c == 'E') {
00196     k = floating_point;
00197     token += c;
00198     if (not input_.get(c))
00199       throw syntax_error("unterminated number: expected digit in the exponent");
00200     if (c == '-' or c == '+') {
00201       token += c;
00202       if (not input_.get(c))
00203         throw syntax_error("unterminated number: expected digit after sign in the exponent");
00204     }
00205     if (c < '0' or c > '9') {
00206       input_.unget();
00207       throw syntax_error("expected digit in the exponent, got " + charify(c));
00208     }
00209     while (c >= '0' and c <= '9') {
00210       token += c;
00211       if (not input_.get(c))
00212         return k;
00213     }
00214   }
00215   input_.unget();
00216   return k;
00217 }
00218 
00219 bool parser::get_integer(std::string const& token, node& result)
00220 {
00221   std::istringstream stream(token);
00222   // If the value overflows or is otherwise invalid, return false.
00223   long value;
00224   if (not (stream >> value))
00225     return false;
00226   result = node(number(value));
00227   return true;
00228 }
00229 
00230 bool parser::get_float(std::string const& token, node& result)
00231 {
00232   std::istringstream stream(token);
00233   // If the value overflows or is otherwise invalid, return false.
00234   double value;
00235   if (not (stream >> value))
00236     return false;
00237   result = node(number(value));
00238   return true;
00239 }
00240 
00241 void parser::get_definition(std::string& name, identifier_list& parameters, node& definition)
00242 {
00243   // Define a variable.
00244   kind k = get_token(name);
00245   if (k != identifier)
00246     throw syntax_error("expected IDENTIFIER, got " + name);
00247 
00248   std::string token;
00249   k = get_token(token);
00250   if (k == '(') {
00251     get_namelist(std::back_inserter(parameters));
00252     k = get_token(token);
00253   }
00254 
00255   if (k != '=')
00256     throw syntax_error("expected = in definition, got " + token);
00257 
00258   if (not get_expr(definition))
00259     throw syntax_error("expected exprssion in assignment");
00260 }
00261 
00262 bool parser::get_statement(std::ostream& output)
00263 {
00264   std::string token;
00265   kind k(get_token(token));
00266   if (k == eof)
00267     return false;
00268 
00269   if (k == identifier and token == "def") {
00270     node definition;
00271     identifier_list parameters;
00272     get_definition(token, parameters, definition);
00273     set_function(token, node(parameters, definition));
00274     return true;
00275   }
00276 
00277   if (k == identifier and token == "quit")
00278     std::exit(0);
00279 
00280   if (k == identifier and token == "save") {
00281     std::string filename;
00282     if (get_token(filename) != string)
00283       throw syntax_error("expected FILENAME after save, got " + token);
00284     save_library(filename);
00285     output << "Library saved to " << filename << '\n';
00286   }
00287 
00288   if (k == identifier and token == "load") {
00289     std::string filename;
00290     if (get_token(filename) != string)
00291       throw syntax_error("expected FILENAME after load, got " + token);
00292     load_library(filename);
00293     output << "Library loaded from " << filename << '\n';
00294   }
00295   // Otherwise, the statement must be an expression.
00296   push_back(token, k);
00297   node n;
00298   if (not get_expr(n))
00299     return false;
00300   else {
00301     // Evaluate the expression and print the result.
00302     output << n.evaluate() << '\n';
00303     return true;
00304   }
00305 }
00306 
00307 bool parser::get_expr(node& result)
00308 {
00309   return get_add_expr(result);
00310 }
00311 
00312 bool parser::get_add_expr(node& result)
00313 {
00314   if (not get_mul_expr(result))
00315     return false;
00316   std::string token;
00317   while (kind k = get_token(token)) {
00318     if (k != '+' and k != '-') {
00319       push_back(token, k);
00320       return true;
00321     } else {
00322       node right;
00323       if (not get_mul_expr(right))
00324         throw syntax_error("unterminated expression. Expected a multiplicative-expression after " + token);
00325       result = node(result, k, right);
00326     }
00327   }
00328   return true;
00329 }
00330 
00331 bool parser::get_mul_expr(node& result)
00332 {
00333   if (not get_unary(result))
00334     return false;
00335   std::string token;
00336   while (kind k = get_token(token)) {
00337     if (k != '*' and k != '/') {
00338       push_back(token, k);
00339       return true;
00340     } else {
00341       node right;
00342       if (not get_unary(right))
00343         throw syntax_error("unterminated expression. Expected a unary-expression after " + token);
00344       result = node(result, k, right);
00345     }
00346   }
00347   return true;
00348 }
00349 
00350 bool parser::get_unary(node& result)
00351 {
00352   std::string token;
00353   kind k = get_token(token);
00354   if (k == eof)
00355     return false;
00356   if (k == '-') {
00357     if (not get_primary(result))
00358       throw syntax_error("expected primary after unary " + token + ", got end of line");
00359     result = node(k, result);
00360     return true;
00361   } else if (k == '+') {
00362     if (not get_primary(result))
00363       throw syntax_error("expected primary after unary +, got end of line");
00364     return true;
00365   } else {
00366     push_back(token, k);
00367     return get_primary(result);
00368   }
00369 }
00370 
00371 void parser::get_expr_list(node_list& result)
00372 {
00373   result.clear();
00374   std::string token;
00375   while (kind k = get_token(token)) {
00376     if (k == ')')
00377       return;
00378     push_back(token, k);
00379     node expr;
00380     if (not get_expr(expr))
00381       throw syntax_error("unexpected end of line in function argument");
00382     result.push_back(expr);
00383     k = get_token(token);
00384     if (k == ')')
00385       return;
00386     else if (k != ',')
00387       throw syntax_error("expected comma in argument list, got " + token);
00388   }
00389   throw syntax_error("unexpected end of line in function argument list");
00390 }
00391 
00392 bool parser::get_primary(node& result)
00393 {
00394   std::string token;
00395   kind k = get_token(token);
00396   if (k == eof)
00397     return false;
00398 
00399   if (k == '(') {
00400     // Parenthesized expression
00401     if (not get_expr(result))
00402       throw syntax_error("expected expression, got end of line");
00403     k = get_token(token);
00404     if (k != ')')
00405       throw syntax_error("expected ')', got " + token);
00406     else
00407       return true;
00408   }
00409 
00410   if (k == integer) {
00411     // Integer literal
00412     if (not get_integer(token, result))
00413       throw syntax_error("Invalid integer literal: " + token);
00414     return true;
00415   }
00416 
00417   if (k == floating_point) {
00418     // Integer literal
00419     if (not get_float(token, result))
00420       throw syntax_error("Invalid integer literal: " + token);
00421     return true;
00422   }
00423 
00424   if (k == identifier) {
00425     // Identifier: variable or function call
00426     std::string next;
00427     k = get_token(next);
00428     if (k == '(') {
00429         // function call
00430       node_list arguments;
00431       get_expr_list(arguments);
00432       result = node(token, arguments);
00433     } else {
00434       static const node_list no_arguments;
00435       // Variable reference or function call with no arguments
00436       push_back(next, k);
00437       result = node(token);
00438     }
00439     return true;
00440   }
00441   throw syntax_error("expected a primary, got " + token);
00442 }
00443 
00444 void parse_loop(std::istream& input, std::ostream& output)
00445 {
00446   std::string line;
00447   // No portable way to test whether the console is an interactive terminal
00448   // vs. a non-interactive file. If you have a system-specific way to test,
00449   // output the prompt only for the interactive case.
00450   for (output << "> "; std::getline(input, line); output << "> ") {
00451     std::istringstream input(line);
00452     parser p(input);
00453     try {
00454       while (p.get_statement(output)) {
00455         /* empty */
00456       }
00457     } catch(calc_error const& ex) {
00458       output << ex.what() << '\n';
00459     } catch(std::exception const& ex) {
00460       output << "exception: " << ex.what() << '\n';
00461     }
00462   }
00463 }

Generated on Sun Nov 30 10:06:52 2008 for Calculator by  doxygen 1.5.3