Files
MagicSetEditor2/src/script/parser.cpp
T
twanvl 8e3049d0ee improved error reporting for the keyword editor
git-svn-id: svn://svn.code.sf.net/p/magicseteditor/code/trunk@260 0fc631ac-6414-0410-93d0-97cfa31319b6
2007-04-16 23:45:01 +00:00

611 lines
22 KiB
C++

//+----------------------------------------------------------------------------+
//| Description: Magic Set Editor - Program to make Magic (tm) cards |
//| Copyright: (C) 2001 - 2006 Twan van Laarhoven |
//| License: GNU General Public License 2 or later (see file COPYING) |
//+----------------------------------------------------------------------------+
// ----------------------------------------------------------------------------- : Includes
#include <script/script.hpp>
#include <script/parser.hpp>
#include <script/to_value.hpp>
#include <util/error.hpp>
#include <util/io/package_manager.hpp> // for "include file" semi hack
#include <stack>
DECLARE_TYPEOF_COLLECTION(int);
#ifdef __WXMSW__
#define TokenType TokenType_ // some stupid windows header uses our name
#endif
// ----------------------------------------------------------------------------- : Tokenizing : class
/// The kinds of token the tokenizer can produce
enum TokenType
{ TOK_NAME // abc (a letter followed by letters, digits and underscores)
, TOK_INT // 123 (a number without a '.')
, TOK_DOUBLE // 123.0 (a number containing a '.')
, TOK_STRING // "asdf" (also the literal parts of a string containing {...})
, TOK_OPER // + - * / ! . : = < > ; , and the two character operators := == != <= >=
, TOK_LPAREN // ( { [ and "{ which opens an expression inside a string
, TOK_RPAREN // ) } ] and }" which closes an expression inside a string
, TOK_DUMMY // placeholder for putBack
, TOK_EOF // end of input
};
/// Tokens produced by the TokenIterator
struct Token {
TokenType type;
String value;
bool newline; ///< Is there a newline between this token and the previous one?
inline bool operator == (TokenType t) const { return type == t; }
inline bool operator != (TokenType t) const { return type != t; }
inline bool operator == (const String& s) const { return type != TOK_STRING && value == s; }
inline bool operator != (const String& s) const { return type == TOK_STRING || value != s; }
};
/// The kinds of brace/quote the tokenizer keeps track of while they are open
enum OpenBrace
{ BRACE_STRING // " (pushed when a string constant is opened from script code)
, BRACE_STRING_MODE // fake brace for string mode (pushed by the constructor, the whole input is a string)
, BRACE_PAREN // (, [, {
};
/// Iterator over a string, one token at a time.
/** Also stores errors found when tokenizing or parsing.
 * Tokens are read from the input on demand and kept in a buffer until they are consumed.
 */
class TokenIterator {
public:
/// Start tokenizing str
/** @param str The text to tokenize
 * @param string_mode If set, the input is tokenized as the contents of a string constant instead of as script code
 * @param errors Errors are added to this vector, it is stored by reference
 */
TokenIterator(const String& str, bool string_mode, vector<ScriptParseError>& errors);
/// Peek at the next token, doesn't move to the one after that
/** Can peek further forward by using higher values of offset.
 * offset=0 returns the last token that was read, or a TOK_DUMMY placeholder if putBack() was used.
 */
const Token& peek(size_t offset = 1);
/// Retrieve the next token
/** The returned reference points into the token buffer, other calls that read or put back tokens change that buffer.
 */
const Token& read();
/// Put back a token
/** A TOK_DUMMY placeholder is inserted in front of the current token, so the next read() returns the current token again.
 * Only one token can be correctly put back.
 */
void putBack();
private:
String input; ///< The text currently being tokenized
size_t pos; ///< Position in input of the next character to read
vector<Token> buffer; ///< buffer of unread tokens, front() = current
stack<OpenBrace> open_braces; ///< braces/quotes we entered from script mode
bool newline; ///< Did we just pass a newline?
// more input?
/// An input that was suspended because of an "include file:", and where to continue in it
struct MoreInput {
String input;
size_t pos;
};
stack<MoreInput> more; ///< Read tokens from here when we are done with the current input
/// Add a token to the buffer, with the current newline value, resets newline
void addToken(TokenType type, const String& value);
/// Read the next token, and add it to the buffer
/** Not every call adds a token: whitespace, comments and includes are consumed without producing one. */
void readToken();
/// Read the next token which is a string (after the opening ")
void readStringToken();
public:
/// All errors found
vector<ScriptParseError>& errors;
/// Add an error message
void add_error(const String& message);
/// Expected some token instead of what was found
void expected(const String& exp);
};
// ----------------------------------------------------------------------------- : Characters
bool isAlpha_(Char c) { return isAlpha(c) || c==_('_'); }
bool isAlnum_(Char c) { return isAlnum(c) || c==_('_'); }
bool isOper (Char c) { return c==_('+') || c==_('-') || c==_('*') || c==_('/') || c==_('!') || c==_('.') ||
c==_(':') || c==_('=') || c==_('<') || c==_('>') || c==_(';') || c==_(','); }
bool isLparen(Char c) { return c==_('(') || c==_('[') || c==_('{'); }
bool isRparen(Char c) { return c==_(')') || c==_(']') || c==_('}'); }
bool isDigitOrDot(Char c) { return isDigit(c) || c==_('.'); }
bool isLongOper(const String& s) { return s==_(":=") || s==_("==") || s==_("!=") || s==_("<=") || s==_(">="); }
// ----------------------------------------------------------------------------- : Tokenizing
TokenIterator::TokenIterator(const String& str, bool string_mode, vector<ScriptParseError>& errors)
    : input(str)
    , pos(0)
    , newline(false)
    , errors(errors)
{
    // In script mode tokens are simply read on demand
    if (!string_mode) return;
    // In string mode the whole input is the inside of a string constant:
    // pretend a string was opened and tokenize its contents right away.
    open_braces.push(BRACE_STRING_MODE);
    putBack(); // dummy 'current' token, so the string is the next token to be read
    readStringToken();
}
const Token& TokenIterator::peek(size_t offset) {
    // Keep tokenizing until the buffer reaches the requested offset.
    // Not every readToken() call adds a token, hence the loop.
    while (offset >= buffer.size()) {
        readToken();
    }
    return buffer[offset];
}
const Token& TokenIterator::read() {
    // Drop the current token (if there is one), the token after it becomes current
    if (buffer.size() > 0) {
        buffer.erase(buffer.begin());
    }
    return peek(0);
}
void TokenIterator::putBack() {
// Don't use addToken, because it canges newline
// Also, we want to push_front
Token t = {TOK_DUMMY, _(""), false};
buffer.insert(buffer.begin(), t);
}
void TokenIterator::addToken(TokenType type, const String& value) {
    Token token;
    token.type    = type;
    token.value   = value;
    token.newline = newline; // was a newline passed since the previous token?
    buffer.push_back(token);
    // the newline now belongs to this token
    newline = false;
}
void TokenIterator::readToken() {
if (pos >= input.size()) {
// done with input, is there more?
if (!more.empty()) {
input = more.top().input;
pos = more.top().pos;
more.pop();
} else {
// EOF
addToken(TOK_EOF, _("end of input"));
}
return;
}
// read a character from the input
Char c = input.GetChar(pos++);
if (c == _('\n')) {
newline = true;
} else if (isSpace(c)) {
// ignore
} else if (is_substr(input, pos-1, _("include file:"))) {
// include a file
// HACK: This is not really the right place for it, but it's the best I've got
// filename
pos += 12; // "nclude file:"
size_t eol = input.find_first_of(_("\r\n"), pos);
if (eol == String::npos) eol = input.size();
String filename = trim(input.substr(pos, eol - pos));
// store the current input for later retrieval
MoreInput m = {input, eol};
more.push(m);
// open file
InputStreamP is = packages.openFileFromPackage(filename);
wxTextInputStream tis(*is);
// read the entire file, and start at the beginning of it
pos = 0;
input.clear();
while (!is->Eof()) input += tis.ReadLine() + _('\n');
} else if (isAlpha(c)) {
// name
size_t start = pos - 1;
while (pos < input.size() && isAlnum_(input.GetChar(pos))) ++pos;
addToken(TOK_NAME, cannocial_name_form(input.substr(start, pos-start))); // convert name to cannocial form
} else if (isDigit(c)) {
// number
size_t start = pos - 1;
while (pos < input.size() && isDigitOrDot(input.GetChar(pos))) ++pos;
String num = input.substr(start, pos-start);
addToken(
num.find_first_of('.') == String::npos ? TOK_INT : TOK_DOUBLE,
num
);
} else if (isOper(c)) {
// operator
if (pos < input.size() && isLongOper(input.substr(pos - 1, 2))) {
// long operator
addToken(TOK_OPER, input.substr(pos - 1, 2));
pos += 1;
} else {
addToken(TOK_OPER, input.substr(pos - 1, 1));
}
} else if (c==_('"')) {
// string
open_braces.push(BRACE_STRING);
readStringToken();
} else if (c == _('}') && !open_braces.empty() && open_braces.top() != BRACE_PAREN) {
// closing smart string, resume to string parsing
// "a{e}b" --> "a" "{ e }" "b"
addToken(TOK_RPAREN, _("}\""));
readStringToken();
} else if (isLparen(c)) {
// paranthesis/brace
open_braces.push(BRACE_PAREN);
addToken(TOK_LPAREN, String(1,c));
} else if (isRparen(c)) {
// paranthesis/brace
if (!open_braces.empty()) open_braces.pop();
addToken(TOK_RPAREN, String(1,c));
} else if(c==_('#')) {
// comment untill end of line
while (pos < input.size() && input[pos] != _('\n')) ++pos;
} else {
add_error(_("Unknown character in script: '") + String(1,c) + _("'"));
// just skip the character
}
}
void TokenIterator::readStringToken() {
String str;
while (true) {
if (pos >= input.size()) {
if (!open_braces.empty() && open_braces.top() == BRACE_STRING_MODE) {
// in string mode: end of input = end of string
addToken(TOK_STRING, str);
return;
} else {
add_error(_("Unexpected end of input in string constant"));
// fix up
addToken(TOK_STRING, str);
return;
}
}
Char c = input.GetChar(pos++);
// parse the string constant
if (c == _('"') && !open_braces.empty() && open_braces.top() == BRACE_STRING) {
// end of string
addToken(TOK_STRING, str);
open_braces.pop();
return;
} else if (c == _('\\')) {
// escape
if (pos >= input.size()) {
add_error(_("Unexpected end of input in string constant"));
// fix up
addToken(TOK_STRING, str);
return;
}
c = input.GetChar(pos++);
if (c == _('n')) str += _('\n');
else if (c == _('<')) str += _('\1'); // escape for <
else str += c; // \ or { or "
} else if (c == _('{')) {
// smart string
// "a{e}b" --> "a" "{ e }" "b"
addToken(TOK_STRING, str);
addToken(TOK_LPAREN, _("\"{"));
return;
} else {
str += c;
}
}
}
void TokenIterator::add_error(const String& message) {
    // Report at most one error per position
    const bool already_reported = !errors.empty() && errors.back().start == pos;
    if (already_reported) return;
    errors.push_back(ScriptParseError(pos, message));
}
void TokenIterator::expected(const String& expected) {
size_t error_pos = pos - peek(0).value.size();
if (!errors.empty() && errors.back().start == pos) return; // already an error here
errors.push_back(ScriptParseError(error_pos, expected, peek(0).value));
}
// ----------------------------------------------------------------------------- : Parsing
/// Precedence levels for parsing, higher = tighter
/** parseOper only applies an operator if its level is at least the minPrec it was called with.
 */
enum Precedence
{ PREC_ALL // lowest level, all operators are applied
, PREC_NEWLINE // newline ;
, PREC_SEQ // ;
, PREC_SET // :=
, PREC_AND // and or
, PREC_CMP // == != < > <= >=
, PREC_ADD // + -
, PREC_MUL // * / mod
, PREC_UNARY // - not (unary operators)
, PREC_FUN // [] () . (function call, member)
, PREC_STRING // +{ }+ (smart string operators)
, PREC_NONE // highest level, no operators are applied
};
/// Parse an expression
/** @param input Read tokens from the input
 * @param script Add resulting instructions to the script
 * @param minPrec Minimum precedence level for operators
 * NOTE: The net stack effect of an expression should be +1
 */
void parseExpr(TokenIterator& input, Script& script, Precedence minPrec);
/// Parse an expression, possibly with operators applied. Optionally adds an instruction at the end.
/** @param input Read tokens from the input
 * @param script Add resulting instructions to the script
 * @param minPrec Minimum precedence level for operators
 * @param closeWith Add this instruction at the end, I_NOP (the default) adds nothing
 * @param closeWithData Data for the instruction at the end
 * NOTE: The net stack effect of an expression should be +1
 */
void parseOper(TokenIterator& input, Script& script, Precedence minPrec, InstructionType closeWith = I_NOP, int closeWithData = 0);
/// Parse a script, collecting the errors instead of throwing them.
/** errors_out is cleared first, and afterwards contains all errors that were found.
 * Returns the script, or a null pointer if there were errors.
 */
ScriptP parse(const String& s, bool string_mode, vector<ScriptParseError>& errors_out) {
    errors_out.clear();
    // tokenize and parse the whole input as a single expression that returns its value
    TokenIterator input(s, string_mode, errors_out);
    ScriptP script(new Script);
    parseOper(input, *script, PREC_ALL, I_RET);
    // nothing should be left over after that expression
    if (input.read() != TOK_EOF) {
        input.expected(_("end of input"));
    }
    // a script with errors is not usable
    if (!errors_out.empty()) {
        return ScriptP();
    }
    return script;
}
/// Parse a script, throws a ScriptParseErrors exception if any errors were found.
ScriptP parse(const String& s, bool string_mode) {
    vector<ScriptParseError> errors;
    ScriptP script = parse(s, string_mode, errors);
    if (errors.empty()) {
        return script;
    }
    throw ScriptParseErrors(errors);
}
/// Read a token, and add an error if it is not the expected one.
/** @param input Read the token from here, it is consumed in both cases
 * @param expect The text the token should have
 * @param name_in_error How to call the token in the error message, by default expect is used
 * @return Was the expected token found?
 */
bool expectToken(TokenIterator& input, const Char* expect, const Char* name_in_error = nullptr) {
    if (input.read() == expect) {
        return true;
    }
    const Char* description = name_in_error ? name_in_error : expect;
    input.expected(description);
    return false;
}
/// Parse a single expression: a constant, a variable, a parenthesized or braced expression,
/// a unary operator applied to an expression, or one of the if / for / rgb constructs.
/** @param input Read tokens from the input
 * @param script Add resulting instructions to the script
 * @param minPrec Minimum precedence level, unary operators are only recognized if this is at most PREC_UNARY
 *
 * If the next token can't start an expression an error is added, that token is consumed,
 * and no instructions are added.
 */
void parseExpr(TokenIterator& input, Script& script, Precedence minPrec) {
    // NOTE: token refers to the token buffer of input, it must not be used after more tokens are read.
    const Token& token = input.read();
    if (token == _("(")) {
        // Parentheses = grouping for precedence of expressions
        parseOper(input, script, PREC_ALL);
        expectToken(input, _(")"));
    } else if (token == _("{")) {
        // {} = function block. Parse a new Script
        intrusive_ptr<Script> subScript(new Script);
        parseOper(input, *subScript, PREC_ALL, I_RET);
        expectToken(input, _("}"));
        script.addInstruction(I_PUSH_CONST, subScript);
    } else if (minPrec <= PREC_UNARY && token == _("-")) {
        parseOper(input, script, PREC_UNARY, I_UNARY, I_NEGATE); // unary negation
    } else if (token == TOK_NAME) {
        if (minPrec <= PREC_UNARY && token == _("not")) {
            parseOper(input, script, PREC_UNARY, I_UNARY, I_NOT); // unary not
        } else if (token == _("true")) {
            script.addInstruction(I_PUSH_CONST, script_true);  // boolean constant : true
        } else if (token == _("false")) {
            script.addInstruction(I_PUSH_CONST, script_false); // boolean constant : false
        } else if (token == _("if")) {
            // if AAA then BBB else CCC
            unsigned int jmpElse, jmpEnd;
            parseOper(input, script, PREC_SET);                 // AAA
            jmpElse = script.getLabel();                        // jmp_else:
            script.addInstruction(I_JUMP_IF_NOT, 0xFFFF);       //   jump if not, to lbl_else
            expectToken(input, _("then"));                      // then
            parseOper(input, script, PREC_SET);                 // BBB
            jmpEnd = script.getLabel();                         // jmp_end:
            script.addInstruction(I_JUMP, 0xFFFF);              //   jump lbl_end
            script.comeFrom(jmpElse);                           // lbl_else:
            // The else part is optional. Only look at the next token: if it is not 'else'
            // it belongs to whatever follows the if expression, so it must not be consumed.
            if (input.peek() == _("else")) {                    // else
                input.read();
                parseOper(input, script, PREC_SET);             // CCC
            } else {
                script.addInstruction(I_PUSH_CONST, script_nil);
            }
            script.comeFrom(jmpEnd);                            // lbl_end:
        } else if (token == _("for")) {
            // the loop body should have a net stack effect of 0, but the entire expression of +1
            // solution: add all results from the body, start with nil
            //   for each AAA in BBB do DDD
            //   for AAA from BBB to CCC do DDD
            const bool for_each = input.peek() == _("each");
            if (for_each) input.read();                         // each
            Token name = input.read();                          // AAA
            if (name != TOK_NAME) {
                input.expected(_("name"));
            }
            if (for_each) {
                expectToken(input, _("in"));                    // in
                parseOper(input, script, PREC_SET);             // BBB
                script.addInstruction(I_UNARY, I_ITERATOR_C);   // iterator_collection
            } else {
                expectToken(input, _("from"));                  // from
                parseOper(input, script, PREC_SET);             // BBB
                expectToken(input, _("to"));                    // to
                parseOper(input, script, PREC_SET);             // CCC
                script.addInstruction(I_BINARY, I_ITERATOR_R);  // iterator_range
            }
            // the loop itself is the same for both forms
            script.addInstruction(I_PUSH_CONST, script_nil);    // push nil
            unsigned int lblStart = script.getLabel();          // lbl_start:
            script.addInstruction(I_LOOP, 0xFFFF);              //   loop
            expectToken(input, _("do"));                        // do
            script.addInstruction(I_SET_VAR,
                                  string_to_variable(name.value)); // set name
            script.addInstruction(I_POP);                       //   pop
            parseOper(input, script, PREC_SET, I_BINARY, I_ADD);// DDD; add
            script.addInstruction(I_JUMP, lblStart);            //   jump lbl_start
            script.comeFrom(lblStart);                          // lbl_end:
        } else if (token == _("rgb")) {
            // rgb(r, g, b)
            expectToken(input, _("("));
            parseOper(input, script, PREC_ALL); // r
            expectToken(input, _(","));
            parseOper(input, script, PREC_ALL); // g
            expectToken(input, _(","));
            parseOper(input, script, PREC_ALL); // b
            expectToken(input, _(")"));
            script.addInstruction(I_TERNARY, I_RGB);
        } else {
            // variable
            unsigned int var = string_to_variable(token.value);
            script.addInstruction(I_GET_VAR, var);
        }
    } else if (token == TOK_INT) {
        long l = 0; // stays 0 if the conversion fails
        token.value.ToLong(&l);
        script.addInstruction(I_PUSH_CONST, to_script(l));
    } else if (token == TOK_DOUBLE) {
        double d = 0; // stays 0 if the conversion fails
        token.value.ToDouble(&d);
        script.addInstruction(I_PUSH_CONST, to_script(d));
    } else if (token == TOK_STRING) {
        script.addInstruction(I_PUSH_CONST, to_script(token.value));
    } else {
        input.expected(_("expression"));
    }
}
/// Is the last instruction of the script a push of a constant whose string form is empty?
/** Used for optimizing away the concatenation with "" in smart strings.
 * A script without instructions (possible after a parse error) doesn't end with anything.
 */
static bool endsWithEmptyStringConstant(Script& script) {
    if (script.getInstructions().empty()) return false;
    Instruction last = script.getInstructions().back();
    return last.instr == I_PUSH_CONST && script.getConstants()[last.data]->toString().empty();
}

/// Parse an expression followed by any number of operators with their arguments.
/** @param input Read tokens from the input
 * @param script Add resulting instructions to the script
 * @param minPrec Minimum precedence level, operators below this level are left in the input
 * @param closeWith Instruction to add at the end, nothing is added for I_NOP
 * @param closeWithData Data for the instruction at the end
 */
void parseOper(TokenIterator& input, Script& script, Precedence minPrec, InstructionType closeWith, int closeWithData) {
    parseExpr(input, script, minPrec); // first argument
    // read any operators after an expression
    // EBNF:                    expr = expr | expr oper expr
    // without left recursion:  expr = expr (oper expr)*
    while (true) {
        // NOTE: token refers to the token buffer of input, it must not be used after more tokens are read.
        const Token& token = input.read();
        if (token != TOK_OPER && token != TOK_NAME && token != TOK_LPAREN) {
            // not an operator-like token
            input.putBack();
            break;
        }
        if (minPrec <= PREC_SEQ && token == _(";")) {
            Token next = input.peek(1);
            if (next == TOK_RPAREN || next == TOK_EOF) {
                // allow ; at end of expression without errors
                break;
            }
            script.addInstruction(I_POP); // discard result of first expression
            parseOper(input, script, PREC_SET);
        } else if (minPrec <= PREC_SET && token == _(":=")) {
            // We made a mistake, the part before the := should be a variable name,
            // not an expression. Remove that instruction.
            int variable = 0; // only meaningful if the left hand side is a variable
            if (script.getInstructions().empty()) {
                // nothing was parsed before the := (that was reported as an error already),
                // so there is no instruction to look at or to remove
                input.add_error(_("Can only assign to variables"));
            } else {
                Instruction instr = script.getInstructions().back();
                if (instr.instr != I_GET_VAR) {
                    input.add_error(_("Can only assign to variables"));
                }
                script.getInstructions().pop_back();
                variable = instr.data;
            }
            parseOper(input, script, PREC_SET, I_SET_VAR, variable);
        }
        else if (minPrec <= PREC_AND && token == _("and")) parseOper(input, script, PREC_CMP,   I_BINARY, I_AND);
        else if (minPrec <= PREC_AND && token == _("or" )) parseOper(input, script, PREC_CMP,   I_BINARY, I_OR);
        else if (minPrec <= PREC_CMP && token == _("="))   parseOper(input, script, PREC_ADD,   I_BINARY, I_EQ);
        else if (minPrec <= PREC_CMP && token == _("=="))  parseOper(input, script, PREC_ADD,   I_BINARY, I_EQ);
        else if (minPrec <= PREC_CMP && token == _("!="))  parseOper(input, script, PREC_ADD,   I_BINARY, I_NEQ);
        else if (minPrec <= PREC_CMP && token == _("<"))   parseOper(input, script, PREC_ADD,   I_BINARY, I_LT);
        else if (minPrec <= PREC_CMP && token == _(">"))   parseOper(input, script, PREC_ADD,   I_BINARY, I_GT);
        else if (minPrec <= PREC_CMP && token == _("<="))  parseOper(input, script, PREC_ADD,   I_BINARY, I_LE);
        else if (minPrec <= PREC_CMP && token == _(">="))  parseOper(input, script, PREC_ADD,   I_BINARY, I_GE);
        else if (minPrec <= PREC_ADD && token == _("+"))   parseOper(input, script, PREC_MUL,   I_BINARY, I_ADD);
        else if (minPrec <= PREC_ADD && token == _("-"))   parseOper(input, script, PREC_MUL,   I_BINARY, I_SUB);
        else if (minPrec <= PREC_MUL && token == _("*"))   parseOper(input, script, PREC_UNARY, I_BINARY, I_MUL);
        else if (minPrec <= PREC_MUL && token == _("/"))   parseOper(input, script, PREC_UNARY, I_BINARY, I_DIV);
        else if (minPrec <= PREC_MUL && token == _("mod")) parseOper(input, script, PREC_UNARY, I_BINARY, I_MOD);
        else if (minPrec <= PREC_FUN && token == _(".")) { // get member by name
            const Token& member = input.read();
            if (member == TOK_NAME || member == TOK_INT || member == TOK_DOUBLE || member == TOK_STRING) {
                script.addInstruction(I_MEMBER_C, member.value);
            } else {
                input.expected(_("name"));
            }
        } else if (minPrec <= PREC_FUN && token == _("[")) { // get member by expr
            parseOper(input, script, PREC_ALL, I_BINARY, I_MEMBER);
            expectToken(input, _("]"));
        } else if (minPrec <= PREC_FUN && token == _("(")) {
            // function call, read arguments
            vector<int> arguments;
            Token t = input.peek();
            // Also stop at the end of the input: the end of input token repeats forever,
            // so without this check a missing ) would keep us here indefinitely.
            while (t != _(")") && t != TOK_EOF) {
                if (input.peek(2) == _(":")) {
                    // name: ...
                    arguments.push_back(string_to_variable(t.value));
                    input.read(); // skip the name
                    input.read(); // and the :
                    parseOper(input, script, PREC_SEQ);
                } else {
                    // implicit "input" argument
                    arguments.push_back(string_to_variable(_("input")));
                    parseOper(input, script, PREC_SEQ);
                }
                t = input.peek();
                if (t == _(",")) {
                    // Comma separating the arguments
                    input.read();
                    t = input.peek();
                }
            }
            expectToken(input, _(")")); // skip the ), it is an error if the input ended before it
            // generate instruction, followed by one I_NOP per argument that holds the argument's name
            script.addInstruction(I_CALL, (unsigned int)arguments.size());
            FOR_EACH(arg,arguments) {
                script.addInstruction(I_NOP, arg);
            }
        } else if (minPrec <= PREC_STRING && token == _("\"{")) {
            // for smart strings: "x" {{ e }} "y"
            // optimize: "" + e  ->  e
            if (endsWithEmptyStringConstant(script)) {
                script.getInstructions().pop_back();
                parseOper(input, script, PREC_ALL);                  // e
            } else {
                parseOper(input, script, PREC_ALL, I_BINARY, I_ADD); // e
            }
            if (expectToken(input, _("}\""), _("}"))) {
                parseOper(input, script, PREC_NONE);                 // y
                // optimize: e + ""  ->  e
                if (endsWithEmptyStringConstant(script)) {
                    script.getInstructions().pop_back();
                } else {
                    script.addInstruction(I_BINARY, I_ADD);
                }
            }
        } else if (minPrec <= PREC_NEWLINE && token.newline) {
            // newline functions as ;
            // only if we don't match another token!
            input.putBack();
            script.addInstruction(I_POP);
            parseOper(input, script, PREC_SET);
        } else {
            // an operator that is not for us, leave it for our caller
            input.putBack();
            break;
        }
    }
    // add closing instruction
    if (closeWith != I_NOP) {
        script.addInstruction(closeWith, closeWithData);
    }
}