diff --git a/doc/function/break_text.txt b/doc/function/break_text.txt new file mode 100644 index 00000000..f034c995 --- /dev/null +++ b/doc/function/break_text.txt @@ -0,0 +1,39 @@ +Function: break_text + +--Usage-- +> break_text(some_string, match: regular expression, in_context: regular expression) +> break_rule(match: ..., in_context: ...)(some_string) + +Break text by only keeping the parts of the input that match the regular expression. +The function returns a [[type:list]] of parts. + +If @in_context@ is given, the context must also match the string where the match is represented as <match>. + +This function is available in [[script:rule form]]. +When @break_text@ is used many times, the rule form is more efficient, because the regular expression is only compiled once. + +--Filter vs. break-- + +The function @filter_text@ is very similar to @break_text@, but instead of returning a list it concatenates the items. +So, for example, where @break_text@ would return @["a","b","c"]@, @filter_text@ would return @"abc"@. +In fact, @filter_text@ could be implemented as +> filter_text := { for part in break_text() do part } + +--Parameters-- +! Parameter Type Description +| @input@ [[type:string]] String to break into parts. +| @match@ [[type:regex]] Regular expression to match. +| @in_context@ [[type:regex]] (optional) Context to match + +--Examples-- +> break_text(match: "a", "banana") == ["a","a","a"] +> break_text(match: "na|.", "banana") == ["b","a","na","na"] +> break_text(match: "ap", "banana") == [] +> +> f := break_rule(match: "xx+") +> f("xyzxxxxyyzz") == ["xxxx"] + +--See also-- +| [[fun:filter_text|filter_text / filter_rule]] + Keep only the text matching a regular expression. 
+ diff --git a/doc/function/filter_text.txt b/doc/function/filter_text.txt index eb81c63c..88c73ffc 100644 --- a/doc/function/filter_text.txt +++ b/doc/function/filter_text.txt @@ -9,7 +9,7 @@ Filter text by only keeping the parts of the input that match the regular expres If @in_context@ is given, the context must also match the string where the match is represented as <match>. This function is available in [[script:rule form]]. -When the filter is used many times the rule form is more efficient, because the regular expression is only compiled once. +When @filter_text@ is used many times, the rule form is more efficient, because the regular expression is only compiled once. --Parameters-- ! Parameter Type Description @@ -26,5 +26,7 @@ When the filter is used many times the rule form is more efficient, because the > f("xyzxxyyzz") == "xx" --See also-- +| [[fun:break_text|break_text / break_rule]] + Break text into parts each matching a regular expression. | [[fun:replace|replace / replace_rule]] Replace text matching a regular expression. diff --git a/doc/function/index.txt b/doc/function/index.txt index fe34aebe..dafb0ecc 100644 --- a/doc/function/index.txt +++ b/doc/function/index.txt @@ -16,6 +16,8 @@ These functions are built into the program, other [[type:function]]s can be defi Replace text matching a regular expression. | [[fun:filter_text|filter_text / filter_rule]] Keep only the text matching a regular expression. +| [[fun:break_text|break_text / break_rule]] + Break text into parts each matching a regular expression. | [[fun:sort_text|sort_text / sort_rule]] Sort the letters in a string using a custom order. | [[fun:contains]] Does a string contain another one? 
diff --git a/src/data/action/generic.hpp b/src/data/action/generic.hpp new file mode 100644 index 00000000..3f4aba36 --- /dev/null +++ b/src/data/action/generic.hpp @@ -0,0 +1,117 @@ +//+----------------------------------------------------------------------------+ +//| Description: Magic Set Editor - Program to make Magic (tm) cards | +//| Copyright: (C) 2001 - 2008 Twan van Laarhoven and "coppro" | +//| License: GNU General Public License 2 or later (see file COPYING) | +//+----------------------------------------------------------------------------+ + +#ifndef HEADER_DATA_ACTION_GENERIC +#define HEADER_DATA_ACTION_GENERIC + +/** @file data/action/generic.hpp + * + * Generic action stuff + */ + +// ----------------------------------------------------------------------------- : Includes + +#include +#include + +// ----------------------------------------------------------------------------- : Generic add/remove action + +enum AddingOrRemoving {ADD, REMOVE}; + +/// Adding or removing some objects from a vector +template +class GenericAddAction { + public: + GenericAddAction(AddingOrRemoving, const T& item, const vector& container); ///< Add one item at the end of container, or remove its first occurrence; throws InternalError if the item to remove is absent + GenericAddAction(AddingOrRemoving, const vector& items, const vector& container); ///< Add items at the end of container, or remove every element of container that occurs in items; throws InternalError if the number of elements found differs from items.size() + + String getName() const; + void perform(vector& container, bool to_undo) const; ///< Insert the recorded steps into container, or erase them, depending on adding and to_undo + + /// A step of removing/adding + struct Step { + inline Step(size_t pos, const T& item) : pos(pos), item(item) {} + size_t pos; + T item; + }; + bool adding; ///< Were objects added? 
(as opposed to removed) + vector steps; ///< Added/removed objects, sorted by ascending pos +}; + +// ----------------------------------------------------------------------------- : Implementation + +template +bool contains(const vector& items, const T& item) { + return find(items.begin(), items.end(), item) != items.end(); +} + +template +GenericAddAction::GenericAddAction(AddingOrRemoving ar, const T& item, const vector& container) + : adding(ar == ADD) +{ + if (ar == ADD) { + size_t pos = container.size(); + steps.push_back(Step(pos, item)); + } else { + for (size_t pos = 0 ; pos < container.size() ; ++pos) { + if (container[pos] == item) { + steps.push_back(Step(pos, item)); + return; + } + } + throw InternalError(_("Item to remove not found in container")); + } +} + +template +GenericAddAction::GenericAddAction(AddingOrRemoving ar, const vector& items, const vector& container) + : adding(ar == ADD) +{ + if (ar == ADD) { + size_t pos = container.size(); + for (vector::const_iterator it = items.begin() ; it != items.end() ; ++it) { + steps.push_back(Step(pos++, *it)); + } + } else { + for (size_t pos = 0 ; pos < container.size() ; ++pos) { + if (contains(items, container[pos])) { + steps.push_back(Step(pos, container[pos])); + // no early return here: keep scanning so every requested item gets a step and the size check below is reached + } + } + if (steps.size() != items.size()) { + throw InternalError(_("Item to remove not found in container")); + } + } +} + +template +String GenericAddAction::getName() const { + String type = type_name(steps.front().item) + (steps.size() == 1 ? _("") : _("s")); + return adding ? 
_ACTION_1_("add object", type) : _ACTION_1_("remove object", type); +} + +template +void GenericAddAction::perform(vector& container, bool to_undo) const { + if (adding != to_undo) { + // (re)insert the items + // ascending order, this is the reverse of removal + FOR_EACH_CONST(s, steps) { + assert(s.pos <= container.size()); + container.insert(container.begin() + s.pos, s.item); + } + } else { + // remove the items + // descending order, because earlier removals shift the rest of the vector + FOR_EACH_CONST_REVERSE(s, steps) { + assert(s.pos < container.size()); + container.erase(container.begin() + s.pos); + } + } +} + +// ----------------------------------------------------------------------------- : EOF +#endif diff --git a/src/script/functions/basic.cpp b/src/script/functions/basic.cpp index ee5ade71..80e48577 100644 --- a/src/script/functions/basic.cpp +++ b/src/script/functions/basic.cpp @@ -447,7 +447,7 @@ ScriptValueP replace_rule(Context& ctx) { ret->replacement = replace->toString(); } // in_context - SCRIPT_OPTIONAL_PARAM_N(String, _("in context"), in_context) { + SCRIPT_OPTIONAL_PARAM_C(String, in_context) { if (!ret->context.Compile(in_context, wxRE_ADVANCED)) { throw ScriptError(_("Error while compiling regular expression: '")+in_context+_("'")); } @@ -479,7 +479,7 @@ class ScriptFilterRule : public ScriptValue { bool ok = regex.GetMatch(&start, &len, 0); assert(ok); String inside = input.substr(start, len); // the match - String next_input = input.substr(start + len); // everything after the match + String next_input = input.substr(start + len); // everything after the match if (!context.IsValid() || context.Matches(input.substr(0,start) + _("") + next_input)) { // no context or context match ret += inside; @@ -497,7 +497,7 @@ class ScriptFilterRule : public ScriptValue { ScriptValueP filter_rule(Context& ctx) { // cached? 
SCRIPT_PARAM_C(String, match); - SCRIPT_PARAM_DEFAULT_N(String, _("in context"), in_context, String()); + SCRIPT_PARAM_DEFAULT_C(String, in_context, String()); // cache const int CACHE_SIZE = 6; @@ -540,6 +540,59 @@ SCRIPT_FUNCTION(filter_text) { return filter_rule(ctx)->eval(ctx); } +// ----------------------------------------------------------------------------- : Rules : regex filter/break + +class ScriptBreakRule : public ScriptValue { // script function: collects every match of regex in the input into a list + public: + virtual ScriptType type() const { return SCRIPT_FUNCTION; } + virtual String typeName() const { return _("break_rule"); } + virtual ScriptValueP eval(Context& ctx) const { + SCRIPT_PARAM_C(String, input); + intrusive_ptr ret(new ScriptCustomCollection); + while (!input.empty() && regex.Matches(input)) { + // match, append to result + size_t start, len; + bool ok = regex.GetMatch(&start, &len, 0); + assert(ok); + String inside = input.substr(start, len); // the match + String next_input = input.substr(start + len); // everything after the match + if (!context.IsValid() || context.Matches(input.substr(0,start) + _("") + next_input)) { + // no context or context match + ret->value.push_back(to_script(inside)); + } + input = (len > 0 || next_input.empty()) ? next_input : next_input.substr(1); // an empty match must still consume one character, otherwise the loop never terminates + } + return ret; + } + + wxRegEx regex; ///< Regex to match + wxRegEx context; ///< Match only in a given context, optional +}; + +// Create a regular expression rule for breaking strings; throws ScriptError if match or the optional in_context does not compile +ScriptValueP break_rule(Context& ctx) { + intrusive_ptr ret(new ScriptBreakRule); + // match + SCRIPT_PARAM_C(String, match); + if (!ret->regex.Compile(match, wxRE_ADVANCED)) { + throw ScriptError(_("Error while compiling regular expression: '")+match+_("'")); + } + // in_context + SCRIPT_OPTIONAL_PARAM_C(String, in_context) { + if (!ret->context.Compile(in_context, wxRE_ADVANCED)) { + throw ScriptError(_("Error while compiling regular expression: '")+in_context+_("'")); + } + } + return ret; +} + +SCRIPT_FUNCTION(break_rule) { + return break_rule(ctx); +} +SCRIPT_FUNCTION(break_text) { + return 
break_rule(ctx)->eval(ctx); +} + // ----------------------------------------------------------------------------- : Rules : regex match class ScriptMatchRule : public ScriptValue { @@ -657,10 +710,12 @@ void init_script_basic_functions(Context& ctx) { // advanced string rules/functions ctx.setVariable(_("replace"), script_replace); ctx.setVariable(_("filter text"), script_filter_text); + ctx.setVariable(_("break text"), script_break_text); ctx.setVariable(_("match"), script_match); ctx.setVariable(_("sort text"), script_sort_text); ctx.setVariable(_("replace rule"), script_replace_rule); ctx.setVariable(_("filter rule"), script_filter_rule); + ctx.setVariable(_("break rule"), script_break_rule); ctx.setVariable(_("match rule"), script_match_rule); ctx.setVariable(_("sort rule"), script_sort_rule); } diff --git a/src/script/functions/util.hpp b/src/script/functions/util.hpp index e4b2dd17..295802cb 100644 --- a/src/script/functions/util.hpp +++ b/src/script/functions/util.hpp @@ -136,6 +136,8 @@ inline Type from_script(const ScriptValueP& v, Variable var) { #define SCRIPT_PARAM_DEFAULT_N(Type, str, name, def) \ ScriptValueP name##_ = ctx.getVariableOpt(str); \ Type name = name##_ ? from_script(name##_, str) : def +#define SCRIPT_PARAM_DEFAULT_C(Type, name, def) \ + SCRIPT_PARAM_DEFAULT_N(Type, SCRIPT_VAR_ ## name, name, def) // ----------------------------------------------------------------------------- : Rules diff --git a/src/script/script.hpp b/src/script/script.hpp index 5ac5a891..6a50ed31 100644 --- a/src/script/script.hpp +++ b/src/script/script.hpp @@ -111,6 +111,7 @@ enum Variable , SCRIPT_VAR_in , SCRIPT_VAR_match , SCRIPT_VAR_replace +, SCRIPT_VAR_in_context , SCRIPT_VAR_order , SCRIPT_VAR_filter , SCRIPT_VAR_choice