mirror of
https://github.com/amyinspace/MagicSetEditor2.git
synced 2026-06-12 13:37:00 -04:00
Simplify regex code by moving things to the Results class, and by using iterators instead of positions
git-svn-id: svn://svn.code.sf.net/p/magicseteditor/code/trunk@1193 0fc631ac-6414-0410-93d0-97cfa31319b6
This commit is contained in:
+115
-102
@@ -15,7 +15,7 @@
|
|||||||
/* 2008-09-01:
|
/* 2008-09-01:
|
||||||
* Script profiling shows that the boost library is significantly faster:
|
* Script profiling shows that the boost library is significantly faster:
|
||||||
* When loading a large magic set (which calls ScriptManager::updateAll):
|
* When loading a large magic set (which calls ScriptManager::updateAll):
|
||||||
* function Calls wxRegex boost
|
* function Calls wxRegEx boost
|
||||||
* ------------------------------------------------------------------
|
* ------------------------------------------------------------------
|
||||||
* replace 3791 0.38607 0.20857
|
* replace 3791 0.38607 0.20857
|
||||||
* filter_text 11 0.32251 0.02446
|
* filter_text 11 0.32251 0.02446
|
||||||
@@ -43,8 +43,6 @@ class ScriptRegex : public ScriptValue {
|
|||||||
|
|
||||||
#if USE_BOOST_REGEX
|
#if USE_BOOST_REGEX
|
||||||
|
|
||||||
typedef boost::match_results<const Char*> Results;
|
|
||||||
|
|
||||||
ScriptRegex(const String& code) {
|
ScriptRegex(const String& code) {
|
||||||
// compile string
|
// compile string
|
||||||
try {
|
try {
|
||||||
@@ -56,56 +54,42 @@ class ScriptRegex : public ScriptValue {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Match results of the boost regex engine, with helpers that hand back String objects
struct Results : public boost::match_results<const Char*> {
	/// Text of one sub match, copied into a String; sub 0 is the total match
	inline String str(int sub = 0) const {
		const_reference range = (*this)[sub];
		return String(range.first, range.second);
	}
	/// Expand a replacement string against this match
	/** The replacement is interpreted using the boost::format_sed rules. */
	inline String format(const String& format) const {
		String expanded;
		insert_iterator<String> out(expanded, expanded.end());
		// boost wants the format as a basic_string, so copy it over
		std::basic_string<Char> pattern(format.begin(), format.end());
		boost::match_results<const Char*>::format(out, pattern, boost::format_sed);
		return expanded;
	}
};
|
||||||
|
|
||||||
inline bool matches(const String& str) {
|
inline bool matches(const String& str) {
|
||||||
return regex_search(str.c_str(), regex);
|
return regex_search(str.c_str(), regex);
|
||||||
}
|
}
|
||||||
inline bool matches(const String& str, Results& results) {
|
inline bool matches(Results& results, const Char* begin, const Char* end) {
|
||||||
return regex_search(str.c_str(), results, regex);
|
return regex_search(begin, end, results, regex);
|
||||||
}
|
|
||||||
inline size_t match_count(const Results& results) {
|
|
||||||
return results.size();
|
|
||||||
}
|
|
||||||
inline void get(const Results& results, size_t* start, size_t* length, int sub) {
|
|
||||||
*start = results.position(sub);
|
|
||||||
*length = results.length(sub);
|
|
||||||
}
|
|
||||||
inline String replace(const Results& results, const String&, const String& format) {
|
|
||||||
std::basic_string<Char> fmt; format_string(format,fmt);
|
|
||||||
String output;
|
|
||||||
results.format(insert_iterator<String>(output, output.end()), fmt);
|
|
||||||
return output;
|
|
||||||
}
|
}
|
||||||
inline void replace_all(String* input, const String& format) {
|
inline void replace_all(String* input, const String& format) {
|
||||||
std::basic_string<Char> fmt; format_string(format,fmt);
|
//std::basic_string<Char> fmt; format_string(format,fmt);
|
||||||
|
std::basic_string<Char> fmt(format.begin(),format.end());
|
||||||
String output;
|
String output;
|
||||||
regex_replace(insert_iterator<String>(output, output.end()),
|
regex_replace(insert_iterator<String>(output, output.end()),
|
||||||
input->begin(), input->end(), regex, fmt);
|
input->begin(), input->end(), regex, fmt, boost::format_sed);
|
||||||
*input = output;
|
*input = output;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
BoostRegex regex; ///< The regular expression
|
BoostRegex regex; ///< The regular expression
|
||||||
|
|
||||||
// convert wx style format string to boost style
|
|
||||||
// i.e. "&" -> "$&"
|
|
||||||
static void format_string(const String& format, std::basic_string<Char>& fmt) {
|
|
||||||
for (size_t i = 0 ; i < format.size() ; ++i) {
|
|
||||||
Char c = format.GetChar(i);
|
|
||||||
if (c == _('\\') && i + 1 < format.size()) {
|
|
||||||
fmt.append(format.begin()+i,format.begin()+i+2);
|
|
||||||
i++;
|
|
||||||
} else if (c == _('&')) {
|
|
||||||
fmt += _("$&");
|
|
||||||
} else {
|
|
||||||
fmt += c;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
struct Results{}; // dummy for compatability
|
|
||||||
|
|
||||||
ScriptRegex(const String& code) {
|
ScriptRegex(const String& code) {
|
||||||
// compile string
|
// compile string
|
||||||
if (!regex.Compile(code, wxRE_ADVANCED)) {
|
if (!regex.Compile(code, wxRE_ADVANCED)) {
|
||||||
@@ -114,19 +98,44 @@ class ScriptRegex : public ScriptValue {
|
|||||||
assert(regex.IsValid());
|
assert(regex.IsValid());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool matches(const String& str, Results=Results()) {
|
// Interface for compatability with boost::regex
|
||||||
|
struct Results {
|
||||||
|
typedef pair<const Char*,const Char*> value_type; // (begin,end)
|
||||||
|
typedef value_type const_reference;
|
||||||
|
/// Number of submatches (+1 for the total match)
|
||||||
|
inline size_t size() const { return regex->GetMatchCount(); }
|
||||||
|
/// Get a submatch
|
||||||
|
inline value_type operator [] (int sub) const {
|
||||||
|
size_t pos, length;
|
||||||
|
bool ok = regex->GetMatch(&pos, &length, sub);
|
||||||
|
assert(ok);
|
||||||
|
return make_pair(begin + pos, begin + pos + length);
|
||||||
|
}
|
||||||
|
/// Get a sub match
|
||||||
|
inline String str(int sub = 0) const {
|
||||||
|
const_reference v = (*this)[sub];
|
||||||
|
return String(v.first, v.second);
|
||||||
|
}
|
||||||
|
/// Format a replacement string
|
||||||
|
inline String format(const String& format) const {
|
||||||
|
const_reference v = (*this)[0];
|
||||||
|
String inside(v.first, v.second);
|
||||||
|
regex->ReplaceFirst(&inside, format);
|
||||||
|
return inside;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
wxRegEx* regex;
|
||||||
|
const Char* begin;
|
||||||
|
friend class ScriptRegex;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline bool matches(const String& str) {
|
||||||
return regex.Matches(str);
|
return regex.Matches(str);
|
||||||
}
|
}
|
||||||
inline size_t match_count(Results) {
|
inline bool matches(Results& results, const Char* begin, const Char* end) {
|
||||||
return regex.GetMatchCount();
|
results.regex = &regex;
|
||||||
}
|
results.begin = begin;
|
||||||
inline void get(Results, size_t* start, size_t* length, int sub) {
|
return regex.Matches(begin, 0, end - begin);
|
||||||
bool ok = regex.GetMatch(start, length, sub);
|
|
||||||
assert(ok);
|
|
||||||
}
|
|
||||||
inline String replace(Results, String input, const String& format) {
|
|
||||||
regex.Replace(&input, format, 1);
|
|
||||||
return input;
|
|
||||||
}
|
}
|
||||||
inline void replace_all(String* input, const String& format) {
|
inline void replace_all(String* input, const String& format) {
|
||||||
regex.Replace(input, format);
|
regex.Replace(input, format);
|
||||||
@@ -136,6 +145,26 @@ class ScriptRegex : public ScriptValue {
|
|||||||
wxRegEx regex; ///< The regular expression
|
wxRegEx regex; ///< The regular expression
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// Match only if in_context also matches
|
||||||
|
bool matches(Results& results, const String& str, const Char* begin, const ScriptRegexP& in_context) {
|
||||||
|
if (!in_context) {
|
||||||
|
return matches(results, begin, str.end());
|
||||||
|
} else {
|
||||||
|
while (matches(results, begin, str.end())) {
|
||||||
|
Results::const_reference match = results[0];
|
||||||
|
String context_str(str.begin(), match.first); // before
|
||||||
|
context_str += _("<match>");
|
||||||
|
context_str.append(match.second, str.end());
|
||||||
|
if (in_context->matches(context_str)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
begin = match.second; // skip
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
ScriptRegexP regex_from_script(const ScriptValueP& value) {
|
ScriptRegexP regex_from_script(const ScriptValueP& value) {
|
||||||
@@ -161,41 +190,36 @@ struct RegexReplacer {
|
|||||||
ScriptValueP replacement_function; ///< Replacement function instead of a simple string, optional
|
ScriptValueP replacement_function; ///< Replacement function instead of a simple string, optional
|
||||||
bool recursive; ///< Recurse into the replacement
|
bool recursive; ///< Recurse into the replacement
|
||||||
|
|
||||||
String apply(Context& ctx, String& input, int level = 0) const {
|
String apply(Context& ctx, const String& input, int level = 0) const {
|
||||||
// match first, then check context of match
|
|
||||||
String ret;
|
String ret;
|
||||||
|
const Char* start = input.begin();
|
||||||
ScriptRegex::Results results;
|
ScriptRegex::Results results;
|
||||||
while (match->matches(input, results)) {
|
while (match->matches(results, input, start, context)) {
|
||||||
// for each match ...
|
// for each match ...
|
||||||
size_t start, len;
|
ScriptRegex::Results::const_reference pos = results[0];
|
||||||
match->get(results, &start, &len, 0);
|
ret.append(start, pos.first); // everything before the match position stays
|
||||||
ret += input.substr(0, start); // everything before the match position stays
|
// determine replacement
|
||||||
String inside = input.substr(start, len); // inside the match
|
String inside;
|
||||||
String next_input = input.substr(start + len); // next loop the input is after this match
|
if (replacement_function) {
|
||||||
if (!context || context->matches(ret + _("<match>") + next_input)) {
|
// set match results in context
|
||||||
// the context matches -> perform replacement
|
for (UInt sub = 0 ; sub < results.size() ; ++sub) {
|
||||||
if (replacement_function) {
|
String name = sub == 0 ? _("input") : String(_("_")) << sub;
|
||||||
// set match results in context
|
ctx.setVariable(name, to_script(results.str(sub)));
|
||||||
for (UInt m = 0 ; m < match->match_count(results) ; ++m) {
|
|
||||||
match->get(results, &start, &len, m);
|
|
||||||
String name = m == 0 ? _("input") : String(_("_")) << m;
|
|
||||||
String value = input.substr(start, len);
|
|
||||||
ctx.setVariable(name, to_script(value));
|
|
||||||
}
|
|
||||||
// call
|
|
||||||
inside = replacement_function->eval(ctx)->toString();
|
|
||||||
} else {
|
|
||||||
inside = match->replace(results, inside, replacement_string); // replace inside
|
|
||||||
}
|
}
|
||||||
|
// call
|
||||||
|
inside = replacement_function->eval(ctx)->toString();
|
||||||
|
} else {
|
||||||
|
inside = results.format(replacement_string);
|
||||||
}
|
}
|
||||||
|
// append replaced inside
|
||||||
if (recursive && level < 20) {
|
if (recursive && level < 20) {
|
||||||
ret += apply(ctx, inside, level + 1);
|
ret += apply(ctx, inside, level + 1);
|
||||||
} else {
|
} else {
|
||||||
ret += inside;
|
ret += inside;
|
||||||
}
|
}
|
||||||
input = next_input;
|
start = pos.second;
|
||||||
}
|
}
|
||||||
ret += input;
|
ret.append(start, input.end());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -244,18 +268,13 @@ SCRIPT_FUNCTION_WITH_SIMPLIFY(filter_text) {
|
|||||||
SCRIPT_OPTIONAL_PARAM_C_(ScriptRegexP, in_context);
|
SCRIPT_OPTIONAL_PARAM_C_(ScriptRegexP, in_context);
|
||||||
String ret;
|
String ret;
|
||||||
// find all matches
|
// find all matches
|
||||||
|
const Char* start = input.begin();
|
||||||
ScriptRegex::Results results;
|
ScriptRegex::Results results;
|
||||||
while (match->matches(input, results)) {
|
while (match->matches(results, input, start, in_context)) {
|
||||||
// match, append to result
|
// match, append to result
|
||||||
size_t start, len;
|
ScriptRegex::Results::const_reference pos = results[0];
|
||||||
match->get(results, &start, &len, 0);
|
ret.append(start, pos.second); // the match
|
||||||
String inside = input.substr(start, len); // the match
|
start = pos.second;
|
||||||
String next_input = input.substr(start + len); // everything after the match
|
|
||||||
if (!in_context || in_context->matches(input.substr(0,start) + _("<match>") + next_input)) {
|
|
||||||
// no context or context match
|
|
||||||
ret += inside;
|
|
||||||
}
|
|
||||||
input = next_input;
|
|
||||||
}
|
}
|
||||||
SCRIPT_RETURN(ret);
|
SCRIPT_RETURN(ret);
|
||||||
}
|
}
|
||||||
@@ -276,18 +295,12 @@ SCRIPT_FUNCTION_WITH_SIMPLIFY(break_text) {
|
|||||||
SCRIPT_OPTIONAL_PARAM_C_(ScriptRegexP, in_context);
|
SCRIPT_OPTIONAL_PARAM_C_(ScriptRegexP, in_context);
|
||||||
ScriptCustomCollectionP ret(new ScriptCustomCollection);
|
ScriptCustomCollectionP ret(new ScriptCustomCollection);
|
||||||
// find all matches
|
// find all matches
|
||||||
|
const Char* start = input.begin();
|
||||||
ScriptRegex::Results results;
|
ScriptRegex::Results results;
|
||||||
while (match->matches(input, results)) {
|
while (match->matches(results, input, start, in_context)) {
|
||||||
// match, append to result
|
// match, append to result
|
||||||
size_t start, len;
|
ret->value.push_back(to_script(results.str()));
|
||||||
match->get(results, &start, &len, 0);
|
start = results[0].second;
|
||||||
String inside = input.substr(start, len); // the match
|
|
||||||
String next_input = input.substr(start + len); // everything after the match
|
|
||||||
if (!in_context || in_context->matches(input.substr(0,start) + _("<match>") + next_input)) {
|
|
||||||
// no context or context match
|
|
||||||
ret->value.push_back(to_script(inside));
|
|
||||||
}
|
|
||||||
input = next_input;
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -308,18 +321,18 @@ SCRIPT_FUNCTION_WITH_SIMPLIFY(split_text) {
|
|||||||
SCRIPT_PARAM_DEFAULT_N(bool, _("include empty"), include_empty, true);
|
SCRIPT_PARAM_DEFAULT_N(bool, _("include empty"), include_empty, true);
|
||||||
ScriptCustomCollectionP ret(new ScriptCustomCollection);
|
ScriptCustomCollectionP ret(new ScriptCustomCollection);
|
||||||
// find all matches
|
// find all matches
|
||||||
|
const Char* start = input.begin();
|
||||||
ScriptRegex::Results results;
|
ScriptRegex::Results results;
|
||||||
while (match->matches(input, results)) {
|
while (match->matches(results, start, input.end())) {
|
||||||
// match, append to result
|
// match, append the part before it to the result
|
||||||
size_t start, len;
|
ScriptRegex::Results::const_reference pos = results[0];
|
||||||
match->get(results, &start, &len, 0);
|
if (include_empty || pos.first != start) {
|
||||||
if (include_empty || start > 0) {
|
ret->value.push_back(to_script( String(start,pos.first) ));
|
||||||
ret->value.push_back(to_script(input.substr(0,start)));
|
|
||||||
}
|
}
|
||||||
input = input.substr(start + len); // everything after the match
|
start = pos.second;
|
||||||
}
|
}
|
||||||
if (include_empty || !input.empty()) {
|
if (include_empty || start != input.end()) {
|
||||||
ret->value.push_back(to_script(input));
|
ret->value.push_back(to_script( String(start,input.end()) ));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user