mirror of
https://github.com/amyinspace/MagicSetEditor2.git
synced 2026-06-10 21:06:59 -04:00
Rewrite of keyword matching code. This fixes #20
Uses new iterator based tagged string functions.
This commit is contained in:
@@ -220,3 +220,39 @@ String regex_escape(const String& s);
|
||||
/** Basically replaces "(" with "(?:" */
|
||||
String make_non_capturing(const String& re);
|
||||
|
||||
// ----------------------------------------------------------------------------- : Iterator utilities
|
||||
|
||||
/// Tag type used to mark the end of an iteration that has no end iterator of its own.
struct end_sentinel_t {};

/// The value to compare against, e.g. `it != end_sentinel`.
/** Declared `inline constexpr` so that including this header from several
 *  translation units does not define the object more than once.
 */
inline constexpr end_sentinel_t end_sentinel{};

/// Iterate over a string, removing all matching substrings.
/** `match(it, end)` should either return false, or return true and advance `it`
 *  past the matched substring.
 *  It is called repeatedly until it returns false, and it is also called when
 *  `it == end`, so it has to cope with an empty range.
 */
template <typename It, typename End, typename Match>
struct SkipSubstringIterator {
public:
	/// Start iterating at `first`, immediately skipping any matches found there.
	/** The parameters are deliberately named differently from the members:
	 *  when they shadowed the members, the initial skip advanced the parameter
	 *  copy instead of the stored iterator, so leading matches were not removed.
	 */
	SkipSubstringIterator(It first, End last, Match const& matcher)
		: it(first), end(last), match(matcher)
	{
		skip_matches();
	}

	/// Has the underlying iterator reached the end of the range?
	bool operator == (end_sentinel_t) const {
		return it == end;
	}
	bool operator != (end_sentinel_t) const {
		return it != end;
	}

	/// The current (non-skipped) element, returned by value.
	auto operator * () const {
		return *it;
	}

	/// Move to the next element that is not part of a matching substring.
	auto& operator ++ () {
		++it;
		skip_matches();
		return *this;
	}

private:
	/// Advance `it` past every substring that `match` accepts at the current position.
	void skip_matches() {
		while (match(it, end)) {}
	}

	It it;
	End end;
	Match match;
};
|
||||
|
||||
template <typename It, typename End, typename Match>
|
||||
inline SkipSubstringIterator<It,End,Match> skip_substring_iterator(It it, End end, Match const& match) {
|
||||
return SkipSubstringIterator<It,End,Match>(it, end, match);
|
||||
}
|
||||
|
||||
+107
-7
@@ -106,9 +106,105 @@ String fix_old_tags(const String& str) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------- : Iterator algorithms
|
||||
|
||||
// Skip over a single tag.
// `it` must point at the '<' that opens the tag (checked by the assert).
// Returns the position just after the closing '>', or `end` if the tag is never closed.
[[nodiscard]] String::const_iterator skip_tag(String::const_iterator it, String::const_iterator end) {
	assert(it != end && *it == '<');
	// step over the '<', then scan forward for the terminating '>'
	for (++it; it != end; ++it) {
		if (*it == '>') {
			return ++it; // one past the '>'
		}
	}
	return it; // unterminated tag: it == end
}
|
||||
|
||||
// Skip over a run of consecutive tags.
// Returns the first position at or after `it` that does not start with '<', or `end`.
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end) {
	for (;;) {
		if (it == end || *it != '<') return it;
		it = skip_tag(it, end);
	}
}
|
||||
|
||||
// Skip over a run of consecutive tags, but only those of the requested kinds.
// A tag counts as a close tag when the character after '<' is '/', otherwise as an open tag.
// Stops at the first tag whose kind may not be skipped, at the first non-tag character, or at `end`.
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end, bool skip_open, bool skip_close) {
	while (it != end && *it == '<') {
		const auto next = it + 1;
		const bool is_close = next != end && *next == '/';
		const bool may_skip = is_close ? skip_close : skip_open;
		if (!may_skip) break;
		it = skip_tag(it, end);
	}
	return it;
}
|
||||
|
||||
// Advance `it` by `n` characters, not counting the characters that belong to tags.
// Returns early with `end` when the string has fewer than `n` untagged characters left.
// After the last counted character, open and/or close tags directly following it are
// also skipped, as selected by `after_open` / `after_close`.
[[nodiscard]] String::const_iterator advance_untagged(String::const_iterator it, String::const_iterator end, size_t n, bool after_open, bool after_close) {
	for (; n > 0; --n) {
		// tags in front of the next character do not count
		it = skip_all_tags(it, end);
		if (it == end) return it;
		++it;
	}
	return skip_all_tags(it, end, after_open, after_close);
}
|
||||
|
||||
/*
|
||||
// Does the string [it..end) contain the matching close tag for [tag..tag_end)?
|
||||
bool is_close_tag(String::const_iterator it, String::const_iterator end, String::const_iterator tag, String::const_iterator tag_end) {
|
||||
if (it == end) return false;
|
||||
if (*it != '<') return false;
|
||||
++it;
|
||||
if (it == end) return false;
|
||||
if (*it != '/') return false;
|
||||
assert(tag != tag_end && *tag == '<');
|
||||
++tag;
|
||||
return is_substr(it,end, tag,end);
|
||||
}
|
||||
|
||||
String::const_iterator find_close_tag(String::const_iterator tag, String::const_iterator end) {
|
||||
assert(tag != end && *tag == '<');
|
||||
auto tag_end = skip_tag(tag,end);
|
||||
int nesting = 1;
|
||||
String::const_iterator it = tag_end;
|
||||
while (it != end) {
|
||||
if (*it == '<') {
|
||||
if (is_substr(it,end, tag,tag_end)) {
|
||||
++nesting;
|
||||
} else if (is_close_tag(it,end, tag,tag_end)) {
|
||||
--nesting;
|
||||
if (nesting == 0) return it;
|
||||
}
|
||||
it = skip_tag(it,end);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
return end;
|
||||
}*/
|
||||
|
||||
// Count the characters in [it, end) that are not part of a tag.
[[nodiscard]] size_t untagged_length(String::const_iterator it, String::const_iterator end) {
	size_t count = 0;
	for (;;) {
		// drop any tags in front of the next real character
		it = skip_all_tags(it, end);
		if (it == end) return count;
		++count;
		++it;
	}
}
|
||||
|
||||
// ----------------------------------------------------------------------------- : Finding tags
|
||||
|
||||
size_t tag_start(const String& str, size_t pos) {
|
||||
[[nodiscard]] size_t tag_start(const String& str, size_t pos) {
|
||||
size_t start = str.find_last_of(_('<'), pos);
|
||||
if (start == String::npos) return String::npos;
|
||||
size_t end = skip_tag(str, start);
|
||||
@@ -116,13 +212,13 @@ size_t tag_start(const String& str, size_t pos) {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Index-based variant: returns the index just past the first '>' found at or after `start`.
// Returns String::npos when `start` is outside the string or when no '>' follows it.
// NOTE(review): the two signature lines below are the removed/added pair shown by the diff view;
// they differ only in the [[nodiscard]] attribute.
size_t skip_tag(const String& str, size_t start) {
|
||||
[[nodiscard]] size_t skip_tag(const String& str, size_t start) {
|
||||
if (start >= str.size()) return String::npos;
|
||||
size_t end = str.find_first_of(_('>'), start);
|
||||
return end == String::npos ? String::npos : end + 1;
|
||||
}
|
||||
|
||||
size_t match_close_tag(const String& str, size_t start) {
|
||||
[[nodiscard]] size_t match_close_tag(const String& str, size_t start) {
|
||||
String tag = tag_type_at(str, start);
|
||||
String ctag = _("/") + tag;
|
||||
size_t size = str.size();
|
||||
@@ -143,11 +239,11 @@ size_t match_close_tag(const String& str, size_t start) {
|
||||
return String::npos;
|
||||
}
|
||||
|
||||
// Index just past the close tag that matches the tag at `start`:
// the result of match_close_tag(str, start) is passed on to skip_tag(str, ...).
// NOTE(review): the two signature lines below are the removed/added pair shown by the diff view;
// they differ only in the [[nodiscard]] attribute.
size_t match_close_tag_end(const String& str, size_t start) {
|
||||
[[nodiscard]] size_t match_close_tag_end(const String& str, size_t start) {
|
||||
return skip_tag(str, match_close_tag(str, start));
|
||||
}
|
||||
|
||||
size_t last_start_tag_before(const String& str, const String& tag, size_t start) {
|
||||
[[nodiscard]] size_t last_start_tag_before(const String& str, const String& tag, size_t start) {
|
||||
start = min(str.size(), start);
|
||||
for (size_t pos = start ; pos > 0 ; --pos) {
|
||||
if (is_substr(str, pos - 1, tag)) {
|
||||
@@ -157,7 +253,7 @@ size_t last_start_tag_before(const String& str, const String& tag, size_t start)
|
||||
return String::npos;
|
||||
}
|
||||
|
||||
size_t in_tag(const String& str, const String& tag, size_t start, size_t end) {
|
||||
[[nodiscard]] size_t in_tag(const String& str, const String& tag, size_t start, size_t end) {
|
||||
size_t last_start = String::npos;
|
||||
size_t size = str.size();
|
||||
int taglevel = 0;
|
||||
@@ -604,17 +700,19 @@ String simplify_tagged_overlap(const String& str) {
|
||||
|
||||
// ----------------------------------------------------------------------------- : Verification
|
||||
|
||||
void check_tagged(const String& str, bool check_balance) {
|
||||
bool check_tagged(const String& str, bool check_balance) {
|
||||
for (size_t i = 0 ; i < str.size() ; ) {
|
||||
if (str.GetChar(i) == _('<')) {
|
||||
size_t end = skip_tag(str,i);
|
||||
if (end == String::npos) {
|
||||
queue_message(MESSAGE_WARNING, _("Invalid tagged string: missing '>'"));
|
||||
return false;
|
||||
}
|
||||
for (size_t j = i + 1 ; j + 1 < end ; ++j) {
|
||||
Char c = str.GetChar(j);
|
||||
if (c == ESCAPED_LANGLE || c == _('<')) {
|
||||
queue_message(MESSAGE_WARNING, _("Invalid character in tag"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (check_balance) {
|
||||
@@ -626,6 +724,7 @@ void check_tagged(const String& str, bool check_balance) {
|
||||
size_t close = match_close_tag(str,i);
|
||||
if (close == String::npos) {
|
||||
queue_message(MESSAGE_WARNING, _("Invalid tagged string: missing close tag for <") + tag_at(str,i) + _(">"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -634,6 +733,7 @@ void check_tagged(const String& str, bool check_balance) {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------- : Other utilities
|
||||
|
||||
+34
-10
@@ -53,24 +53,24 @@ String fix_old_tags(const String&);
|
||||
* < t a g >
|
||||
* n y y y y n
|
||||
*/
|
||||
size_t tag_start(const String& str, size_t pos);
|
||||
[[nodiscard]] size_t tag_start(const String& str, size_t pos);
|
||||
|
||||
/// Returns the position just beyond the tag starting at start
|
||||
size_t skip_tag(const String& str, size_t start);
|
||||
[[nodiscard]] size_t skip_tag(const String& str, size_t start);
|
||||
|
||||
/// Find the position of the closing tag matching the tag at start
|
||||
/** If not found returns String::npos */
|
||||
size_t match_close_tag(const String& str, size_t start);
|
||||
[[nodiscard]] size_t match_close_tag(const String& str, size_t start);
|
||||
|
||||
/// Find the position of the closing tag matching the tag at start
|
||||
/** Returns the position just after that tag.
|
||||
* match_close_tag_end(s,i) == skip_tag(s, match_close_tag(s,i) )
|
||||
* If not found returns String::npos */
|
||||
size_t match_close_tag_end(const String& str, size_t start);
|
||||
[[nodiscard]] size_t match_close_tag_end(const String& str, size_t start);
|
||||
|
||||
/// Find the last start tag before position start
|
||||
/** If not found returns String::npos */
|
||||
size_t last_start_tag_before(const String& str, const String& tag, size_t start);
|
||||
[[nodiscard]] size_t last_start_tag_before(const String& str, const String& tag, size_t start);
|
||||
|
||||
/// Is the given range entirely contained in a given tagged block?
|
||||
/** If so: return the start position of that tag, otherwise returns String::npos
|
||||
@@ -79,7 +79,7 @@ size_t last_start_tag_before(const String& str, const String& tag, size_t start)
|
||||
* <tag><tag></tag>x</tag>
|
||||
* the x is in_tag
|
||||
*/
|
||||
size_t in_tag(const String& str, const String& tag, size_t start, size_t end);
|
||||
[[nodiscard]] size_t in_tag(const String& str, const String& tag, size_t start, size_t end);
|
||||
/// Boolean returning version of the above
|
||||
bool is_in_tag(const String& str, const String& tag, size_t start, size_t end);
|
||||
|
||||
@@ -96,6 +96,29 @@ String close_tag(const String& tag);
|
||||
/// The matching close tag for an open tag and vice versa
|
||||
String anti_tag(const String& tag);
|
||||
|
||||
// ----------------------------------------------------------------------------- : Iterators in tagged strings
|
||||
|
||||
// Skip to the end of a tag, it must point to the start of a tag
|
||||
[[nodiscard]] String::const_iterator skip_tag(String::const_iterator it, String::const_iterator end);
|
||||
|
||||
// Skip past all tags
|
||||
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end);
|
||||
|
||||
// Skip past all open/close tags
|
||||
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end, bool skip_open, bool skip_close);
|
||||
|
||||
// Advance an iterator by n positions, not counting tags
|
||||
// For example: advance_untagged("<b>abc</b>",_,2) = "c</b>"
|
||||
[[nodiscard]] String::const_iterator advance_untagged(String::const_iterator it, String::const_iterator end, size_t n, bool after_open=false, bool after_close=false);
|
||||
|
||||
// Find the position of the closing tag matching the tag at it
|
||||
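// If not found, returns end
// NOTE(review): the only definition of find_close_tag visible in this commit is inside a commented-out block, so using this declaration would presumably fail to link -- confirm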
|
||||
[[nodiscard]] String::const_iterator find_close_tag(String::const_iterator it, String::const_iterator end);
|
||||
|
||||
// Length of a string when not counting tags
|
||||
// For example: untagged_length("<b>abc</b>",_) = 3
|
||||
[[nodiscard]] size_t untagged_length(String::const_iterator it, String::const_iterator end);
|
||||
|
||||
// ----------------------------------------------------------------------------- : Cursor position
|
||||
|
||||
/// Directions of cursor movement
|
||||
@@ -188,13 +211,14 @@ String tagged_substr_replace(const String& input, size_t start, size_t end, cons
|
||||
* - There are no tags containing '<' or whitespace
|
||||
* - For each open tag there is a matching close tag.
|
||||
*
|
||||
* In case of an error, throws an exception.
|
||||
* In case of an error, shows a warning
|
||||
* Return true if the string is a valid tagged string
|
||||
*/
|
||||
void check_tagged(const String& str, bool check_balance = true);
|
||||
bool check_tagged(const String& str, bool check_balance = true);
|
||||
#ifdef _DEBUG
|
||||
#define assert_tagged check_tagged
|
||||
#define assert_tagged(x) assert(check_tagged(x))
|
||||
#else
|
||||
inline void assert_tagged(const String& str, bool check_balance = true){}
|
||||
#define assert_tagged(x) do{}while(0)
|
||||
#endif
|
||||
|
||||
/// Simplify a tagged string
|
||||
|
||||
Reference in New Issue
Block a user