Rewrite of keyword matching code. This fixes #20

Uses new iterator based tagged string functions.
This commit is contained in:
Twan van Laarhoven
2020-05-17 20:37:53 +02:00
parent 0b653938cc
commit b3ddb295fc
8 changed files with 528 additions and 267 deletions
+36
View File
@@ -220,3 +220,39 @@ String regex_escape(const String& s);
/** Basically replaces "(" with "(?:" */
String make_non_capturing(const String& re);
// ----------------------------------------------------------------------------- : Iterator utilities
/// Tag type of the end-of-range marker used by the iterator adaptors below.
struct end_sentinel_t {};
/// The end-of-range marker: compare an adaptor against this to test whether it is exhausted.
// Declared `inline constexpr` so that including this header from several
// translation units does not define the variable more than once.
inline constexpr end_sentinel_t end_sentinel{};

/// Iterate over a range, removing all substrings recognized by `match`.
/** `match(it, end)` receives the iterator by reference. It must either
 *    - return false and leave `it` untouched, or
 *    - return true after advancing `it` past the substring that is to be removed.
 *  It is also invoked with it == end, and must return false in that case.
 *  A matcher that returns true without advancing `it` makes the adaptor loop forever.
 *
 *  The adaptor is compared against `end_sentinel` to detect the end of the range.
 *  Dereferencing or incrementing an adaptor that is at the end is not allowed.
 */
template <typename It, typename End, typename Match>
struct SkipSubstringIterator {
public:
	/// Start iterating at `it`, immediately skipping any matches found there
	SkipSubstringIterator(It it, End end, Match const& match) : it(it), end(end), match(match) {
		// The constructor parameters hide the members of the same name, so the
		// skipping is done in a member function where the names refer to the
		// stored iterator. Advancing the parameter copy here would have no effect.
		skip_matches();
	}
	/// Has the end of the underlying range been reached?
	bool operator == (end_sentinel_t) const {
		return it == end;
	}
	bool operator != (end_sentinel_t) const {
		return it != end;
	}
	/// The current element, this is never the start of a matched substring
	auto operator * () const {
		return *it;
	}
	/// Move to the next element that is not part of a matched substring
	auto& operator ++ () {
		++it;
		skip_matches();
		return *this;
	}
private:
	/// Advance the stored iterator past all consecutive matches at the current position
	void skip_matches() {
		while (match(it, end)) {}
	}
	It it;
	End end;
	Match match;
};

/// Construct a SkipSubstringIterator, deducing the template arguments
template <typename It, typename End, typename Match>
inline SkipSubstringIterator<It,End,Match> skip_substring_iterator(It it, End end, Match const& match) {
	return SkipSubstringIterator<It,End,Match>(it, end, match);
}
+107 -7
View File
@@ -106,9 +106,105 @@ String fix_old_tags(const String& str) {
return ret;
}
// ----------------------------------------------------------------------------- : Iterator algorithms
/// Skip past the tag that starts at it.
/** it must point to the '<' that opens the tag (checked with an assert).
 *  Returns the position just after the first '>' that follows,
 *  or end if the tag is not terminated.
 */
[[nodiscard]] String::const_iterator skip_tag(String::const_iterator it, String::const_iterator end) {
	assert(it != end && *it == '<');
	// look for the '>', starting just after the '<'
	for (++it; it != end; ++it) {
		if (*it == '>') {
			return ++it; // step over the '>' itself
		}
	}
	return it; // no '>' found, it == end
}
/// Skip past all consecutive tags starting at it.
/** Returns the first position that is not the start of a tag, or end. */
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end) {
	for (;;) {
		if (it == end || *it != '<') {
			return it; // not at a tag (anymore)
		}
		it = skip_tag(it, end);
	}
}
/// Skip past consecutive tags starting at it, selectively by kind of tag.
/** A tag counts as a close tag if the '<' is directly followed by a '/', otherwise it is an open tag.
 *  Open tags are skipped only if skip_open is set, close tags only if skip_close is set.
 *  Stops at the first tag that may not be skipped, at the first non-tag character, or at end.
 */
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end, bool skip_open, bool skip_close) {
	while (it != end && *it == '<') {
		const bool is_close_tag = (it + 1 != end) && *(it + 1) == '/';
		const bool may_skip = is_close_tag ? skip_close : skip_open;
		if (!may_skip) {
			return it; // this kind of tag must be left in place
		}
		it = skip_tag(it, end);
	}
	return it;
}
/// Advance it by n characters, where tags do not count as characters.
/** If the range holds fewer than n untagged characters, end is returned.
 *  Otherwise, after the n-th character, directly following open tags are skipped if after_open is set
 *  and directly following close tags are skipped if after_close is set.
 */
[[nodiscard]] String::const_iterator advance_untagged(String::const_iterator it, String::const_iterator end, size_t n, bool after_open, bool after_close) {
	for (size_t remaining = n; remaining > 0; --remaining) {
		// tags in front of the next character are not counted
		it = skip_all_tags(it, end);
		if (it == end) {
			return it; // ran out of characters
		}
		++it;
	}
	return skip_all_tags(it, end, after_open, after_close);
}
/*
// Does the string [it..end) contain the matching close tag for [tag..tag_end)?
bool is_close_tag(String::const_iterator it, String::const_iterator end, String::const_iterator tag, String::const_iterator tag_end) {
if (it == end) return false;
if (*it != '<') return false;
++it;
if (it == end) return false;
if (*it != '/') return false;
assert(tag != tag_end && *tag == '<');
++tag;
return is_substr(it,end, tag,end);
}
String::const_iterator find_close_tag(String::const_iterator tag, String::const_iterator end) {
assert(tag != end && *tag == '<');
auto tag_end = skip_tag(tag,end);
int nesting = 1;
String::const_iterator it = tag_end;
while (it != end) {
if (*it == '<') {
if (is_substr(it,end, tag,tag_end)) {
++nesting;
} else if (is_close_tag(it,end, tag,tag_end)) {
--nesting;
if (nesting == 0) return it;
}
it = skip_tag(it,end);
} else {
++it;
}
}
return end;
}*/
/// Count the characters in [it..end) that are not part of a tag.
[[nodiscard]] size_t untagged_length(String::const_iterator it, String::const_iterator end) {
	size_t count = 0;
	it = skip_all_tags(it, end);
	while (it != end) {
		// it points to an untagged character: count it, then move to the next one
		++count;
		++it;
		it = skip_all_tags(it, end);
	}
	return count;
}
// ----------------------------------------------------------------------------- : Finding tags
size_t tag_start(const String& str, size_t pos) {
[[nodiscard]] size_t tag_start(const String& str, size_t pos) {
size_t start = str.find_last_of(_('<'), pos);
if (start == String::npos) return String::npos;
size_t end = skip_tag(str, start);
@@ -116,13 +212,13 @@ size_t tag_start(const String& str, size_t pos) {
return start;
}
size_t skip_tag(const String& str, size_t start) {
/// Position just beyond the tag starting at start.
/** This is the position after the first '>' found at or after start.
 *  Returns String::npos if start is not inside the string, or if there is no '>'.
 */
[[nodiscard]] size_t skip_tag(const String& str, size_t start) {
	if (start < str.size()) {
		const size_t close = str.find_first_of(_('>'), start);
		if (close != String::npos) {
			return close + 1;
		}
	}
	return String::npos;
}
size_t match_close_tag(const String& str, size_t start) {
[[nodiscard]] size_t match_close_tag(const String& str, size_t start) {
String tag = tag_type_at(str, start);
String ctag = _("/") + tag;
size_t size = str.size();
@@ -143,11 +239,11 @@ size_t match_close_tag(const String& str, size_t start) {
return String::npos;
}
size_t match_close_tag_end(const String& str, size_t start) {
/// Position just after the close tag that matches the tag at start.
/** Locates the close tag with match_close_tag, then skips past it with skip_tag. */
[[nodiscard]] size_t match_close_tag_end(const String& str, size_t start) {
	const size_t close_tag = match_close_tag(str, start);
	return skip_tag(str, close_tag);
}
size_t last_start_tag_before(const String& str, const String& tag, size_t start) {
[[nodiscard]] size_t last_start_tag_before(const String& str, const String& tag, size_t start) {
start = min(str.size(), start);
for (size_t pos = start ; pos > 0 ; --pos) {
if (is_substr(str, pos - 1, tag)) {
@@ -157,7 +253,7 @@ size_t last_start_tag_before(const String& str, const String& tag, size_t start)
return String::npos;
}
size_t in_tag(const String& str, const String& tag, size_t start, size_t end) {
[[nodiscard]] size_t in_tag(const String& str, const String& tag, size_t start, size_t end) {
size_t last_start = String::npos;
size_t size = str.size();
int taglevel = 0;
@@ -604,17 +700,19 @@ String simplify_tagged_overlap(const String& str) {
// ----------------------------------------------------------------------------- : Verification
void check_tagged(const String& str, bool check_balance) {
bool check_tagged(const String& str, bool check_balance) {
for (size_t i = 0 ; i < str.size() ; ) {
if (str.GetChar(i) == _('<')) {
size_t end = skip_tag(str,i);
if (end == String::npos) {
queue_message(MESSAGE_WARNING, _("Invalid tagged string: missing '>'"));
return false;
}
for (size_t j = i + 1 ; j + 1 < end ; ++j) {
Char c = str.GetChar(j);
if (c == ESCAPED_LANGLE || c == _('<')) {
queue_message(MESSAGE_WARNING, _("Invalid character in tag"));
return false;
}
}
if (check_balance) {
@@ -626,6 +724,7 @@ void check_tagged(const String& str, bool check_balance) {
size_t close = match_close_tag(str,i);
if (close == String::npos) {
queue_message(MESSAGE_WARNING, _("Invalid tagged string: missing close tag for <") + tag_at(str,i) + _(">"));
return false;
}
}
}
@@ -634,6 +733,7 @@ void check_tagged(const String& str, bool check_balance) {
++i;
}
}
return true;
}
// ----------------------------------------------------------------------------- : Other utilities
+34 -10
View File
@@ -53,24 +53,24 @@ String fix_old_tags(const String&);
* < t a g >
* n y y y y n
*/
size_t tag_start(const String& str, size_t pos);
[[nodiscard]] size_t tag_start(const String& str, size_t pos);
/// Returns the position just beyond the tag starting at start
size_t skip_tag(const String& str, size_t start);
[[nodiscard]] size_t skip_tag(const String& str, size_t start);
/// Find the position of the closing tag matching the tag at start
/** If not found returns String::npos */
size_t match_close_tag(const String& str, size_t start);
[[nodiscard]] size_t match_close_tag(const String& str, size_t start);
/// Find the position of the closing tag matching the tag at start
/** Returns the position just after that tag.
* match_close_tag_end(s,i) == skip_tag(s, match_close_tag(s,i) )
* If not found returns String::npos */
size_t match_close_tag_end(const String& str, size_t start);
[[nodiscard]] size_t match_close_tag_end(const String& str, size_t start);
/// Find the last start tag before position start
/** If not found returns String::npos */
size_t last_start_tag_before(const String& str, const String& tag, size_t start);
[[nodiscard]] size_t last_start_tag_before(const String& str, const String& tag, size_t start);
/// Is the given range entirely contained in a given tagged block?
/** If so: return the start position of that tag, otherwise returns String::npos
@@ -79,7 +79,7 @@ size_t last_start_tag_before(const String& str, const String& tag, size_t start)
* <tag><tag></tag>x</tag>
* the x is in_tag
*/
size_t in_tag(const String& str, const String& tag, size_t start, size_t end);
[[nodiscard]] size_t in_tag(const String& str, const String& tag, size_t start, size_t end);
/// Boolean returning version of the above
bool is_in_tag(const String& str, const String& tag, size_t start, size_t end);
@@ -96,6 +96,29 @@ String close_tag(const String& tag);
/// The matching close tag for an open tag and vice versa
String anti_tag(const String& tag);
// ----------------------------------------------------------------------------- : Iterators in tagged strings
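// Skip past the tag starting at it; it must point to the '<' that opens the tag.
// Returns the position just after the closing '>', or end if the tag is not terminated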
[[nodiscard]] String::const_iterator skip_tag(String::const_iterator it, String::const_iterator end);
// Skip past all tags
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end);
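// Skip past consecutive tags: open tags only if skip_open, close tags only if skip_close.
// Stops at the first tag that may not be skipped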
[[nodiscard]] String::const_iterator skip_all_tags(String::const_iterator it, String::const_iterator end, bool skip_open, bool skip_close);
// Advance an iterator by n positions, not counting tags
// For example: advance_untagged("<b>abc</b>",_,2) = "c</b>"
[[nodiscard]] String::const_iterator advance_untagged(String::const_iterator it, String::const_iterator end, size_t n, bool after_open=false, bool after_close=false);
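// Find the position of the closing tag matching the tag at it
// If not found, returns end
// NOTE(review): the definition of this function appears to be commented out in the implementation file, so a call would fail to link. Confirm before use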
[[nodiscard]] String::const_iterator find_close_tag(String::const_iterator it, String::const_iterator end);
// Length of a string when not counting tags
// For example: untagged_length("<b>abc</b>",_) = 3
[[nodiscard]] size_t untagged_length(String::const_iterator it, String::const_iterator end);
// ----------------------------------------------------------------------------- : Cursor position
/// Directions of cursor movement
@@ -188,13 +211,14 @@ String tagged_substr_replace(const String& input, size_t start, size_t end, cons
* - There are no tags containing '<' or whitespace
* - For each open tag there is a matching close tag.
*
* In case of an error, throws an exception.
* In case of an error, shows a warning
* Return true if the string is a valid tagged string
*/
void check_tagged(const String& str, bool check_balance = true);
bool check_tagged(const String& str, bool check_balance = true);
#ifdef _DEBUG
#define assert_tagged check_tagged
#define assert_tagged(x) assert(check_tagged(x))
#else
inline void assert_tagged(const String& str, bool check_balance = true){}
#define assert_tagged(x) do{}while(0)
#endif
/// Simplify a tagged string