More strict matching of tags: <blah> doesn't count as <b> anymore

This commit is contained in:
Twan van Laarhoven
2020-05-17 23:51:36 +02:00
parent ab4e7e59f3
commit 7ac44fcff1
4 changed files with 66 additions and 48 deletions
+45 -40
View File
@@ -37,7 +37,7 @@ struct TextElementsFromString {
int bold = 0, italic = 0, symbol = 0; int bold = 0, italic = 0, symbol = 0;
int soft = 0, kwpph = 0, param = 0, line = 0, soft_line = 0; int soft = 0, kwpph = 0, param = 0, line = 0, soft_line = 0;
int code = 0, code_kw = 0, code_string = 0, param_ref = 0; int code = 0, code_kw = 0, code_string = 0, param_ref = 0;
int param_id = 0; int param_id = 0, li = 0;
vector<Color> colors; vector<Color> colors;
vector<double> sizes; vector<double> sizes;
vector<String> fonts; vector<String> fonts;
@@ -75,29 +75,31 @@ private:
// a (formatting) tag // a (formatting) tag
size_t tag_start = pos; size_t tag_start = pos;
pos = skip_tag(text, tag_start); pos = skip_tag(text, tag_start);
if (is_substr(text, tag_start, _( "<b"))) bold += 1; if (is_tag(text, tag_start, _( "<b"))) bold += 1;
else if (is_substr(text, tag_start, _("</b"))) bold -= 1; else if (is_tag(text, tag_start, _("</b"))) bold -= 1;
else if (is_substr(text, tag_start, _( "<i"))) italic += 1; else if (is_tag(text, tag_start, _( "<i"))) italic += 1;
else if (is_substr(text, tag_start, _("</i"))) italic -= 1; else if (is_tag(text, tag_start, _("</i"))) italic -= 1;
else if (is_substr(text, tag_start, _( "<sym"))) symbol += 1; else if (is_tag(text, tag_start, _( "<sym"))) symbol += 1;
else if (is_substr(text, tag_start, _("</sym"))) symbol -= 1; else if (is_tag(text, tag_start, _("</sym"))) symbol -= 1;
else if (is_substr(text, tag_start, _( "<line"))) line += 1; else if (is_tag(text, tag_start, _( "<line"))) line += 1;
else if (is_substr(text, tag_start, _("</line"))) line -= 1; else if (is_tag(text, tag_start, _("</line"))) line -= 1;
else if (is_substr(text, tag_start, _( "<soft-line"))) soft_line += 1; else if (is_tag(text, tag_start, _( "<soft-line"))) soft_line += 1;
else if (is_substr(text, tag_start, _("</soft-line"))) soft_line -= 1; else if (is_tag(text, tag_start, _("</soft-line"))) soft_line -= 1;
else if (is_substr(text, tag_start, _( "<sep-soft"))) soft += 1; else if (is_tag(text, tag_start, _( "<sep-soft"))) soft += 1;
else if (is_substr(text, tag_start, _("</sep-soft"))) soft -= 1; else if (is_tag(text, tag_start, _("</sep-soft"))) soft -= 1;
else if (is_substr(text, tag_start, _( "<soft"))) soft += 1; // must be after <soft-line else if (is_tag(text, tag_start, _( "<soft"))) soft += 1; // must be after <soft-line
else if (is_substr(text, tag_start, _("</soft"))) soft -= 1; else if (is_tag(text, tag_start, _("</soft"))) soft -= 1;
else if (is_substr(text, tag_start, _( "<atom-kwpph"))) kwpph += 1; else if (is_tag(text, tag_start, _( "<li"))) li += 1;
else if (is_substr(text, tag_start, _("</atom-kwpph"))) kwpph -= 1; else if (is_tag(text, tag_start, _("</li"))) li -= 1;
else if (is_substr(text, tag_start, _( "<code-kw"))) code_kw += 1; else if (is_tag(text, tag_start, _( "<atom-kwpph"))) kwpph += 1;
else if (is_substr(text, tag_start, _("</code-kw"))) code_kw -= 1; else if (is_tag(text, tag_start, _("</atom-kwpph"))) kwpph -= 1;
else if (is_substr(text, tag_start, _( "<code-str"))) code_string += 1; else if (is_tag(text, tag_start, _( "<code-kw"))) code_kw += 1;
else if (is_substr(text, tag_start, _("</code-str"))) code_string -= 1; else if (is_tag(text, tag_start, _("</code-kw"))) code_kw -= 1;
else if (is_substr(text, tag_start, _( "<code"))) code += 1; else if (is_tag(text, tag_start, _( "<code-str"))) code_string += 1;
else if (is_substr(text, tag_start, _("</code"))) code -= 1; else if (is_tag(text, tag_start, _("</code-str"))) code_string -= 1;
else if (is_substr(text, tag_start, _( "<color"))) { else if (is_tag(text, tag_start, _( "<code"))) code += 1;
else if (is_tag(text, tag_start, _("</code"))) code -= 1;
else if (is_tag(text, tag_start, _( "<color"))) {
size_t colon = text.find_first_of(_(">:"), tag_start); size_t colon = text.find_first_of(_(">:"), tag_start);
if (colon < pos - 1 && text.GetChar(colon) == _(':')) { if (colon < pos - 1 && text.GetChar(colon) == _(':')) {
auto c = parse_color(text.substr(colon+1, pos-colon-2)); auto c = parse_color(text.substr(colon+1, pos-colon-2));
@@ -108,18 +110,18 @@ private:
colors.push_back(style.font.color); colors.push_back(style.font.color);
} }
} }
} else if (is_substr(text, tag_start, _("</color"))) { } else if (is_tag(text, tag_start, _("</color"))) {
if (!colors.empty()) colors.pop_back(); if (!colors.empty()) colors.pop_back();
} }
else if (is_substr(text, tag_start, _( "<font"))) { else if (is_tag(text, tag_start, _( "<font"))) {
size_t colon = text.find_first_of(_(">:"), tag_start); size_t colon = text.find_first_of(_(">:"), tag_start);
if (colon < pos - 1 && text.GetChar(colon) == _(':')) { if (colon < pos - 1 && text.GetChar(colon) == _(':')) {
fonts.push_back(text.substr(colon+1, pos-colon-2)); fonts.push_back(text.substr(colon+1, pos-colon-2));
} }
} else if (is_substr(text, tag_start, _("</font"))) { } else if (is_tag(text, tag_start, _("</font"))) {
if (!fonts.empty()) fonts.pop_back(); if (!fonts.empty()) fonts.pop_back();
} }
else if (is_substr(text, tag_start, _( "<size"))) { else if (is_tag(text, tag_start, _( "<size"))) {
size_t colon = text.find_first_of(_(">:"), tag_start); size_t colon = text.find_first_of(_(">:"), tag_start);
if (colon < pos - 1 && text.GetChar(colon) == _(':')) { if (colon < pos - 1 && text.GetChar(colon) == _(':')) {
double size = style.font.size; double size = style.font.size;
@@ -127,10 +129,10 @@ private:
v.ToDouble(&size); v.ToDouble(&size);
sizes.push_back(size); sizes.push_back(size);
} }
} else if (is_substr(text, tag_start, _("</size"))) { } else if (is_tag(text, tag_start, _("</size"))) {
if (!sizes.empty()) sizes.pop_back(); if (!sizes.empty()) sizes.pop_back();
} }
else if (is_substr(text, tag_start, _( "<ref-param"))) { else if (is_tag(text, tag_start, _( "<ref-param"))) {
// determine the param being referenced // determine the param being referenced
// from a tag <ref-param123> // from a tag <ref-param123>
if (pos != String::npos) { if (pos != String::npos) {
@@ -142,10 +144,10 @@ private:
} }
param_ref += 1; param_ref += 1;
} }
else if (is_substr(text, tag_start, _("</ref-param"))) param_ref -= 1; else if (is_tag(text, tag_start, _("</ref-param"))) param_ref -= 1;
else if (is_substr(text, tag_start, _( "<atom-param"))) param += 1; else if (is_tag(text, tag_start, _( "<atom-param"))) param += 1;
else if (is_substr(text, tag_start, _("</atom-param"))) param -= 1; else if (is_tag(text, tag_start, _("</atom-param"))) param -= 1;
else if (is_substr(text, tag_start, _("<atom"))) { else if (is_tag(text, tag_start, _("<atom"))) {
// 'atomic' indicator // 'atomic' indicator
#if 0 #if 0
// it would be nice if we could have semi-transparent brushes // it would be nice if we could have semi-transparent brushes
@@ -159,17 +161,20 @@ private:
fromString(e->children, text, pos, end_tag); fromString(e->children, text, pos, end_tag);
elements.push_back(e); elements.push_back(e);
pos = skip_tag(text, end_tag); pos = skip_tag(text, end_tag);
} else if (is_substr(text, tag_start, _( "<error"))) { } else if (is_tag(text, tag_start, _( "<error"))) {
// error indicator // error indicator
size_t end_tag = min(end, match_close_tag(text, tag_start)); size_t end_tag = min(end, match_close_tag(text, tag_start));
intrusive_ptr<ErrorTextElement> e = make_intrusive<ErrorTextElement>(pos, end_tag); intrusive_ptr<ErrorTextElement> e = make_intrusive<ErrorTextElement>(pos, end_tag);
fromString(e->children, text, pos, end_tag); fromString(e->children, text, pos, end_tag);
elements.push_back(e); elements.push_back(e);
pos = skip_tag(text, end_tag); pos = skip_tag(text, end_tag);
} else if (is_substr(text, tag_start, _("</li"))) { } else if (is_tag(text, tag_start, _("</bullet"))) {
// end of bullet point, set margin here // end of bullet point, set margin here
if (li <= 0) {
queue_message(MESSAGE_WARNING, _("<bullet> outside <li> tag"));
}
paragraphs.back().margin_end_char = pos; paragraphs.back().margin_end_char = pos;
} else if (is_substr(text, tag_start, _("<margin"))) { } else if (is_tag(text, tag_start, _("<margin"))) {
size_t colon = text.find_first_of(_(">:"), tag_start); size_t colon = text.find_first_of(_(">:"), tag_start);
if (colon < pos - 1 && text.GetChar(colon) == _(':')) { if (colon < pos - 1 && text.GetChar(colon) == _(':')) {
size_t colon2 = text.find_first_of(_(">:"), colon + 1); size_t colon2 = text.find_first_of(_(">:"), colon + 1);
@@ -188,16 +193,16 @@ private:
paragraphs.back().margin_right = m.right; paragraphs.back().margin_right = m.right;
paragraphs.back().margin_top = m.top; paragraphs.back().margin_top = m.top;
} }
} else if (is_substr(text, tag_start, _("</margin"))) { } else if (is_tag(text, tag_start, _("</margin"))) {
if (!margins.empty()) margins.pop_back(); if (!margins.empty()) margins.pop_back();
} else if (is_substr(text, tag_start, _("<align"))) { } else if (is_tag(text, tag_start, _("<align"))) {
size_t colon = text.find_first_of(_(">:"), tag_start); size_t colon = text.find_first_of(_(">:"), tag_start);
if (colon < pos - 1 && text.GetChar(colon) == _(':')) { if (colon < pos - 1 && text.GetChar(colon) == _(':')) {
Alignment align = alignment_from_string(text.substr(colon+1, pos-colon-2)); Alignment align = alignment_from_string(text.substr(colon+1, pos-colon-2));
aligns.push_back(align); aligns.push_back(align);
paragraphs.back().alignment = align; paragraphs.back().alignment = align;
} }
} else if (is_substr(text, tag_start, _("</align"))) { } else if (is_tag(text, tag_start, _("</align"))) {
if (!aligns.empty()) aligns.pop_back(); if (!aligns.empty()) aligns.pop_back();
} else { } else {
// ignore other tags // ignore other tags
+6 -6
View File
@@ -85,12 +85,12 @@ SCRIPT_FUNCTION(check_spelling) {
while (pos < input.size()) { while (pos < input.size()) {
Char c = input.GetChar(pos); Char c = input.GetChar(pos);
if (c == _('<')) { if (c == _('<')) {
if (is_substr(input, pos, _("<nospellcheck"))) unchecked_tag++; if (is_tag(input, pos, _("<nospellcheck"))) unchecked_tag++;
else if (is_substr(input, pos, _("</nospellcheck"))) unchecked_tag--; else if (is_tag(input, pos, _("</nospellcheck"))) unchecked_tag--;
else if (is_substr(input, pos, _("<sym"))) unchecked_tag++; else if (is_tag(input, pos, _("<sym"))) unchecked_tag++;
else if (is_substr(input, pos, _("</sym"))) unchecked_tag--; else if (is_tag(input, pos, _("</sym"))) unchecked_tag--;
else if (is_substr(input, pos, _("<atom"))) unchecked_tag++; else if (is_tag(input, pos, _("<atom"))) unchecked_tag++;
else if (is_substr(input, pos, _("</atom"))) unchecked_tag--; else if (is_tag(input, pos, _("</atom"))) unchecked_tag--;
// skip tag // skip tag
auto after = skip_tag(input,pos); auto after = skip_tag(input,pos);
if (word_start == pos) { if (word_start == pos) {
+11 -2
View File
@@ -253,6 +253,15 @@ String::const_iterator find_close_tag(String::const_iterator tag, String::const_
return String::npos; return String::npos;
} }
// Don't mistake <tag> for <t>: only <t>, <t-stuff>, <t:stuff> and <t stuff> are considered <t>
/// May the character `c` directly follow a tag name?
/** Returns true only for '>', '-', ':' and ' '; every other character
 *  is treated as a continuation of a longer tag name.
 */
bool is_tag_end_char(Char c) {
  if (c == '>') return true;
  if (c == '-') return true;
  if (c == ':') return true;
  if (c == ' ') return true;
  return false;
}
bool is_tag(const String& str, size_t pos, const String& tag) {
return is_substr(str, pos, tag) && pos+tag.size() < str.size() && is_tag_end_char(str[pos+tag.size()]);
}
[[nodiscard]] size_t in_tag(const String& str, const String& tag, size_t start, size_t end) { [[nodiscard]] size_t in_tag(const String& str, const String& tag, size_t start, size_t end) {
size_t last_start = String::npos; size_t last_start = String::npos;
size_t size = str.size(); size_t size = str.size();
@@ -261,10 +270,10 @@ String::const_iterator find_close_tag(String::const_iterator tag, String::const_
for (size_t pos = 0 ; pos < end ; ) { for (size_t pos = 0 ; pos < end ; ) {
Char c = str.GetChar(pos); Char c = str.GetChar(pos);
if (c == _('<')) { if (c == _('<')) {
if (is_substr(str, pos + 1, static_cast<const Char*>(tag.c_str())+1)) { if (is_substr(str, pos + 1, static_cast<const Char*>(tag.c_str())+1) && pos+tag.size() < str.size() && is_tag_end_char(str[pos+tag.size()])) {
if (pos < start) last_start = pos; if (pos < start) last_start = pos;
++taglevel; ++taglevel;
} else if (pos + 2 < size && str.GetChar(pos+1) == _('/') && is_substr(str, pos + 2, static_cast<const Char*>(tag.c_str())+1)) { } else if (pos + 2 < size && str.GetChar(pos+1) == _('/') && is_substr(str, pos + 2, static_cast<const Char*>(tag.c_str())+1) && pos+1+tag.size() < str.size() && is_tag_end_char(str[pos+1+tag.size()])) {
--taglevel; // close tag --taglevel; // close tag
} }
pos = skip_tag(str,pos); pos = skip_tag(str,pos);
+4
View File
@@ -72,6 +72,10 @@ String fix_old_tags(const String&);
/** If not found returns String::npos */ /** If not found returns String::npos */
[[nodiscard]] size_t last_start_tag_before(const String& str, const String& tag, size_t start); [[nodiscard]] size_t last_start_tag_before(const String& str, const String& tag, size_t start);
/// Does a string contain a tag at the given location?
/** Only matches if the tag text is directly followed by one of the characters '>', '-', ':' or ' ' */
[[nodiscard]] bool is_tag(const String& str, size_t pos, const String& tag);
/// Is the given range entirely contained in a given tagged block? /// Is the given range entirely contained in a given tagged block?
/** If so: return the start position of that tag, otherwise returns String::npos /** If so: return the start position of that tag, otherwise returns String::npos
* A tagged block is everything between <tag>...</tag> * A tagged block is everything between <tag>...</tag>