Fix #35

2026-06-13 05:57:00 -04:00 · 2020-05-14 23:48:06 +02:00
parent a13337c262
commit 1f3a2c2519
5 changed files with 32 additions and 88 deletions
@@ -20,13 +20,6 @@ inline size_t spelled_correctly(const String& input, size_t start, size_t end, S
  // untag
  String word = untag(input.substr(start,end-start));
  if (word.empty()) return true;
  // symbol?
  if (is_in_tag(input,_("<sym"),start,end) ||
    is_in_tag(input,_("<nospellcheck"),start,end)) {
    // symbols are always spelled correctly
    // and <nospellcheck> tags should prevent spellcheck
    return true;
  }
  // run through spellchecker(s)
  for (size_t i = 0 ; checkers[i] ; ++i) {
    if (checkers[i]->spell(word)) {
@@ -49,44 +42,12 @@ inline size_t spelled_correctly(const String& input, size_t start, size_t end, S
  return false;
 }
-void check_word(const String& tag, const String& input, String& out, size_t start, size_t end, SpellChecker** checkers, const ScriptValueP& extra_test, Context& ctx) {
+void check_word(const String& tag, const String& input, size_t start, size_t end, String& out, bool check, SpellChecker** checkers, const ScriptValueP& extra_test, Context& ctx) {
  if (start >= end) return;
-  bool good = spelled_correctly(input, start, end, checkers, extra_test, ctx);
+  bool good = !check || spelled_correctly(input, start, end, checkers, extra_test, ctx);
-  if (!good) out += _("<") + tag;
+  if (!good) { out += _("<"); out += tag; }
  out.append(input, start, end-start);
-  if (!good) out += _("</") + tag;
+  if (!good) { out += _("</"); out += tag; }
 }
 void check_word(const String& tag, const String& input, String& out, Char sep, size_t prev, size_t start, size_t end, size_t after, SpellChecker** checkers, const ScriptValueP& extra_test, Context& ctx) {
  if (start == end) {
    // word consisting of whitespace/punctuation only
    if (untag(input.substr(prev,after-prev)).empty()) {
      if (isSpace(sep) && (after == input.size() || isSpace(input.GetChar(after)))) {
        // double space
        out += _("<error-spelling>");
        out.append(sep);
        out.append(input, prev, after-prev);
        out += _("</error-spelling>");
      } else {
        if (sep) out.append(sep);
        out.append(input, prev, after-prev);
      }
    } else {
      // stand alone punctuation
      if (sep) out.append(sep);
      out += _("<error-spelling>");
      out.append(input, prev, after-prev);
      out += _("</error-spelling>");
    }
  } else {
    // before the word
    if (sep) out.append(sep);
    out.append(input, prev, start-prev);
    // the word itself
    check_word(tag, input, out, start, end, checkers, extra_test, ctx);
    // after the word
    out.append(input, end, after-end);
  }
 }
 SCRIPT_FUNCTION(check_spelling) {
@@ -118,44 +79,45 @@ SCRIPT_FUNCTION(check_spelling) {
  tag += _(">");
  // now walk over the words in the input, and mark misspellings
  String result;
-  Char sep = 0;
+  size_t word_start = String::npos; // start of the word to be checked, or npos if not inside a word
-  // indices are used as follows (at the time of check_word call):
+  size_t pos = 0;
-  //   input:      "previous <tag>(<tag>word<tag>)<tag>next"
+  int unchecked_tag = 0;
  //                        ^^          ^   ^          ^
  //                        ||          |   |          |
  //                     sep |          |   word_end   pos
  //                         prev_end   word_start
  //
  size_t prev_end = 0, word_start = 0, word_end = 0, pos = 0;
  while (pos < input.size()) {
    Char c = input.GetChar(pos);
    if (c == _('<')) {
      if      (is_substr(input, pos,  _("<nospellcheck"))) unchecked_tag++;
      else if (is_substr(input, pos, _("</nospellcheck"))) unchecked_tag--;
      else if (is_substr(input, pos,  _("<sym")))  unchecked_tag++;
      else if (is_substr(input, pos, _("</sym")))  unchecked_tag--;
      else if (is_substr(input, pos,  _("<atom"))) unchecked_tag++;
      else if (is_substr(input, pos, _("</atom"))) unchecked_tag--;
      // skip tag
      auto after = skip_tag(input,pos);
      if (word_start == pos) {
        // prefer to place word start inside tags, i.e. as late as possible
-        word_end = word_start = pos = skip_tag(input,pos);
+        word_start = pos = after;
        result.append(input, pos, after-pos);
      } else {
-        pos = skip_tag(input,pos);
+        if (word_start == String::npos) {
          result.append(input, pos, after - pos);
        }
        pos = after;
      }
-    } else if (isSpace(c) || c == EM_DASH || c == EN_DASH) {
+    } else if (isAlpha(c)) {
-      // word boundary => check the word
+      // a word character
-      check_word(tag, input, result, sep, prev_end, word_start, word_end, pos, checkers, extra_match, ctx);
+      if (word_start == String::npos) word_start = pos;
-      // next
+      ++pos;
      sep = c;
      prev_end = word_start = word_end = pos = pos + 1;
    } else {
-      pos++;
+      // a non-word character, punctuation or space
-      if (word_start == pos-1 && is_word_start_punctuation(c)) {
+      // check word, add to result
-        // skip punctuation at start of word
+      check_word(tag, input, word_start, pos, result, unchecked_tag <= 0, checkers, extra_match, ctx);
-        word_end = word_start = pos;
+      word_start = String::npos;
-      } else if (is_word_end_punctuation(c)) {
+      result += c;
-        // skip punctuation at end of word
+      ++pos;
      } else {
        word_end = pos;
      }
    }
  }
  // last word
-  check_word(tag, input, result, sep, prev_end, word_start, word_end, pos, checkers, extra_match, ctx);
+  check_word(tag, input, word_start, input.size(), result, unchecked_tag <= 0, checkers, extra_match, ctx);
  // done
  assert_tagged(result);
  SCRIPT_RETURN(result);
@@ -110,13 +110,6 @@ bool SpellChecker::spell(const String& word) {
  return Hunspell::spell(str);
 }
 bool SpellChecker::spell_with_punctuation(const String& word) {
  size_t start = 0, end = String::npos;
  trim_punctuation(word, start, end);
  if (start >= end) return true; // just punctuation is wrong
  return spell(word.substr(start,end-start));
 }
 void SpellChecker::suggest(const String& word, vector<String>& suggestions_out) {
  CharBuffer str;
  if (!convert_encoding(word,str)) return;
@@ -39,8 +39,6 @@ public:
  /// Check the spelling of a single word
  bool spell(const String& word);
  /// Check the spelling of a single word, ignore punctuation
  bool spell_with_punctuation(const String& word);
  /// Give spelling suggestions
  void suggest(const String& word, vector<String>& suggestions_out);
@@ -115,12 +115,6 @@ String strip_last_word(const String& s) {
 const String word_start_chars = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE;
 const String word_end_chars   = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE;
 void trim_punctuation(const String& str, size_t& start, size_t& end) {
  start = str.find_first_not_of(word_start_chars, start);
  end   = str.find_last_not_of(word_end_chars,    min(end,str.size()-1)) + 1;
  if (start >= end) start = end;
 }
 bool is_word_start_punctuation(Char c) {
  return word_start_chars.find_first_of(c) != String::npos;
 }
@@ -140,9 +140,6 @@ String last_word(const String&);
 /// Remove the last word from a string, leaves whitespace before that word
 String strip_last_word(const String&);
 /// Trim punctuation at the start/end of a word in the range [start..end)
 void trim_punctuation(const String&, size_t& start, size_t& end);
 bool is_word_start_punctuation(Char c);
 bool is_word_end_punctuation(Char c);