From 09216eca5de3a8d499cf465e727ceb7b58896565 Mon Sep 17 00:00:00 2001 From: twanvl Date: Mon, 29 Dec 2008 22:24:16 +0000 Subject: [PATCH] punctuation breaking is now handled by check_spelling function. Symbols are now handled correctly (i.e. they are considered to be always correct) git-svn-id: svn://svn.code.sf.net/p/magicseteditor/code/trunk@1268 0fc631ac-6414-0410-93d0-97cfa31319b6 --- src/script/functions/spelling.cpp | 65 +++++++++++++++++++------------ src/util/spell_checker.cpp | 39 ++++++++++++++----- src/util/string.cpp | 9 +++++ src/util/string.hpp | 3 ++ src/util/tagged_string.cpp | 4 +- src/util/tagged_string.hpp | 4 +- 6 files changed, 87 insertions(+), 37 deletions(-) diff --git a/src/script/functions/spelling.cpp b/src/script/functions/spelling.cpp index 0d295670..31c840f9 100644 --- a/src/script/functions/spelling.cpp +++ b/src/script/functions/spelling.cpp @@ -15,14 +15,34 @@ // ----------------------------------------------------------------------------- : Functions -void check_word(const String& input, String& out, size_t start, size_t end, SpellChecker& checker, bool must_be_empty) { - if (start >= end) return; +bool spelled_correctly(const String& input, size_t start, size_t end, SpellChecker& checker) { + // untag String word = untag(input.substr(start,end-start)); - // TODO: handle keywords and cardname references - bool error = !word.empty() && (must_be_empty || !checker.spell_with_punctuation(word)); - if (error) out += _(""); + if (word.empty()) return true; + // remove punctuation + size_t start_u = 0, end_u = String::npos; + trim_punctuation(word, start_u, end_u); + if (start_u >= end_u) { + // punctuation only, but not empty => error + return false; + } + // find the tagged text without punctuation + size_t start2 = untagged_to_index(input, start_u, true, start); + size_t end2 = untagged_to_index(input, end_u, true, start); + if (in_tag(input,_("= end) return; + bool good = spelled_correctly(input, start, end, checker); + if (!good) 
out += _(""); out.append(input, start, end-start); - if (error) out += _(""); + if (!good) out += _(""); } SCRIPT_FUNCTION(check_spelling) { @@ -37,37 +57,32 @@ SCRIPT_FUNCTION(check_spelling) { input = remove_tag(input, _(" check word - check_word(input,result, word_start,pos, checker, must_be_empty); + check_word(input, result, word_start, word_end, checker); + // non-word characters + result.append(input, word_end, pos - word_end + 1); // next - result += c; - pos++; - word_start = pos; - must_be_empty = false; + word_start = word_end = pos = pos + 1; } else { - pos++; + word_end = pos = pos + 1; } } // last word - check_word(input,result, word_start,input.size(), checker, must_be_empty); + check_word(input, result, word_start, word_end, checker); + result.append(input, word_end, String::npos); // done SCRIPT_RETURN(result); } diff --git a/src/util/spell_checker.cpp b/src/util/spell_checker.cpp index 38bac732..125d0dae 100644 --- a/src/util/spell_checker.cpp +++ b/src/util/spell_checker.cpp @@ -8,6 +8,7 @@ #include #include +#include #include // ----------------------------------------------------------------------------- : Spell checker : construction @@ -47,10 +48,33 @@ void SpellChecker::destroyAll() { bool SpellChecker::spell(const String& word) { if (word.empty()) return true; // empty word is okay + // fix curly quotes, especially apostrophes + String fixed; + FOR_EACH_CONST(c,word) { + if (c == LEFT_SINGLE_QUOTE || c == RIGHT_SINGLE_QUOTE) { + fixed += _('\''); + } else if (c == LEFT_DOUBLE_QUOTE || c == RIGHT_DOUBLE_QUOTE) { + fixed += _('\"'); + } else if (c == 0x00C6) { + // expand ligatures, TODO: put this in a better place + fixed += _("Ae"); + } else if (c == 0x0132) { + fixed += _("IJ"); + } else if (c == 0x0152) { + fixed += _("Oe"); + } else if (c == 0xFB01) { + fixed += _("fi"); + } else if (c == 0xFB02) { + fixed += _("fl"); + } else { + fixed += c; + } + } + // convert encoding #ifdef UNICODE - wxCharBuffer str = word.mb_str(encoding); + 
wxCharBuffer str = fixed.mb_str(encoding); #else - wxCharBuffer str = word.mb_str(encoding); + wxCharBuffer str = fixed.mb_str(encoding); #endif if (*str == '\0') { // If encoding fails we get an empty string, since the word was not empty this can never happen @@ -60,12 +84,9 @@ bool SpellChecker::spell(const String& word) { return Hunspell::spell(str); } -const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE; -const String word_end = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE; - bool SpellChecker::spell_with_punctuation(const String& word) { - size_t first = word.find_first_not_of(word_start); - size_t last = word.find_last_not_of(word_end); - if (first > last) return false; // just punctuation is incorrect - return spell(word.substr(first, last-first+1)); + size_t start = 0, end = String::npos; + trim_punctuation(word, start, end); + if (start >= end) return true; // just punctuation: treated as correct (NOTE(review): this said "is wrong" but the code returns true - confirm intent) + return spell(word.substr(start,end-start)); } diff --git a/src/util/string.cpp b/src/util/string.cpp index 3e172b06..8944f50a 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -122,6 +122,15 @@ String strip_last_word(const String& s) { } } +const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE; +const String word_end = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE; + +void trim_punctuation(const String& str, size_t& start, size_t& end) { + start = str.find_first_not_of(word_start, start); + end = str.find_last_not_of(word_end, min(end,str.size()-1)) + 1; + if (start >= end) start = end; +} + // ----------------------------------------------------------------------------- : Caseing /// Quick check to see if the substring starting at the given iterator is equal to some given string diff --git a/src/util/string.hpp b/src/util/string.hpp index b022840a..5fdaadd1 100644 --- a/src/util/string.hpp +++ b/src/util/string.hpp @@ -136,6 +136,9 @@ String 
last_word(const String&); /// Remove the last word from a string, leaves whitespace before that word String strip_last_word(const String&); +/// Trim punctuation at the start/end of a word in the range [start..end) +void trim_punctuation(const String&, size_t& start, size_t& end); + // ----------------------------------------------------------------------------- : Caseing /// Make each word in a string start with an upper case character. diff --git a/src/util/tagged_string.cpp b/src/util/tagged_string.cpp index d6f0373f..4932f810 100644 --- a/src/util/tagged_string.cpp +++ b/src/util/tagged_string.cpp @@ -378,8 +378,8 @@ String untag_for_cursor(const String& str) { // ----------------------------------------------------------------------------- : Untagged position -size_t untagged_to_index(const String& str, size_t pos, bool inside) { - size_t i = 0, p = 0; +size_t untagged_to_index(const String& str, size_t pos, bool inside, size_t start_index) { + size_t i = start_index, p = 0; while (i < str.size()) { Char c = str.GetChar(i); if (c == _('<')) { diff --git a/src/util/tagged_string.hpp b/src/util/tagged_string.hpp index eaf8d6c6..e2513fb5 100644 --- a/src/util/tagged_string.hpp +++ b/src/util/tagged_string.hpp @@ -128,8 +128,10 @@ String untag_for_cursor(const String& str); /// Find the tagged position corresponding to the given untagged position. /** An untagged position in str is a position in untag(str). * @param inside if inside then it prefers to find positions inside tags (after open tags, before close tags) + * @param start_index is the position to start at in the string, + * untagged_to_index(s,_,_,i) == untagged_to_index(s.substr(i),_,_)+i */ -size_t untagged_to_index(const String& str, size_t pos, bool inside); +size_t untagged_to_index(const String& str, size_t pos, bool inside, size_t start_index = 0); /// Find the untagged position corresponding to the given tagged position. /** An untagged position in str is a position in untag(str).