Punctuation breaking is now handled by the check_spelling function. Symbols are now handled correctly (i.e. they are always considered to be correct).

git-svn-id: svn://svn.code.sf.net/p/magicseteditor/code/trunk@1268 0fc631ac-6414-0410-93d0-97cfa31319b6
2026-06-10 21:06:59 -04:00 · 2008-12-29 22:24:16 +00:00
parent cbfde59494
commit 09216eca5d
6 changed files with 87 additions and 37 deletions
@@ -8,6 +8,7 @@

 #include <util/prec.hpp>
 #include <util/spell_checker.hpp>
+#include <util/string.hpp>
 #include <util/io/package_manager.hpp>

 // ----------------------------------------------------------------------------- : Spell checker : construction
@@ -47,10 +48,33 @@ void SpellChecker::destroyAll() {

 bool SpellChecker::spell(const String& word) {
 	if (word.empty()) return true; // empty word is okay
+	// fix curly quotes, especially apostrophes
+	String fixed;
+	FOR_EACH_CONST(c,word) {
+		if (c == LEFT_SINGLE_QUOTE || c == RIGHT_SINGLE_QUOTE) {
+			fixed += _('\'');
+		} else if (c == LEFT_DOUBLE_QUOTE || c == RIGHT_DOUBLE_QUOTE) {
+			fixed += _('\"');
+		} else if (c == 0x00C6) {
+			// expand ligatures, TODO: put this in a better place
+			fixed += _("Ae");
+		} else if (c == 0x0132) {
+			fixed += _("IJ");
+		} else if (c == 0x0152) {
+			fixed += _("Oe");
+		} else if (c == 0xFB01) {
+			fixed += _("fi");
+		} else if (c == 0xFB02) {
+			fixed += _("fl");
+		} else {
+			fixed += c;
+		}
+	}
+	// convert encoding
 	#ifdef UNICODE
-		wxCharBuffer str = word.mb_str(encoding);
+		wxCharBuffer str = fixed.mb_str(encoding);
 	#else
-		wxCharBuffer str = word.mb_str(encoding);
+		wxCharBuffer str = fixed.mb_str(encoding);
 	#endif
 	if (*str == '\0') {
 		// If encoding fails we get an empty string, since the word was not empty this can never happen
@@ -60,12 +84,9 @@ bool SpellChecker::spell(const String& word) {
 	return Hunspell::spell(str);
 }

-const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE;
-const String word_end   = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE;
-
 bool SpellChecker::spell_with_punctuation(const String& word) {
-	size_t first = word.find_first_not_of(word_start);
-	size_t last  = word.find_last_not_of(word_end);
-	if (first > last) return false; // just punctuation is incorrect
-	return spell(word.substr(first, last-first+1));
+	size_t start = 0, end = String::npos; // begin with the whole word, npos stands for "up to the end"
+	trim_punctuation(word, start, end); // narrows [start..end) to the word without surrounding punctuation
+	if (start >= end) return true; // nothing but punctuation is left, that counts as correctly spelled
+	return spell(word.substr(start,end-start));
 }
@@ -122,6 +122,15 @@ String strip_last_word(const String& s) {
 	}
 }

+const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE; // characters trim_punctuation skips at the start of a word
+const String word_end   = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE; // characters trim_punctuation skips at the end of a word
+
+/// Trim punctuation from both ends of the half-open range [start..end) of str.
+/** start and end are in/out parameters, end may be String::npos, meaning "up to the end of str".
+ *  On return start == end if the range contained nothing but punctuation.
+ */
+void trim_punctuation(const String& str, size_t& start, size_t& end) {
+	// Clamp the exclusive end to the string, this also resolves String::npos
+	end = min(end, str.size());
+	if (start >= end) {
+		// empty range (or empty string), there is nothing to trim
+		start = end;
+		return;
+	}
+	// Skip opening punctuation, this gives npos if nothing else follows start
+	start = str.find_first_not_of(word_start, start);
+	// end is exclusive, so the last character that belongs to the range is at end-1.
+	// Searching from 'end' itself would inspect a character outside the range
+	// (usually the space after the word), and then nothing would be trimmed.
+	// If no character is found the result is npos, and npos + 1 wraps around to 0.
+	end = str.find_last_not_of(word_end, end - 1) + 1;
+	if (start >= end) start = end;
+}
+
 // ----------------------------------------------------------------------------- : Caseing

 /// Quick check to see if the substring starting at the given iterator is equal to some given string
@@ -136,6 +136,9 @@ String last_word(const String&);
 /// Remove the last word from a string, leaves whitespace before that word
 String strip_last_word(const String&);

+/// Trim punctuation at the start/end of a word in the range [start..end); start and end are updated in place, start == end if only punctuation is left
+void trim_punctuation(const String&, size_t& start, size_t& end);
+
 // ----------------------------------------------------------------------------- : Caseing

 /// Make each word in a string start with an upper case character.
@@ -378,8 +378,8 @@ String untag_for_cursor(const String& str) {

 // ----------------------------------------------------------------------------- : Untagged position

-size_t untagged_to_index(const String& str, size_t pos, bool inside) {
-	size_t i = 0, p = 0;
+size_t untagged_to_index(const String& str, size_t pos, bool inside, size_t start_index) {
+	size_t i = start_index, p = 0;
 	while (i < str.size()) {
 		Char c = str.GetChar(i);
 		if (c == _('<')) {
@@ -128,8 +128,10 @@ String untag_for_cursor(const String& str);
 /// Find the tagged position corresponding to the given untagged position.
 /** An untagged position in str is a position in untag(str).
+ *  @param pos the untagged position to look up; with a nonzero start_index it is relative to start_index (see the equation below)
 *  @param inside if inside then it prefers to find positions inside tags (after open tags, before close tags)
+ *  @param start_index is the position to start at in the string,
+ *                     untagged_to_index(s,_,_,i) == untagged_to_index(s.substr(i),_,_)+i
 */
-size_t untagged_to_index(const String& str, size_t pos, bool inside);
+size_t untagged_to_index(const String& str, size_t pos, bool inside, size_t start_index = 0);

 /// Find the untagged position corresponding to the given tagged position.
 /** An untagged position in str is a position in untag(str).