mirror of
https://github.com/amyinspace/MagicSetEditor2.git
synced 2026-06-10 13:06:59 -04:00
Punctuation breaking is now handled by the check_spelling function. Symbols are now handled correctly (i.e. they are always considered to be spelled correctly).
git-svn-id: svn://svn.code.sf.net/p/magicseteditor/code/trunk@1268 0fc631ac-6414-0410-93d0-97cfa31319b6
This commit is contained in:
@@ -15,14 +15,34 @@
|
||||
|
||||
// ----------------------------------------------------------------------------- : Functions
|
||||
|
||||
void check_word(const String& input, String& out, size_t start, size_t end, SpellChecker& checker, bool must_be_empty) {
|
||||
if (start >= end) return;
|
||||
bool spelled_correctly(const String& input, size_t start, size_t end, SpellChecker& checker) {
|
||||
// untag
|
||||
String word = untag(input.substr(start,end-start));
|
||||
// TODO: handle keywords and cardname references
|
||||
bool error = !word.empty() && (must_be_empty || !checker.spell_with_punctuation(word));
|
||||
if (error) out += _("<error-spelling>");
|
||||
if (word.empty()) return true;
|
||||
// remove punctuation
|
||||
size_t start_u = 0, end_u = String::npos;
|
||||
trim_punctuation(word, start_u, end_u);
|
||||
if (start_u >= end_u) {
|
||||
// punctuation only, but not empty => error
|
||||
return false;
|
||||
}
|
||||
// find the tagged text without punctuation
|
||||
size_t start2 = untagged_to_index(input, start_u, true, start);
|
||||
size_t end2 = untagged_to_index(input, end_u, true, start);
|
||||
if (in_tag(input,_("<sym"),start2,end2) != String::npos) {
|
||||
// symbols are always spelled correctly
|
||||
return true;
|
||||
}
|
||||
// run through spellchecker
|
||||
return checker.spell(word.substr(start_u,end_u));
|
||||
}
|
||||
|
||||
void check_word(const String& input, String& out, size_t start, size_t end, SpellChecker& checker) {
|
||||
if (start >= end) return;
|
||||
bool good = spelled_correctly(input, start, end, checker);
|
||||
if (!good) out += _("<error-spelling>");
|
||||
out.append(input, start, end-start);
|
||||
if (error) out += _("</error-spelling>");
|
||||
if (!good) out += _("</error-spelling>");
|
||||
}
|
||||
|
||||
SCRIPT_FUNCTION(check_spelling) {
|
||||
@@ -37,37 +57,32 @@ SCRIPT_FUNCTION(check_spelling) {
|
||||
input = remove_tag(input, _("<error-spelling"));
|
||||
// now walk over the words in the input, and mark misspellings
|
||||
String result;
|
||||
size_t word_start = 0, pos = 0;
|
||||
bool must_be_empty = false; // must this word be empty?
|
||||
size_t word_start = 0, word_end = 0, pos = 0;
|
||||
while (pos < input.size()) {
|
||||
Char c = input.GetChar(pos);
|
||||
if (c == _('<')) {
|
||||
if (is_substr(input,pos,_("<sym"))) {
|
||||
// before symbols should be empty
|
||||
check_word(input,result, word_start,pos, checker, true);
|
||||
// don't spellcheck symbols
|
||||
word_start = pos;
|
||||
pos = min(input.size(), match_close_tag_end(input,pos));
|
||||
result.append(input, word_start, pos-word_start);
|
||||
word_start = pos;
|
||||
must_be_empty = true; // need a space after symbols
|
||||
if (word_start == pos) {
|
||||
// prefer to place word start inside tags
|
||||
pos = skip_tag(input,pos);
|
||||
result.append(input, word_start, pos - word_start);
|
||||
word_end = word_start = pos;
|
||||
} else {
|
||||
pos = skip_tag(input,pos);
|
||||
}
|
||||
} else if (isSpace(c)) {
|
||||
} else if (isSpace(c) || c == EM_DASH || c == EN_DASH) {
|
||||
// word boundary -> check word
|
||||
check_word(input,result, word_start,pos, checker, must_be_empty);
|
||||
check_word(input, result, word_start, word_end, checker);
|
||||
// non-word characters
|
||||
result.append(input, word_end, pos - word_end + 1);
|
||||
// next
|
||||
result += c;
|
||||
pos++;
|
||||
word_start = pos;
|
||||
must_be_empty = false;
|
||||
word_start = word_end = pos = pos + 1;
|
||||
} else {
|
||||
pos++;
|
||||
word_end = pos = pos + 1;
|
||||
}
|
||||
}
|
||||
// last word
|
||||
check_word(input,result, word_start,input.size(), checker, must_be_empty);
|
||||
check_word(input, result, word_start, word_end, checker);
|
||||
result.append(input, word_end, String::npos);
|
||||
// done
|
||||
SCRIPT_RETURN(result);
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include <util/prec.hpp>
|
||||
#include <util/spell_checker.hpp>
|
||||
#include <util/string.hpp>
|
||||
#include <util/io/package_manager.hpp>
|
||||
|
||||
// ----------------------------------------------------------------------------- : Spell checker : construction
|
||||
@@ -47,10 +48,33 @@ void SpellChecker::destroyAll() {
|
||||
|
||||
bool SpellChecker::spell(const String& word) {
|
||||
if (word.empty()) return true; // empty word is okay
|
||||
// fix curly quotes, especially apostrophes
|
||||
String fixed;
|
||||
FOR_EACH_CONST(c,word) {
|
||||
if (c == LEFT_SINGLE_QUOTE || c == RIGHT_SINGLE_QUOTE) {
|
||||
fixed += _('\'');
|
||||
} else if (c == LEFT_DOUBLE_QUOTE || c == RIGHT_DOUBLE_QUOTE) {
|
||||
fixed += _('\"');
|
||||
} else if (c == 0x00C6) {
|
||||
// expand ligatures, TODO: put this in a better place
|
||||
fixed += _("Ae");
|
||||
} else if (c == 0x0132) {
|
||||
fixed += _("IJ");
|
||||
} else if (c == 0x0152) {
|
||||
fixed += _("Oe");
|
||||
} else if (c == 0xFB01) {
|
||||
fixed += _("fi");
|
||||
} else if (c == 0xFB02) {
|
||||
fixed += _("fl");
|
||||
} else {
|
||||
fixed += c;
|
||||
}
|
||||
}
|
||||
// convert encoding
|
||||
#ifdef UNICODE
|
||||
wxCharBuffer str = word.mb_str(encoding);
|
||||
wxCharBuffer str = fixed.mb_str(encoding);
|
||||
#else
|
||||
wxCharBuffer str = word.mb_str(encoding);
|
||||
wxCharBuffer str = fixed.mb_str(encoding);
|
||||
#endif
|
||||
if (*str == '\0') {
|
||||
// If encoding fails we get an empty string, since the word was not empty this can never happen
|
||||
@@ -60,12 +84,9 @@ bool SpellChecker::spell(const String& word) {
|
||||
return Hunspell::spell(str);
|
||||
}
|
||||
|
||||
const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE;
|
||||
const String word_end = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE;
|
||||
|
||||
bool SpellChecker::spell_with_punctuation(const String& word) {
|
||||
size_t first = word.find_first_not_of(word_start);
|
||||
size_t last = word.find_last_not_of(word_end);
|
||||
if (first > last) return false; // just punctuation is incorrect
|
||||
return spell(word.substr(first, last-first+1));
|
||||
size_t start = 0, end = String::npos;
|
||||
trim_punctuation(word, start, end);
|
||||
if (start >= end) return true; // just punctuation is wrong
|
||||
return spell(word.substr(start,end-start));
|
||||
}
|
||||
|
||||
@@ -122,6 +122,15 @@ String strip_last_word(const String& s) {
|
||||
}
|
||||
}
|
||||
|
||||
const String word_start = String(_("[({\"\'")) + LEFT_SINGLE_QUOTE + LEFT_DOUBLE_QUOTE;
|
||||
const String word_end = String(_("])}.,;:?!\"\'")) + RIGHT_SINGLE_QUOTE + RIGHT_DOUBLE_QUOTE;
|
||||
|
||||
void trim_punctuation(const String& str, size_t& start, size_t& end) {
|
||||
start = str.find_first_not_of(word_start, start);
|
||||
end = str.find_last_not_of(word_end, min(end,str.size()-1)) + 1;
|
||||
if (start >= end) start = end;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------- : Caseing
|
||||
|
||||
/// Quick check to see if the substring starting at the given iterator is equal to some given string
|
||||
|
||||
@@ -136,6 +136,9 @@ String last_word(const String&);
|
||||
/// Remove the last word from a string, leaves whitespace before that word
|
||||
String strip_last_word(const String&);
|
||||
|
||||
/// Trim punctuation at the start/end of a word in the range [start..end)
|
||||
void trim_punctuation(const String&, size_t& start, size_t& end);
|
||||
|
||||
// ----------------------------------------------------------------------------- : Caseing
|
||||
|
||||
/// Make each word in a string start with an upper case character.
|
||||
|
||||
@@ -378,8 +378,8 @@ String untag_for_cursor(const String& str) {
|
||||
|
||||
// ----------------------------------------------------------------------------- : Untagged position
|
||||
|
||||
size_t untagged_to_index(const String& str, size_t pos, bool inside) {
|
||||
size_t i = 0, p = 0;
|
||||
size_t untagged_to_index(const String& str, size_t pos, bool inside, size_t start_index) {
|
||||
size_t i = start_index, p = 0;
|
||||
while (i < str.size()) {
|
||||
Char c = str.GetChar(i);
|
||||
if (c == _('<')) {
|
||||
|
||||
@@ -128,8 +128,10 @@ String untag_for_cursor(const String& str);
|
||||
/// Find the tagged position corresponding to the given untagged position.
|
||||
/** An untagged position in str is a position in untag(str).
|
||||
* @param inside if inside then it prefers to find positions inside tags (after open tags, before close tags)
|
||||
* @param start_index is the position to start at in the string,
|
||||
* untagged_to_index(s,_,_,i) == untagged_to_index(s.substr(i),_,_)+i
|
||||
*/
|
||||
size_t untagged_to_index(const String& str, size_t pos, bool inside);
|
||||
size_t untagged_to_index(const String& str, size_t pos, bool inside, size_t start_index = 0);
|
||||
|
||||
/// Find the untagged position corresponding to the given tagged position.
|
||||
/** An untagged position in str is a position in untag(str).
|
||||
|
||||
Reference in New Issue
Block a user