//+----------------------------------------------------------------------------+ //| Description: Magic Set Editor - Program to make Magic (tm) cards | //| Copyright: (C) Twan van Laarhoven and the other MSE developers | //| License: GNU General Public License 2 or later (see file COPYING) | //+----------------------------------------------------------------------------+ // ----------------------------------------------------------------------------- : Includes #include #include #include #include // ----------------------------------------------------------------------------- : Unicode void writeUTF8(wxTextOutputStream& stream, const String& str) { #ifdef UNICODE stream.WriteString(str); #else wxWCharBuffer buf = str.wc_str(*wxConvCurrent); stream.WriteString(wxString(buf, wxConvUTF8)); #endif } // ----------------------------------------------------------------------------- : Char functions #ifdef CHAR_FUNCTIONS_ARE_SLOW Char toLower(Char c) { if (c <= 128) { if (c >= _('A') && c <= _('Z')) return c + (_('a') - _('A')); else return c; } else { return IF_UNICODE( towlower(c) , tolower(c) ); } } Char toUpper(Char c) { if (c <= 128) { if (c >= _('a') && c <= _('z')) return c + (_('A') - _('a')); else return c; } else { return IF_UNICODE( towupper(c) , toupper(c) ); } } #endif // ----------------------------------------------------------------------------- : String utilities StringView trim(StringView s) { String::const_iterator begin = s.begin(); String::const_iterator end = s.end(); while (begin != end && isSpace(*begin)) ++begin; while (begin != end && isSpace(*(end - 1))) --end; return StringView(begin, end); } StringView trim_left(StringView s) { String::const_iterator begin = s.begin(); String::const_iterator end = s.end(); while (begin != end && isSpace(*begin)) ++begin; return StringView(begin, end); } String substr_replace(const String& input, size_t start, size_t end, const String& replacement) { return input.substr(0,start) + replacement + input.substr(end); } String replace_all(const String& heystack, const String& needle, const String& replacement) { String ret = heystack; ret.Replace(needle, replacement); return ret; } String reverse_string(String const& input) { // Note: std::reverse doesn't work because of unicode encoding stuff String reversed; for (auto it = input.rbegin(); it != input.rend(); ++it) { reversed += *it; } return reversed; } // ----------------------------------------------------------------------------- : Caseing /// Quick check to see if the substring starting at the given iterator is equal to some given string bool is_substr(const String& s, String::const_iterator it, const Char* cmp) { while (it != s.end() && *cmp != 0) { if (*it++ != *cmp++) return false; } return *cmp == 0; } void capitalize_in_place(String& s) { bool after_space = true; for (String::iterator it = s.begin(); it != s.end(); ++it) { if (*it == _(' ') || *it == _('/')) { after_space = true; } else if (after_space) { after_space = false; if (it != s.begin() && (is_substr(it, s.end(), _("is ")) || is_substr(it, s.end(), _("the ")) || is_substr(it, s.end(), _("in ")) || is_substr(it, s.end(), _("of ")) || is_substr(it, s.end(), _("to ")) || is_substr(it, s.end(), _("at ")) || is_substr(it, s.end(), _("a " )))) { // Short words are not capitalized, keep lower case } else { *it = toUpper(*it); } } } } void capitalize_sentence_in_place(String& s) { if (!s.empty()) { s[0] = toUpper(s[0]); } } void canonical_name_form_in_place(String& str) { for (String::iterator it = str.begin(); it != str.end(); ++it) { if (*it == ' ') *it = '_'; } } void uncanonical_name_form_in_place(String& str) { for (String::iterator it = str.begin(); it != str.end(); ++it) { if (*it == '_') *it = ' '; } } String name_to_caption(const String& str) { String ret; ret.reserve(str.size()); bool leading = true, first = true; for (wxUniChar c : str) { if ((c == _('_') || c == _(' '))) { ret += leading ? c : wxUniChar(_(' ')); } else if (first) { // capitalize_sentence ret += toUpper(c); leading = false; first = false; } else { ret += c; leading = false; } } return ret; } String singular_form(const String& str) { assert(str.size() > 1); assert(str.GetChar(str.size() - 1) == _('s')); // ends in 's' if (str.size() > 3 && is_substr(str, str.size()-3, _("ies"))) { return str.substr(0, str.size() - 3) + _("y"); } return str.substr(0, str.size() - 1); } // ----------------------------------------------------------------------------- : Comparing / finding // Nice unicode normalization tables, probably not conform the standards char latin_1[] = "aaaaaaaceeeeiiii" "dnooooo ouuuuy " "aaaaaaaceeeeiiii" "dnooooo ouuuuy y"; char latin_A[] = "aaaaaaccccccccdd" "ddeeeeeeeeeegggg" "gggghhhhiiiiiiii" "iiiijjkkklllllll" "lllnnnnnnnnnoooo" "oooorrrrrrssssss" "ssttttttuuuuuuuu" "uuuuwwyyyzzzzzzs"; char latin_B[] = "bbbbbbcccdddddee" "effgg iikkllmnno" "oo pprssssttttu" "uuuyyzz " " dddlllnnnaai" "ioouuuuuuuuuueaa" "aaaaggggkkoooo " "jdddgg nnaaaaoo" "aaaaeeeeiiiioooo" "rrrruuuusstt hh" "nd zzaaeeoooooo" "ooyylntj acclts" "z buveejjqqrryy"; char latin_E[] = "aabbbbbbccdddddd" "ddddeeeeeeeeeeff" "gghhhhhhhhhhiiii" "kkkkkkllllllllmm" "mmmmnnnnnnnnoooo" "oooopppprrrrrrrr" "sssssssssstttttt" "ttuuuuuuuuuuvvvv" "wwwwwwwwwwxxxxyy" "zzzzzzhtwyas " "aaaaaaaaaaaaaaaa" "aaaaaaaaeeeeeeee" "eeeeeeeeiiiioooo" "oooooooooooooooo" "oooouuuuuuuuuuuu" "uuyyyyyyyy "; /// Remove accents from a (lowercase) character Char remove_accents(Char c) { char dec = ' '; if (c >= 0xC0) { if (c <= 0xFF) { // Latin 1 dec = latin_1[c - 0xC0]; } else if (c <= 0x17E) { // Latin extended A dec = latin_A[c - 0x100]; } else if (c <= 0x180 && c <= 0x240) { // Latin extended B dec = latin_B[c - 0x180]; } else if (c <= 0x1E00 && c <= 0x1EFF) { // Latin additional dec = latin_E[c - 0x1E00]; } } return dec == ' ' ? toLower(c) : dec; } /// Is c a precomposed character (not counting accent marks) /** If so, returns the second character of the decomposition */ Char decompose_char2(Char c) { if (c < 0xC6) { return 0; } else if (c == 0xC6 || c == 0xE6 || c == 0x152 || c == 0x153 || c == 0x1E2 || c == 0x1E3 || c == 0x1FC || c == 0x1FD) { return _('e'); // "ae" or "oe" } else if (c == 0x132 || c == 0x133 || (c >= 0x1C7 && c <= 0x1CC)) { return _('j'); // "ij", "lj", "nj" } else if ((c >= 0x1C4 && c <= 0x1C6) || (c >= 0x1F1 && c <= 0x1F3)) { return _('z'); // "dz" } else { return 0; } } int smart_compare(const String& sa, const String& sb) { bool in_num = false; // are we inside a number? bool lt = false; // is sa less than sb? bool eq = true; // so far is everything equal? size_t na = sa.size(), nb = sb.size(); size_t pa = 0, pb = 0; for (; pa < na && pb < nb ; ++pa, ++pb) { Char a = sa.GetChar(pa), b = sb.GetChar(pb); next: bool da = isDigit(a), db = isDigit(b); if (da && db) { // compare numbers in_num = true; if (eq && a != b) { eq = false; lt = a < b; } } else if (in_num && da) { // comparing numbers, one is longer, therefore it is greater return 1; } else if (in_num && db) { return -1; } else if (in_num && !eq) { // two numbers of the same length, but not equal return lt ? -1 : 1; } else if (a != b) { // not a number eq = true; lt = false; if (a >= 0x20 && b >= 0x20) { // compare characters Char la = remove_accents(a), lb = remove_accents(b); // Decompose characters Char la2 = decompose_char2(a), lb2 = decompose_char2(b); // Compare if (la < lb) return -1; if (la > lb) return 1; // Remaining from decomposition if (la2 || lb2) { if (la2) a = la2; else { if (++pa >= na) return 1; a = sa.GetChar(pa); } if (lb2) b = lb2; else { if (++pb >= nb) return -1; b = sb.GetChar(pb); } goto next; // don't move to the next character in both strings } } else { // control characters if (a < b) return -1; else return 1; } } in_num = da && db; } // When we are at the end; shorter strings come first // This is true for normal string collation // and also when both end in a number and another digit follows if (in_num) { if (na - pa < nb - pb) { // number b continues? Char b = sb.GetChar(pb); if (isDigit(b) || eq) return -1; // b is longer } else if (na - pa > nb - pb) { Char a = sa.GetChar(pa); if (isDigit(a) || eq) return 1; // a is longer } return eq ? 0 : lt ? -1 : 1; // compare numbers } else { return na - pa == nb - pb ? 0 : na - pa < nb - pb ? -1 : 1; // outside number, shorter string comes first } } bool smart_less(const String& sa, const String& sb) { return smart_compare(sa, sb) == -1; } bool smart_equal(const String& sa, const String& sb) { return smart_compare(sa, sb) == 0; } bool starts_with(const String& str, const String& start) { if (str.size() < start.size()) return false; return equal(start.begin(), start.end(), str.begin()); } bool is_substr_i(const String& str, size_t pos, const Char* cmp) { for (String::const_iterator it = str.begin() + pos ; *cmp && it < str.end() ; ++cmp, ++it) { if (toLower(*cmp) != toLower(*it)) return false; } return *cmp == _('\0'); } bool is_substr_i(const String& str, size_t pos, const String& cmp) { return is_substr_i(str, pos, static_cast(cmp.c_str())); } bool canonical_name_compare(StringView as, const Char* b) { assert(canonical_name_form(b) == b); for (String::const_iterator a_it = as.begin(); a_it != as.end(); ++a_it, ++b) { if (*a_it != *b && !(*a_it == '_' && *b == ' ')) { return false; } } return true; } size_t find_i(const String& heystack, const String& needle) { if (needle.empty()) return 0; for (size_t i = 0 ; i + needle.size() <= heystack.size() ; ++i) { if (is_substr_i(heystack, i, needle)) return true; } return String::npos; } // ----------------------------------------------------------------------------- : Regular expressions /// Escape a single character for use in regular expressions String regex_escape(Char c) { if (c == _('(') || c == _(')') || c == _('[') || c == _(']') || c == _('{') || c == _('.') || c == _('^') || c == _('$') || c == _('#') || c == _('\\') || c == _('|') || c == _('+') || c == _('*') || c == _('?')) { // c needs to be escaped return _("\\") + String(1,c); } else { return String(1,c); } } /// Escape a string for use in regular expressions String regex_escape(const String& s) { String ret; FOR_EACH_CONST(c,s) ret += regex_escape(static_cast(c)); return ret; } String make_non_capturing(const String& re) { String ret; bool escape = false, bracket = false, capture = false; FOR_EACH_CONST(c, re) { if (capture) { if (c != _('?')) { // change this capture into a non-capturing "(" by appending "?:" ret += _("?:"); } capture = false; } if (escape) { // second char of escape sequence escape = false; } else if (c == _('\\')) { // start of escape sequence escape = true; } else if (c == _('[')) { // start of [...] bracket = true; } else if (c == _(']')) { // end of [...] bracket = false; } else if (bracket && c == _('(')) { // wx has a bug, it counts the '(' in "[(]" as a matching group // escape it so wx doesn't see it ret += _('\\'); } else if (c == _('(')) { // start of capture? capture = true; } ret += c; } return ret; }