Rewrite spec_sort: don't use REMOVED, fix code to work with wx unicode strings.

This commit is contained in:
Twan van Laarhoven
2020-04-14 21:07:33 +02:00
parent 0cffdb7d1a
commit 2ef73f3fa3
+110 -88
View File
@@ -10,7 +10,7 @@
#include <util/spec_sort.hpp> #include <util/spec_sort.hpp>
#include <util/error.hpp> #include <util/error.hpp>
const Char REMOVED = _('\2'); const Char REMOVED = _('\0');
const Char PLACEHOLDER = _('\3'); const Char PLACEHOLDER = _('\3');
String spec_sort(const String& spec, String& input, String& ret); String spec_sort(const String& spec, String& input, String& ret);
@@ -24,14 +24,14 @@ class SpecIterator {
: spec(spec), pos(pos) : spec(spec), pos(pos)
{} {}
Char value; ///< Current character wxUniChar value; ///< Current character
bool escaped; ///< Was the current character escaped? bool escaped; ///< Was the current character escaped?
bool preceded_by_space; ///< Was there a ' ' before this character? bool preceded_by_space; ///< Was there a ' ' before this character?
/// Move to the next item in the specification. /// Move to the next item in the specification.
/** returns false if we are at the end or encounter close. /** returns false if we are at the end or encounter close.
*/ */
bool nextUntil(Char close, bool skip_space = true) { bool nextUntil(wxUniChar close, bool skip_space = true) {
if (pos >= spec.size()) { if (pos >= spec.size()) {
value = 0; value = 0;
if (close == 0) { if (close == 0) {
@@ -112,34 +112,43 @@ class SpecIterator {
// ----------------------------------------------------------------------------- : Sort functions // ----------------------------------------------------------------------------- : Sort functions
//using Bag = vector<wxUniChar>;
using Bag = String;
/// Remove every occurrence of a character from a bag
/** The characters that are kept stay in their original order.
 *  @param c      the character to remove
 *  @param input  the bag to remove from, compacted in place
 *  @return       the number of characters that were removed
 */
size_t count_and_remove(wxUniChar c, Bag& input) {
  const size_t total = input.size();
  size_t kept = 0; // write position, never ahead of the read position
  for (size_t pos = 0; pos < total; ++pos) {
    const wxUniChar ch = input[pos];
    if (ch == c) continue; // dropped, counted implicitly as total - kept
    input[kept++] = ch;
  }
  input.resize(kept);
  return total - kept;
}
/// Sort a string using a specification using the shortest cycle method, see spec_sort /// Sort a string using a specification using the shortest cycle method, see spec_sort
/** Removed used characters from input! */ /** Removed used characters from input! */
void cycle_sort(const String& spec, String& input, String& ret) { void cycle_sort(const String& spec, Bag& input, Bag& ret) {
size_t size = spec.size(); // count occurences of each item in spec
vector<UInt> counts; vector<size_t> counts;
// count occurences of each char in spec for(auto s : spec) {
FOR_EACH_CONST(s, spec) { counts.push_back(count_and_remove(s, input));
UInt c = 0;
for(wxUniCharRef i : input) {
if (s == i) {
i = REMOVED; // remove
c++;
}
}
counts.push_back(c);
} }
// determine best start point // determine best start point
size_t best_start = 0; size_t best_start = 0;
UInt best_start_score = 0xffffffff; size_t best_start_score = 0xffffffff;
for (size_t start = 0 ; start < size ; ++start) { for (size_t start = 0 ; start < spec.size() ; ++start) {
// score of a start position, can be considered as: // score of a start position, can be considered as:
// - count saturated to binary // - count saturated to binary
// - rotated left by start // - rotated left by start
// - interpreted as a binary number, but without trailing 0s // - interpreted as a binary number, but without trailing 0s
UInt score = 0, mul = 1; size_t score = 0, mul = 1;
for (size_t i = 0 ; i < size ; ++i) { for (size_t i = 0 ; i < spec.size() ; ++i) {
mul *= 2; mul *= 2;
if (counts[(start + i) % size]) { if (counts[(start + i) % spec.size()]) {
score = score * mul + 1; score = score * mul + 1;
mul = 1; mul = 1;
} }
@@ -149,72 +158,94 @@ void cycle_sort(const String& spec, String& input, String& ret) {
best_start = start; best_start = start;
} }
} }
// return string // add to return string
for (size_t i = 0 ; i < size ; ++i) { for (size_t i = 0 ; i < spec.size() ; ++i) {
size_t pos = (best_start + i) % size; size_t pos = (best_start + i) % spec.size();
ret.append(counts[pos], spec[pos]); ret.append(counts[pos], spec[pos]);
} }
} }
/// Sort a string, keeping the characters in the original order /// Sort a string, keeping the characters in the original order
/** Removed used characters from input! */ /** Removed used characters from input! */
void mixed_sort(const String& spec, String& input, String& ret) { void mixed_sort(const String& spec, Bag& input, Bag& ret) {
size_t j = 0;
for (wxUniCharRef c : input) { for (wxUniCharRef c : input) {
if (spec.find(c) != String::npos) { if (spec.find(c) != String::npos) {
ret += c; ret += c;
c = REMOVED; } else {
input[j++] = c;
} }
} }
input.resize(j);
} }
/// Sort a string, find a compound item /// Sort a string, find a compound item
/** Removed used characters from input! */ /** Removed used characters from input! */
void compound_sort(const String& spec, String& input, String& ret) { void compound_sort(const String& spec, Bag& input, Bag& ret) {
size_t pos = input.find(spec); size_t j=0;
while (pos != String::npos) { for (size_t i=0 ; i < input.size() ; ++i) {
ret += spec; // match?
for (size_t j = 0 ; j < spec.size() ; ++j) input.SetChar(pos + j, REMOVED); if (i+spec.size() <= input.size() && std::equal(spec.begin(), spec.end(), input.begin()+i)) {
pos = input.find(spec, pos + 1); i += spec.size() - 1;
//std::copy(spec.begin(), spec.end(), back_inserter(ret));
ret += spec;
} else {
input[j++] = input[i];
}
} }
input.resize(j);
} }
/// Sort things matching a pattern /// Sort things matching a pattern
void pattern_sort(const String& pattern, const String& spec, String& input, String& ret) { void pattern_sort(const String& pattern, const String& spec, Bag& input, Bag& ret) {
if (pattern.size() > input.size()) return; if (pattern.size() > input.size()) return;
size_t end = input.size() - pattern.size() + 1; size_t end = input.size() - pattern.size() + 1;
for (size_t pos = 0 ; pos < end ; ++pos) { size_t pos_new = 0;
// does the pattern match here? //for (size_t pos = 0 ; pos < end ; ++pos) {
String placeholders; for (size_t pos = 0; pos < end; ++pos) {
bool match = true; if (pos + pattern.size() > input.size()) {
for (size_t j = 0 ; j < pattern.size() ; ++j) { goto no_match;
Char c = input.GetChar(pos + j);
Char p = pattern.GetChar(j);
if (c == REMOVED) { match = false; break; }
else if (p == PLACEHOLDER) {
placeholders += c;
} else if (c != p) { match = false; break; }
} }
// do we have a match? {
if (match) { // does the pattern match here?
String placeholders;
for (size_t j = 0; j < pattern.size(); ++j) {
wxUniChar c = input[pos + j];
wxUniChar p = pattern[j];
if (c == REMOVED) {
goto no_match;
}
else if (p == PLACEHOLDER) {
placeholders += c;
}
else if (c != p) {
goto no_match;
}
}
// we have a match
// sort placeholders // sort placeholders
String new_placeholders = spec_sort(spec, placeholders); String new_placeholders = spec_sort(spec, placeholders);
if (new_placeholders.size() == placeholders.size()) { if (new_placeholders.size() == placeholders.size()) {
// add to output, erase from input // add to output, erase from input
size_t ph = 0; size_t ph = 0;
for (size_t j = 0 ; j < pattern.size() ; ++j) { for (size_t j = 0; j < pattern.size(); ++j) {
Char p = pattern.GetChar(j); wxUniChar p = pattern[j];
if (p == PLACEHOLDER) { if (p == PLACEHOLDER) {
ret += new_placeholders.GetChar(ph++); ret += new_placeholders[ph++];
} else { }
else {
ret += p; ret += p;
} }
input.SetChar(pos + j, REMOVED);
} }
// erase from input // skip over matched pattern
pos += pattern.size() - 1; pos += pattern.size() - 1;
continue;
} }
} }
no_match:
input[pos_new++] = input[pos];
} }
input.resize(pos_new);
} }
/// Sort things in place, keep the rest of the input /// Sort things in place, keep the rest of the input
@@ -240,18 +271,14 @@ void in_place_sort(const String& spec, String& input, String& ret) {
String spec_sort(const String& spec, String& input, String& ret) { String spec_sort(const String& spec, String& input, String& ret) {
SpecIterator it(spec); SpecIterator it(spec);
while(it.nextUntil(0)) { while(it.nextUntil(0)) {
if (it.escaped) { // single character, escaped if (it.escaped) { // single character, escaped
for (wxUniCharRef d : input) { size_t count = count_and_remove(it.value, input);
if (d == it.value) { ret.append(count, it.value);
ret += d; } else if (it.value == _('<')) { // keep only a single copy
d = REMOVED;
}
}
} else if (it.value == _('<')) { // keep only a single copy
while (it.nextUntil(_('>'))) { while (it.nextUntil(_('>'))) {
size_t pos = input.find_first_of(it.value); size_t pos = input.find_first_of(it.value);
if (pos != String::npos) { if (pos != String::npos) {
input.SetChar(pos, REMOVED); input.erase(pos, 1);
ret += it.value; // input contains it.value ret += it.value; // input contains it.value
} }
} }
@@ -259,7 +286,7 @@ String spec_sort(const String& spec, String& input, String& ret) {
while (it.nextUntil(_(')'))) { while (it.nextUntil(_(')'))) {
size_t pos = input.find_first_of(it.value); size_t pos = input.find_first_of(it.value);
if (pos != String::npos) { if (pos != String::npos) {
input.SetChar(pos, REMOVED); input.erase(pos, 1);
ret += it.value; // input contains it.value ret += it.value; // input contains it.value
} }
} }
@@ -282,9 +309,10 @@ String spec_sort(const String& spec, String& input, String& ret) {
// read pattern // read pattern
while (it.nextUntil(_(' '), false)) { while (it.nextUntil(_(' '), false)) {
if (it.value == _('.') && !it.escaped) { if (it.value == _('.') && !it.escaped) {
it.value = PLACEHOLDER; pattern += PLACEHOLDER;
} else {
pattern += it.value;
} }
pattern += it.value;
} }
// read spec to apply to pattern // read spec to apply to pattern
String sub_spec = it.readRawParam(_(')')); String sub_spec = it.readRawParam(_(')'));
@@ -297,41 +325,35 @@ String spec_sort(const String& spec, String& input, String& ret) {
in_place_sort(sub_spec, input, ret); in_place_sort(sub_spec, input, ret);
} else if (it.keyword(_("any()"))) { // remaining input } else if (it.keyword(_("any()"))) { // remaining input
FOR_EACH(d, input) { FOR_EACH_CONST(d, input) {
if (d != REMOVED) { if (d != REMOVED) {
ret += d; ret += d;
d = REMOVED;
} }
} }
input.clear();
} else if (it.keyword(_("reverse_order("))) { // reverse order of preference } else if (it.keyword(_("reverse_order("))) { // reverse order of preference
size_t old_ret_size = ret.size(); vector<String> parts;
while (it.value != _(')')) { while (it.value != _(')')) {
size_t before_ret_size = ret.size();
String sub_spec = it.readRawParam(_(')'),_(' ')); String sub_spec = it.readRawParam(_(')'),_(' '));
spec_sort(sub_spec, input, ret); String part;
// reverse this item spec_sort(sub_spec, input, part);
reverse(ret.begin() + before_ret_size, ret.end()); parts.push_back(part);
}
// add parts in reverse order
reverse(parts.begin(), parts.end());
for (auto const& part : parts) {
ret += part;
} }
// re-reverse all items
reverse(ret.begin() + old_ret_size, ret.end());
} else if (it.keyword(_("ordered("))) { // in spec order } else if (it.keyword(_("ordered("))) { // in spec order
while (it.nextUntil(_(')'))) { while (it.nextUntil(_(')'))) {
FOR_EACH(d, input) { size_t count = count_and_remove(it.value, input);
if (d == it.value) { ret.append(count, it.value);
ret += d;
d = REMOVED;
}
}
}
} else { // single char
FOR_EACH(d, input) {
if (d == it.value) {
ret += d;
d = REMOVED;
}
} }
} else { // single char
size_t count = count_and_remove(it.value, input);
ret.append(count, it.value);
} }
} }
return ret; return ret;