This is an attempt to show how to use fewer comparisons in selection sorting a sequence of \$n\$ items than the \$(n2-n)/2\$ naive selection sort uses.
This is a first Python re-iteration of a Java effort, find the next at Selection sort with reduced comparison count: Python iteration 2.
'''
selection sort implementations to show how to reduce
the number of comparisons (and input traversals).
Ideas:
- exploit finding min and max of n items in 3n/2 comparisons
- avoid repeating "input pair comparisons"
If and when indulging in (micro-)benchmarking,
be sure to use an appropriate framework
- and compare lemons to lemons.
'''
class Sorter:
''' In-place sorter. '''
def sort(self):
''' sort items in place. '''
raise NotImplementedError("Sorters better know how to sort");
class LoCoSelectionSorter(Sorter):
''' In-place selection sorter striving for a low comparison count. '''
def getAssignments(self, items):
raise NotImplementedError("LoCoSelectionSorters better know how to report the number of assignments");
def getComparisons(self, items):
raise NotImplementedError("LoCoSelectionSorters better know how to report the number of comparisons");
class TallyAssignments(LoCoSelectionSorter):
def getAssignments(self, items):
''' assignments to <code>items</code>. '''
return self.assignments;
def __init__(self, items):
self.items = items;
self.assignments = 0;
def trace(self):
pass;
def swap(self, i, j):
if (i != j):
items = self.items;
items[j], items[i] = items[i], items[j];
def prepare(self):
''' prepare iterations of <code>items</code>.
return first index not to process "going upwards"
'''
# ? and destination index for the first max (after) ?
raise NotImplementedError("TallyAssignments better know how to prepare iteration of items");
def iteration(self, loDest, hiDest):
''' iterate (the unordered part of) <code>items</code> once. '''
raise NotImplementedError("TallyAssignments better know how to iterate items");
def put(self, dest, value):
''' put one value in <code>items</code>,
keeping invariants as needed.
'''
self.items[dest] = value;
self.assignments += 1;
def put2(self, dest1, value1, dest2, value2):
''' puts two values in <code>items</code>,
keeping invariants as needed.
'''
self.items[dest1] = value1;
self.items[dest2] = value2;
self.assignments += 2;
def sort(self):
''' Override In-place selection sort striving for a low comparison count.
throws <code>NullPointerException</code> if an element of a
non-trivial array is </code>null</code>
'''
if not self.items or len(self.items) < 2:
return;
limit = self.prepare();
self.trace(self);
for up in range(limit):
self.iteration(up);
self.trace(self);
# ''' In-place selection sort striving for a low comparison count.
# * @throws <code>NullPointerException</code> if an element of a
# * non-trivial array is </code>null</code> '''
# public static TallyAssignments.Keeper sort(Comparable[]items) {
# TallyAssignments.Keeper sorter = new Keeper(items);
# sorter.sort();
# return sorter;
# }
class MinMax(TallyAssignments):
''' find both min and max in one traversal
avoiding about one quarter of the naive comparisons.
'''
def __init__(self, items):
super().__init__(items);
self.comparisons = 0;
def getComparisons(self, items):
'''Override'''
# print((((self.last - 3) * (self.last - 1)) * 3) / 8 + 2*self.last+1);
return self.comparisons;
#@Override
def prepare(self):
'''Override'''
last = self.last = len(self.items) - 1;
if ((len(self.items) & 1) == 1):
# for odd(length) arrays, find max and get it out of the way
# so only pairs need to be considered ever after
maxI = max(range(last+1), key=lambda i: self.items[i])
self.comparisons += last;
self.swap(maxI, last);
self.last -= 1;
return len(self.items)//2;
def minIMaxI(self, loDest, hiDest):
'''find indices of minimum and maximum'''
items = self.items;
lo, hi = (loDest, loDest+1) if items[loDest] < items[loDest+1] \
else (loDest+1, loDest);
self.comparisons += 1;
for up in range(loDest+2, hiDest, 2):
if items[up] < items[up+1]:
if items[up] < items[lo]:
lo = up;
if items[hi] < items[up+1]:
hi = up+1;
else:
if items[up+1] < items[lo]:
lo = up+1;
if items[hi] < items[up]:
hi = up;
# self.comparisons += 3;
self.comparisons += (3 * (hiDest - loDest - 1)) // 2;
return lo, hi;
def iteration(self, loDest):
'''Override'''
hiDest = self.last - loDest;
lo, hi = self.minIMaxI(loDest, hiDest);
items = self.items;
# the tricky part is three elements moving
if (lo == loDest): # min does not move
if (hi != hiDest): # max moves
ex = items[hi];
self.put(hi, items[hiDest]);
items[hiDest] = ex;
self.assignments += 1;
# else nothing moves
else: # min moves
mini = items[lo];
if (hi == hiDest): # max stays in place
self.put(lo, items[loDest]);
else: # both move
maxi = items[hi];
if (lo == hiDest): # min comes from hiDest
if (hi != loDest): # max from in between
self.put(hi, items[loDest]);
# else min and max get exchanged
else: # min from in between
if (hi == loDest): # max comes from loDest
self.put(lo, items[hiDest]);
else:
self.put2(lo, items[loDest], hi, items[hiDest]);
items[hiDest] = maxi;
self.assignments += 1;
items[loDest] = mini;
self.assignments += 1;
class Keeper(MinMax):
''' keeps pairs ordered to avoid about half the naive comparisons. '''
def put(self, destination, value):
''' Override: order <code>value</code> conceptually at <code>destination</code>
and the value at its "mirror-position" in <code>items</code>
(such that the lower value is at the lower index).
'''
mirror = self.last - destination;
if (#//destination == mirror ||
(value < self.items[mirror]) == (destination < mirror)):
# if destination != mirror:
self.comparisons += 1;
self.items[destination] = value;
self.assignments += 1;
else:
self.comparisons += 1;
self.items[destination] = self.items[mirror];
self.items[mirror] = value;
self.assignments += 2;
def put2(self, dest1, val1, dest2, val2):
''' Override: order <code>value1</code> conceptually at <code>dest1</code>,
<code>value2</code> conceptually at <code>dest2</code> and the
values at their "mirror-positions" in <code>items</code>
(have the lower value of each pair at the lower index).
'''
items = self.items;
mirror = self.last - dest1;
if mirror != dest2:
self.put(dest1, val1);
self.put(dest2, val2);
return;
if (val1 < val2) == (dest1 < dest2):
items[dest1] = val1;
items[dest2] = val2;
else:
items[dest1] = val2;
items[dest2] = val1;
self.comparisons += 1;
self.assignments += 2;
def prepare(self):
''' Override: prepare iterations of <code>items</code>.
<br/>Resets <code>comparisons</code>.
return first index not to process "going upwards"
'''
items = self.items;
nItems = len(items);
last = self.last = (nItems & ~1) - 1;
middle = nItems//2;
# place lower(higher) "buddies" at the lower(higher) index,
# never to be compared again unless at least one gets replaced.
for up in range(middle):
# put(up, items[up]) # ?!
u = items[up];
d = items[last - up];
if d < u:
items[up] = d;
items[last - up] = u;
self.assignments += 1;
# for odd(length) arrays, determine max and get it out of the way
# so only pairs need to be considered ever after
if nItems & 1:
maxI = max(range(middle, last+1), key=lambda i: self.items[i])
self.comparisons = last;
if maxI <= last:
maxVal = items[maxI];
self.put(maxI, items[last+1]);
items[last+1] = maxVal;
else:
self.comparisons = middle;
# when there are just two elements left, they are already ordered
return middle - 1;
def minIMaxI(self, loDest, hiDest):
'''Override'''
lo = loDest;
hi = hiDest;
# double ended traversal just makes this easier to read
# - it may be easier on the memory hierarchy to do
# a single upwards traversal, switching from looking
# for min to looking for max in the middle
for up in range(loDest, hiDest): ### XXX upper limit WRONG
if items[up] < items[lo]:
lo = up;
if items[hi] < items[self.last-up]:
hi = self.last-up;
self.comparisons += hiDest - loDest;
return [ lo, hi ];
if __name__ == "__main__":
import random
nItems = 18;#19
items = [random.randint(10, 99) for r in range(nItems)];
check = items[:];
print(items);
sorter = Keeper(items);
sorter.trace = lambda sorter: print(sorter.items);
sorter.sort();
print(str(sorter.getComparisons(items)),
" (", str(items == sorted(check)), ')', sep='');
I have never been a Python coder - it looks weird to me even when beautiful.
2 Answers 2
Put the main code inside a function to avoid scope conflicts. In particular this will make it clear that you forgot items = self.items
in minIMaxI
.
The trace method is useless. You can easily insert prints into the source if you want to see this kind of thing, no need to provide a hook. In any case you've defined the default empty implementation wrong: if you remove sorter.trace = lambda: ...
you'll see an error.
The abstract methods don't add much value. You can get rid of them.
getAssignments
and getComparisons
aren't useful.
Tracking assignments and comparisons yourself is tedious and error prone. In fact, I see that swap
doesn't add to assignments
, so you've missed some. You can track them automatically:
class AssignmentTrackingList(list):
def __init__(self, it):
super().__init__(it)
self.assignments = 0
def __setitem__(self, key, value):
super(AssignmentTrackingList, self).__setitem__(key, value)
self.assignments += 1
...
items = AssignmentTrackingList([random.randint(10, 99) for r in range(nItems)])
...
print(items.assignments)
This also makes it very easy to swap back and forth between code that tracks assignments, and code that doesn't (which is faster).
Similarly, you can track comparisons of integers:
@functools.total_ordering
class ComparisonTracker:
comparisons = 0
def __lt__(self, other):
ComparisonTracker.comparisons += 1
return super().__lt__(other)
class ComparisonTrackingInt(ComparisonTracker, int):
pass
...
items = [ComparisonTrackingInt(random.randint(10, 99)) for r in range(nItems)]
...
print(ComparisonTrackingInt.comparisons)
str()
is redundant inside print
.
You don't need the first check in if not self.items or len(self.items) < 2:
.
-
\$\begingroup\$
Put the main code
- did that about 80 mins ago after noticing one of the mistakes inKeeper.minIMaxI
. There's another iteration in the makings... \$\endgroup\$greybeard– greybeard2017年09月05日 12:48:05 +00:00Commented Sep 5, 2017 at 12:48 -
\$\begingroup\$ (not a prompt to resolve this for me)
ComparisonTrackingInt
looks sane using@functools.total_ordering
, but there seems to be an issue withmax(seq)
(which I might not have suspected without computing the count or keeping it with naked code) \$\endgroup\$greybeard– greybeard2017年09月06日 10:24:03 +00:00Commented Sep 6, 2017 at 10:24 -
\$\begingroup\$ total_ordering only fills in missing methods, so it doesn't work in this case. \$\endgroup\$Alex Hall– Alex Hall2017年09月06日 10:30:06 +00:00Commented Sep 6, 2017 at 10:30
-
\$\begingroup\$ Given (at least) one of
__eq__
and__ne__
and one of the others, it does. So does using an open codedmax_index(iterable, low=0, high=-1)
- looking ugly... \$\endgroup\$greybeard– greybeard2017年09月06日 11:20:17 +00:00Commented Sep 6, 2017 at 11:20 -
\$\begingroup\$ @greybeard playing around with it I don't think it does. But I've edited to include a solution which does work. \$\endgroup\$Alex Hall– Alex Hall2017年09月06日 12:12:24 +00:00Commented Sep 6, 2017 at 12:12
Yes, this does look like a Java-structured program. Here are a few things to start with:
- remove
;
at the end of the lines, you don't need them in Python you don't need external parenthesis in the
if
orelif
conditions. For instance, replace:if (lo == hiDest):
with:
if lo == hiDest:
there is a specific format for documentation strings - use triple quotes, start with a capital letter and end with a dot
- improve your variable naming - first of all, in Python, it is agreed to name your variables and methods in a
lower_case_with_underscores
style. And, overall, make your variable and method names communicate what they mean and are used for - for instance,put()
,put2()
,u
ord
are meaningless - make sure to respect the current indentation level when adding comments - some of your comments are indented differently than the code they belong to
you can make use of variable unpacking in some parts of the code, e.g. replacing:
lo = loDest; hi = hiDest;
with:
lo, hi = loDest, hiDest
look for other
PEP8
code style violations like using spaces between operators, the use of blank lines
You can also run tools like flake8
or pylint
to perform static code analysis checks which would reveal the above and other code style and quality violations automatically.
-
\$\begingroup\$
lo, hi = loDest, hiDest
doesn't seem like an improvement. \$\endgroup\$Alex Hall– Alex Hall2017年09月05日 10:35:20 +00:00Commented Sep 5, 2017 at 10:35
Explore related questions
See similar questions with these tags.