Selection sort with reduced comparison count

Question 1

This is an attempt to show how to use fewer comparisons in selection sorting a sequence of \$n\$ items than the \$(n2-n)/2\$ naive selection sort uses.

This is a first Python re-iteration of a Java effort, find the next at Selection sort with reduced comparison count: Python iteration 2.

'''
selection sort implementations to show how to reduce
the number of comparisons (and input traversals).
Ideas:
- exploit finding min and max of n items in 3n/2 comparisons
- avoid repeating "input pair comparisons"
If and when indulging in (micro-)benchmarking,
 be sure to use an appropriate framework
 - and compare lemons to lemons.
'''
class Sorter:
 ''' In-place sorter. '''
 def sort(self):
 ''' sort items in place. '''
 raise NotImplementedError("Sorters better know how to sort");
class LoCoSelectionSorter(Sorter):
 ''' In-place selection sorter striving for a low comparison count. '''
 def getAssignments(self, items):
 raise NotImplementedError("LoCoSelectionSorters better know how to report the number of assignments");
 def getComparisons(self, items):
 raise NotImplementedError("LoCoSelectionSorters better know how to report the number of comparisons");
class TallyAssignments(LoCoSelectionSorter):
 def getAssignments(self, items):
 ''' assignments to <code>items</code>. '''
 return self.assignments;
 def __init__(self, items):
 self.items = items;
 self.assignments = 0;
 def trace(self):
 pass;
 def swap(self, i, j):
 if (i != j):
 items = self.items;
 items[j], items[i] = items[i], items[j];
 def prepare(self):
 ''' prepare iterations of <code>items</code>.
 return first index not to process "going upwards"
 '''
# ? and destination index for the first max (after) ?
 raise NotImplementedError("TallyAssignments better know how to prepare iteration of items");
 def iteration(self, loDest, hiDest):
 ''' iterate (the unordered part of) <code>items</code> once. '''
 raise NotImplementedError("TallyAssignments better know how to iterate items");
 def put(self, dest, value):
 ''' put one value in <code>items</code>,
 keeping invariants as needed.
 '''
 self.items[dest] = value;
 self.assignments += 1;
 def put2(self, dest1, value1, dest2, value2):
 ''' puts two values in <code>items</code>,
 keeping invariants as needed.
 '''
 self.items[dest1] = value1;
 self.items[dest2] = value2;
 self.assignments += 2;
 def sort(self):
 ''' Override In-place selection sort striving for a low comparison count.
 throws <code>NullPointerException</code> if an element of a
 non-trivial array is </code>null</code>
 '''
 if not self.items or len(self.items) < 2:
 return;
 limit = self.prepare();
 self.trace(self);
 for up in range(limit):
 self.iteration(up);
 self.trace(self);
 # ''' In-place selection sort striving for a low comparison count.
 # * @throws <code>NullPointerException</code> if an element of a
 # * non-trivial array is </code>null</code> '''
 # public static TallyAssignments.Keeper sort(Comparable[]items) {
 # TallyAssignments.Keeper sorter = new Keeper(items);
 # sorter.sort();
 # return sorter;
 # }
class MinMax(TallyAssignments):
 ''' find both min and max in one traversal
 avoiding about one quarter of the naive comparisons.
 '''
 def __init__(self, items):
 super().__init__(items);
 self.comparisons = 0;
 def getComparisons(self, items):
 '''Override'''
 # print((((self.last - 3) * (self.last - 1)) * 3) / 8 + 2*self.last+1);
 return self.comparisons;
 #@Override
 def prepare(self):
 '''Override'''
 last = self.last = len(self.items) - 1;
 if ((len(self.items) & 1) == 1):
 # for odd(length) arrays, find max and get it out of the way
 # so only pairs need to be considered ever after
 maxI = max(range(last+1), key=lambda i: self.items[i])
 self.comparisons += last;
 self.swap(maxI, last);
 self.last -= 1;
 return len(self.items)//2;
 def minIMaxI(self, loDest, hiDest):
 '''find indices of minimum and maximum'''
 items = self.items;
 lo, hi = (loDest, loDest+1) if items[loDest] < items[loDest+1] \
 else (loDest+1, loDest);
 self.comparisons += 1;
 for up in range(loDest+2, hiDest, 2):
 if items[up] < items[up+1]:
 if items[up] < items[lo]:
 lo = up;
 if items[hi] < items[up+1]:
 hi = up+1;
 else:
 if items[up+1] < items[lo]:
 lo = up+1;
 if items[hi] < items[up]:
 hi = up;
# self.comparisons += 3;
 self.comparisons += (3 * (hiDest - loDest - 1)) // 2;
 return lo, hi;
 def iteration(self, loDest):
 '''Override'''
 hiDest = self.last - loDest;
 lo, hi = self.minIMaxI(loDest, hiDest);
 items = self.items;
 # the tricky part is three elements moving
 if (lo == loDest): # min does not move
 if (hi != hiDest): # max moves
 ex = items[hi];
 self.put(hi, items[hiDest]);
 items[hiDest] = ex;
 self.assignments += 1;
 # else nothing moves
 else: # min moves
 mini = items[lo];
 if (hi == hiDest): # max stays in place
 self.put(lo, items[loDest]);
 else: # both move
 maxi = items[hi];
 if (lo == hiDest): # min comes from hiDest
 if (hi != loDest): # max from in between
 self.put(hi, items[loDest]);
 # else min and max get exchanged
 else: # min from in between
 if (hi == loDest): # max comes from loDest
 self.put(lo, items[hiDest]);
 else:
 self.put2(lo, items[loDest], hi, items[hiDest]);
 items[hiDest] = maxi;
 self.assignments += 1;
 items[loDest] = mini;
 self.assignments += 1;
class Keeper(MinMax):
 ''' keeps pairs ordered to avoid about half the naive comparisons. '''
 def put(self, destination, value):
 ''' Override: order <code>value</code> conceptually at <code>destination</code>
 and the value at its "mirror-position" in <code>items</code>
 (such that the lower value is at the lower index).
 '''
 mirror = self.last - destination;
 if (#//destination == mirror ||
 (value < self.items[mirror]) == (destination < mirror)):
# if destination != mirror:
 self.comparisons += 1;
 self.items[destination] = value;
 self.assignments += 1;
 else:
 self.comparisons += 1;
 self.items[destination] = self.items[mirror];
 self.items[mirror] = value;
 self.assignments += 2;
 def put2(self, dest1, val1, dest2, val2):
 ''' Override: order <code>value1</code> conceptually at <code>dest1</code>,
 <code>value2</code> conceptually at <code>dest2</code> and the
 values at their "mirror-positions" in <code>items</code>
 (have the lower value of each pair at the lower index).
 '''
 items = self.items;
 mirror = self.last - dest1;
 if mirror != dest2:
 self.put(dest1, val1);
 self.put(dest2, val2);
 return;
 if (val1 < val2) == (dest1 < dest2):
 items[dest1] = val1;
 items[dest2] = val2;
 else:
 items[dest1] = val2;
 items[dest2] = val1;
 self.comparisons += 1;
 self.assignments += 2;
 def prepare(self):
 ''' Override: prepare iterations of <code>items</code>.
 <br/>Resets <code>comparisons</code>.
 return first index not to process "going upwards"
 '''
 items = self.items;
 nItems = len(items);
 last = self.last = (nItems & ~1) - 1;
 middle = nItems//2;
 # place lower(higher) "buddies" at the lower(higher) index,
 # never to be compared again unless at least one gets replaced.
 for up in range(middle):
 # put(up, items[up]) # ?!
 u = items[up];
 d = items[last - up];
 if d < u:
 items[up] = d;
 items[last - up] = u;
 self.assignments += 1;
 # for odd(length) arrays, determine max and get it out of the way
 # so only pairs need to be considered ever after
 if nItems & 1:
 maxI = max(range(middle, last+1), key=lambda i: self.items[i])
 self.comparisons = last;
 if maxI <= last:
 maxVal = items[maxI];
 self.put(maxI, items[last+1]);
 items[last+1] = maxVal;
 else:
 self.comparisons = middle;
 # when there are just two elements left, they are already ordered
 return middle - 1;
 def minIMaxI(self, loDest, hiDest):
 '''Override'''
 lo = loDest;
 hi = hiDest;
 # double ended traversal just makes this easier to read
 # - it may be easier on the memory hierarchy to do
 # a single upwards traversal, switching from looking
 # for min to looking for max in the middle
 for up in range(loDest, hiDest): ### XXX upper limit WRONG
 if items[up] < items[lo]:
 lo = up;
 if items[hi] < items[self.last-up]:
 hi = self.last-up;
 self.comparisons += hiDest - loDest;
 return [ lo, hi ];
if __name__ == "__main__":
 import random
 nItems = 18;#19
 items = [random.randint(10, 99) for r in range(nItems)];
 check = items[:];
 print(items);
 sorter = Keeper(items);
 sorter.trace = lambda sorter: print(sorter.items);
 sorter.sort();
 print(str(sorter.getComparisons(items)),
 " (", str(items == sorted(check)), ')', sep='');

I have never been a Python coder - it looks weird to me even when beautiful.

Question 2

Put the main code inside a function to avoid scope conflicts. In particular this will make it clear that you forgot items = self.items in minIMaxI.

The trace method is useless. You can easily insert prints into the source if you want to see this kind of thing, no need to provide a hook. In any case you've defined the default empty implementation wrong: if you remove sorter.trace = lambda: ... you'll see an error.

The abstract methods don't add much value. You can get rid of them.

getAssignments and getComparisons aren't useful.

Tracking assignments and comparisons yourself is tedious and error prone. In fact, I see that swap doesn't add to assignments, so you've missed some. You can track them automatically:

class AssignmentTrackingList(list):
 def __init__(self, it):
 super().__init__(it)
 self.assignments = 0
 def __setitem__(self, key, value):
 super(AssignmentTrackingList, self).__setitem__(key, value)
 self.assignments += 1
...
items = AssignmentTrackingList([random.randint(10, 99) for r in range(nItems)])
...
print(items.assignments)

This also makes it very easy to swap back and forth between code that tracks assignments, and code that doesn't (which is faster).

Similarly, you can track comparisons of integers:

@functools.total_ordering
class ComparisonTracker:
 comparisons = 0
 def __lt__(self, other):
 ComparisonTracker.comparisons += 1
 return super().__lt__(other)
class ComparisonTrackingInt(ComparisonTracker, int):
 pass
...
items = [ComparisonTrackingInt(random.randint(10, 99)) for r in range(nItems)]
...
print(ComparisonTrackingInt.comparisons)

str() is redundant inside print.

You don't need the first check in if not self.items or len(self.items) < 2:.

Question 3

Put the main code - did that about 80 mins ago after noticing one of the mistakes in Keeper.minIMaxI. There's another iteration in the makings...

Question 4

(not a prompt to resolve this for me) ComparisonTrackingInt looks sane using @functools.total_ordering, but there seems to be an issue with max(seq) (which I might not have suspected without computing the count or keeping it with naked code)

Question 5

total_ordering only fills in missing methods, so it doesn't work in this case.

Question 6

Given (at least) one of __eq__ and __ne__ and one of the others, it does. So does using an open coded max_index(iterable, low=0, high=-1) - looking ugly...

Question 7

@greybeard playing around with it I don't think it does. But I've edited to include a solution which does work.

Question 8

Yes, this does look like a Java-structured program. Here are a few things to start with:

remove ; at the end of the lines, you don't need them in Python
you don't need external parenthesis in the if or elif conditions. For instance, replace:
```
if (lo == hiDest):
```
with:
```
if lo == hiDest:
```
there is a specific format for documentation strings - use triple quotes, start with a capital letter and end with a dot
improve your variable naming - first of all, in Python, it is agreed to name your variables and methods in a lower_case_with_underscores style. And, overall, make your variable and method names communicate what they mean and are used for - for instance, put(), put2(), u or d are meaningless
make sure to respect the current indentation level when adding comments - some of your comments are indented differently than the code they belong to
you can make use of variable unpacking in some parts of the code, e.g. replacing:
```
lo = loDest;
hi = hiDest;
```
with:
```
lo, hi = loDest, hiDest
```
look for other PEP8 code style violations like using spaces between operators, the use of blank lines

You can also run tools like flake8 or pylint to perform static code analysis checks which would reveal the above and other code style and quality violations automatically.

Question 9

lo, hi = loDest, hiDest doesn't seem like an improvement.

Alex Hall Alex Hall 4062 silver badges6 bronze badges · Accepted Answer · 2017-09-05 12:34:31Z

Put the main code inside a function to avoid scope conflicts. In particular this will make it clear that you forgot items = self.items in minIMaxI.

The trace method is useless. You can easily insert prints into the source if you want to see this kind of thing, no need to provide a hook. In any case you've defined the default empty implementation wrong: if you remove sorter.trace = lambda: ... you'll see an error.

The abstract methods don't add much value. You can get rid of them.

getAssignments and getComparisons aren't useful.

Tracking assignments and comparisons yourself is tedious and error prone. In fact, I see that swap doesn't add to assignments, so you've missed some. You can track them automatically:

class AssignmentTrackingList(list):
 def __init__(self, it):
 super().__init__(it)
 self.assignments = 0
 def __setitem__(self, key, value):
 super(AssignmentTrackingList, self).__setitem__(key, value)
 self.assignments += 1
...
items = AssignmentTrackingList([random.randint(10, 99) for r in range(nItems)])
...
print(items.assignments)

This also makes it very easy to swap back and forth between code that tracks assignments, and code that doesn't (which is faster).

Similarly, you can track comparisons of integers:

@functools.total_ordering
class ComparisonTracker:
 comparisons = 0
 def __lt__(self, other):
 ComparisonTracker.comparisons += 1
 return super().__lt__(other)
class ComparisonTrackingInt(ComparisonTracker, int):
 pass
...
items = [ComparisonTrackingInt(random.randint(10, 99)) for r in range(nItems)]
...
print(ComparisonTrackingInt.comparisons)

str() is redundant inside print.

You don't need the first check in if not self.items or len(self.items) < 2:.

Put the main code - did that about 80 mins ago after noticing one of the mistakes in Keeper.minIMaxI. There's another iteration in the makings...
(not a prompt to resolve this for me) ComparisonTrackingInt looks sane using @functools.total_ordering, but there seems to be an issue with max(seq) (which I might not have suspected without computing the count or keeping it with naked code)
total_ordering only fills in missing methods, so it doesn't work in this case.
Given (at least) one of __eq__ and __ne__ and one of the others, it does. So does using an open coded max_index(iterable, low=0, high=-1) - looking ugly...
@greybeard playing around with it I don't think it does. But I've edited to include a solution which does work.

Stack Exchange Network

Selection sort with reduced comparison count

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

Selection sort with reduced comparison count

2 Answers 2

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions