I wanted to take a stab at implementing a binary search, and here's what I came up with. Is there anything I could have done better, from a code clarity perspective, or code optimization perspective? Any and all criticism would be appreciated:
import random
# implement a binary search
def binary_search(alist, item):
first = 0
last = len(alist)-1
found = False
midpoint = None
while True:
midpoint = first + ((last-first) // 2)
if alist[midpoint] == item:
return midpoint
elif alist[midpoint] < item:
first = midpoint
elif alist[midpoint] > item:
last = midpoint
def _create_search_criteria(cap=10000):
# choose a target to search for
choice = random.randrange(0, cap/2)
# create some data full of random numbers
data = [random.randrange(0, cap/2) for i in xrange(cap)]
# ensure the choice is nowhere in the data
data = [d for d in data if d != choice]
# put a single of instance of choice back into the data
data[random.randrange(0, cap)] = choice
# sort the data
data = sorted(data)
return data, choice
def test_binary_search():
data, choice = _create_search_criteria()
print 'Searching for: ' + str(choice)
index = binary_search(data, choice)
print 'Found it at index: ' + str(index)
assert data[index] == choice
for x in xrange(1000):
test_binary_search()
1 Answer 1
See the source of Python's bisect
module in the standard library to see the accepted implementations used for binary searches and binary insertions used in the language:
"""Bisection algorithms."""
def insort_right(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the right of the rightmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
a.insert(lo, x)
insort = insort_right # backward compatibility
def bisect_right(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e <= x, and all e in
a[i:] have e > x. So if x already appears in the list, a.insert(x) will
insert just after the rightmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
return lo
bisect = bisect_right # backward compatibility
def insort_left(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the left of the leftmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
a.insert(lo, x)
def bisect_left(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
insert just before the leftmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
return lo
# Overwrite above definitions with a fast C implementation
try:
from _bisect import *
except ImportError:
pass
Regarding your code, let us see what improvements can be made:
def binary_search(alist, item):
first = 0
last = len(alist)-1
found = False
midpoint = None
while True:
midpoint = first + ((last-first) // 2)
if alist[midpoint] == item:
return midpoint
elif alist[midpoint] < item:
first = midpoint
elif alist[midpoint] > item:
last = midpoint
The initial values of found
and midpoint
are never used and so can be removed:
def binary_search(alist, item):
first = 0
last = len(alist)-1
while True:
midpoint = first + ((last-first) // 2)
if alist[midpoint] == item:
return midpoint
elif alist[midpoint] < item:
first = midpoint
elif alist[midpoint] > item:
last = midpoint
My tests are being performed in Python 3.5, so the code may looks slightly strange. What happens when searching for something that does not exist?
def binary_search(alist, item):
first = 0
last = len(alist)-1
while True:
midpoint = first + ((last-first) // 2)
if alist[midpoint] == item:
return midpoint
elif alist[midpoint] < item:
first = midpoint
elif alist[midpoint] > item:
last = midpoint
alist = list(range(10, 20))
item = 9
print(binary_search(alist, item))
The function enters an infinite loop if the value is too small. What about when the value is too large?
def binary_search(alist, item):
first = 0
last = len(alist)-1
while True:
midpoint = first + ((last-first) // 2)
if alist[midpoint] == item:
return midpoint
elif alist[midpoint] < item:
first = midpoint
elif alist[midpoint] > item:
last = midpoint
alist = list(range(10, 20))
item = 10
print(binary_search(alist, item))
The program prints out zero. This is not correct either. Here is a rewrite of your code in hopes to correct the problem:
import random
def main():
for _ in range(1000):
array = sorted(random.sample(range(10000), 9000))
value = random.randrange(10000)
index = binary_search(array, value)
if index is None:
assert value not in array
else:
assert array[index] == value
def binary_search(array, value):
start, stop = 0, len(array)
while start < stop:
offset = start + stop >> 1
sample = array[offset]
if sample < value:
start = offset + 1
elif sample > value:
stop = offset
else:
return offset
if __name__ == '__main__':
main()
-
2\$\begingroup\$ Thanks. Definitely helpful to see the de facto standard implementation of what I am doing, but was hoping to have my code scrutinized rather than just see what someone else did. \$\endgroup\$Ben174– Ben1742016年01月18日 20:01:09 +00:00Commented Jan 18, 2016 at 20:01
-
1\$\begingroup\$ Thanks for the update! That's exactly what I was hoping for. And also to know if there are any blatant performance issues. Much appreciated!! \$\endgroup\$Ben174– Ben1742016年01月18日 20:32:35 +00:00Commented Jan 18, 2016 at 20:32
found
inbinary_search
and then never use it. The last term could also be else, because if it's not greater than or equal, it has to be less than. \$\endgroup\$