Python 2 binary search

Question 1

I wanted to take a stab at implementing a binary search, and here's what I came up with. Is there anything I could have done better, from a code clarity perspective, or code optimization perspective? Any and all criticism would be appreciated:

import random
# implement a binary search
def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 found = False
 midpoint = None
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint
def _create_search_criteria(cap=10000):
 # choose a target to search for
 choice = random.randrange(0, cap/2)
 # create some data full of random numbers
 data = [random.randrange(0, cap/2) for i in xrange(cap)]
 # ensure the choice is nowhere in the data
 data = [d for d in data if d != choice]
 # put a single of instance of choice back into the data
 data[random.randrange(0, cap)] = choice
 # sort the data
 data = sorted(data)
 return data, choice
def test_binary_search():
 data, choice = _create_search_criteria()
 print 'Searching for: ' + str(choice)
 index = binary_search(data, choice)
 print 'Found it at index: ' + str(index)
 assert data[index] == choice
for x in xrange(1000):
 test_binary_search()

Question 2

You declare found in binary_search and then never use it. The last term could also be else, because if it's not greater than or equal, it has to be less than.

Question 3

Thanks! I was using found before, but forgot to remove it when I took a different approach. I'll make sure to remove it.

Question 4

See the source of Python's bisect module in the standard library to see the accepted implementations used for binary searches and binary insertions used in the language:

"""Bisection algorithms."""
def insort_right(a, x, lo=0, hi=None):
 """Insert item x in list a, and keep it sorted assuming a is sorted.
 If x is already in a, insert it to the right of the rightmost x.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if x < a[mid]: hi = mid
 else: lo = mid+1
 a.insert(lo, x)
insort = insort_right # backward compatibility
def bisect_right(a, x, lo=0, hi=None):
 """Return the index where to insert item x in list a, assuming a is sorted.
 The return value i is such that all e in a[:i] have e <= x, and all e in
 a[i:] have e > x. So if x already appears in the list, a.insert(x) will
 insert just after the rightmost x already there.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if x < a[mid]: hi = mid
 else: lo = mid+1
 return lo
bisect = bisect_right # backward compatibility
def insort_left(a, x, lo=0, hi=None):
 """Insert item x in list a, and keep it sorted assuming a is sorted.
 If x is already in a, insert it to the left of the leftmost x.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if a[mid] < x: lo = mid+1
 else: hi = mid
 a.insert(lo, x)
def bisect_left(a, x, lo=0, hi=None):
 """Return the index where to insert item x in list a, assuming a is sorted.
 The return value i is such that all e in a[:i] have e < x, and all e in
 a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
 insert just before the leftmost x already there.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if a[mid] < x: lo = mid+1
 else: hi = mid
 return lo
# Overwrite above definitions with a fast C implementation
try:
 from _bisect import *
except ImportError:
 pass

Regarding your code, let us see what improvements can be made:

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 found = False
 midpoint = None
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint

The initial values of found and midpoint are never used and so can be removed:

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint

My tests are being performed in Python 3.5, so the code may looks slightly strange. What happens when searching for something that does not exist?

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint
alist = list(range(10, 20))
item = 9
print(binary_search(alist, item))

The function enters an infinite loop if the value is too small. What about when the value is too large?

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint
alist = list(range(10, 20))
item = 10
print(binary_search(alist, item))

The program prints out zero. This is not correct either. Here is a rewrite of your code in hopes to correct the problem:

import random
def main():
 for _ in range(1000):
 array = sorted(random.sample(range(10000), 9000))
 value = random.randrange(10000)
 index = binary_search(array, value)
 if index is None:
 assert value not in array
 else:
 assert array[index] == value
def binary_search(array, value):
 start, stop = 0, len(array)
 while start < stop:
 offset = start + stop >> 1
 sample = array[offset]
 if sample < value:
 start = offset + 1
 elif sample > value:
 stop = offset
 else:
 return offset
if __name__ == '__main__':
 main()

Question 5

Thanks. Definitely helpful to see the de facto standard implementation of what I am doing, but was hoping to have my code scrutinized rather than just see what someone else did.

Question 6

Thanks for the update! That's exactly what I was hoping for. And also to know if there are any blatant performance issues. Much appreciated!!

Noctis Skytower Noctis Skytower 9181 gold badge9 silver badges21 bronze badges · Accepted Answer · 2016-01-18 19:57:12Z

See the source of Python's bisect module in the standard library to see the accepted implementations used for binary searches and binary insertions used in the language:

"""Bisection algorithms."""
def insort_right(a, x, lo=0, hi=None):
 """Insert item x in list a, and keep it sorted assuming a is sorted.
 If x is already in a, insert it to the right of the rightmost x.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if x < a[mid]: hi = mid
 else: lo = mid+1
 a.insert(lo, x)
insort = insort_right # backward compatibility
def bisect_right(a, x, lo=0, hi=None):
 """Return the index where to insert item x in list a, assuming a is sorted.
 The return value i is such that all e in a[:i] have e <= x, and all e in
 a[i:] have e > x. So if x already appears in the list, a.insert(x) will
 insert just after the rightmost x already there.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if x < a[mid]: hi = mid
 else: lo = mid+1
 return lo
bisect = bisect_right # backward compatibility
def insort_left(a, x, lo=0, hi=None):
 """Insert item x in list a, and keep it sorted assuming a is sorted.
 If x is already in a, insert it to the left of the leftmost x.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if a[mid] < x: lo = mid+1
 else: hi = mid
 a.insert(lo, x)
def bisect_left(a, x, lo=0, hi=None):
 """Return the index where to insert item x in list a, assuming a is sorted.
 The return value i is such that all e in a[:i] have e < x, and all e in
 a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
 insert just before the leftmost x already there.
 Optional args lo (default 0) and hi (default len(a)) bound the
 slice of a to be searched.
 """
 if lo < 0:
 raise ValueError('lo must be non-negative')
 if hi is None:
 hi = len(a)
 while lo < hi:
 mid = (lo+hi)//2
 if a[mid] < x: lo = mid+1
 else: hi = mid
 return lo
# Overwrite above definitions with a fast C implementation
try:
 from _bisect import *
except ImportError:
 pass

Regarding your code, let us see what improvements can be made:

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 found = False
 midpoint = None
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint

The initial values of found and midpoint are never used and so can be removed:

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint

My tests are being performed in Python 3.5, so the code may looks slightly strange. What happens when searching for something that does not exist?

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint
alist = list(range(10, 20))
item = 9
print(binary_search(alist, item))

The function enters an infinite loop if the value is too small. What about when the value is too large?

def binary_search(alist, item):
 first = 0
 last = len(alist)-1
 while True:
 midpoint = first + ((last-first) // 2)
 if alist[midpoint] == item:
 return midpoint
 elif alist[midpoint] < item:
 first = midpoint
 elif alist[midpoint] > item:
 last = midpoint
alist = list(range(10, 20))
item = 10
print(binary_search(alist, item))

The program prints out zero. This is not correct either. Here is a rewrite of your code in hopes to correct the problem:

import random
def main():
 for _ in range(1000):
 array = sorted(random.sample(range(10000), 9000))
 value = random.randrange(10000)
 index = binary_search(array, value)
 if index is None:
 assert value not in array
 else:
 assert array[index] == value
def binary_search(array, value):
 start, stop = 0, len(array)
 while start < stop:
 offset = start + stop >> 1
 sample = array[offset]
 if sample < value:
 start = offset + 1
 elif sample > value:
 stop = offset
 else:
 return offset
if __name__ == '__main__':
 main()

Thanks. Definitely helpful to see the de facto standard implementation of what I am doing, but was hoping to have my code scrutinized rather than just see what someone else did.
Thanks for the update! That's exactly what I was hoping for. And also to know if there are any blatant performance issues. Much appreciated!!

Stack Exchange Network

Python 2 binary search

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Python 2 binary search

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions