Auctionsite API Wrapper in Python

Question 1

This is the first API wrapper I've written and I'm wondering what I can do to improve. I've read through the feedback of a few similar posts

Readme

Code

import requests
import time as notdatetime
import datetime
import matplotlib.pyplot as plt
# uses Algolia for API
class API():
 def __init__(self):
 self.base_url = 'https://bwhj2cu1lu-dsn.algolia.net/1/indexes/hpitem/query?x-algolia-agent=Algolia%20for%20JavaScript%20(4.11.0)%3B%20Browser'
 self.api_key = None
 self.app_id = None
 def generate_tokens(self):
 url = 'https://www.maxsold.com/main.14c22a1d14637769c939.js'
 response = requests.get(url)
 tokens = {'algoliaApplicationId':'', 'algoliaSearchAPIKey':''}
 for token in tokens.keys():
 start = response.text.find(f'{token}:"') + len(f'{token}:"')
 end = start + response.text[start:].find('"')
 tokens[token] = response.text[start:end]
 self.api_key = tokens['algoliaSearchAPIKey']
 self.app_id = tokens['algoliaApplicationId']
 def set_tokens(self, api_key, app_id):
 self.api_key = api_key
 self.app_id = app_id
 if self.test_search().status_code == 200:
 pass
 else:
 self.api_key = None
 self.app_id = None
 raise Exception(f"Either {self.api_key} api_key or {self.app_id} app_id don't work")
 def test_search(self): #test to see if tokens are valid. search methods require api-key and application-id tokens, get methods do not
 headers = {
 'User-Agent': "Erik's API wrapper",
 'x-algolia-api-key': self.api_key,
 'x-algolia-application-id': self.app_id,
 }
 data = f'{{}}'
 response = requests.post('https://bwhj2cu1lu-dsn.algolia.net/1/indexes/hpitem/query?x-algolia-agent=Algolia%20for%20JavaScript%20(4.11.0)%3B%20Browser', headers=headers, data=data)
 return response
 def search(self, search_type='auctions', query='', location='', country=False, page=0): #search for some listings based on query and location, leave fields blank to get all auctions
 if search_type.lower() not in ['auctions','items']:
 raise Exception("Search by 'auctions' (default) or 'items'")
 search_type = ['hpauction', 'hpitem'][['auctions','items'].index(search_type.lower())]
 headers = {
 'User-Agent': "Erik's API wrapper",
 'x-algolia-api-key': self.api_key,
 'x-algolia-application-id': self.app_id,
 }
 start_date = int(notdatetime.time()) # basically the time range of auctions you want to see, not super useful so not a function parameter
 end_date = start_date - 900 # 900 seconds because that's how it's in production
 query = query.replace('"',r'\"') #escape quotations
 country_string = ''
 if country:
 country_string = ' AND country:\\"{country}\\"'
 data = f'{{"query":"","filters":"start_date <= {start_date} AND end_date > {end_date}{country_string}","facetFilters":["auction_phase:-cancelledAuction"],"hitsPerPage":100,"page":{page},"aroundLatLng":"{location}","aroundLatLngViaIP":false,"aroundRadius":100000}}'
 url = f'https://bwhj2cu1lu-dsn.algolia.net/1/indexes/{search_type}/query?x-algolia-agent=Algolia%20for%20JavaScript%20(4.11.0)%3B%20Browser'
 response = requests.post(url, headers=headers, data=data)
 return response.json()
 def get_auction_items(self, auction_id, page_id=False): #get auction items for a given auction
 headers = {
 'User-Agent': "Erik's API wrapper",
 }
 if not page_id:
 page_id = 1
 data = {
 'auction_id': f'{auction_id}',
 'filters[page]': f'{page_id}',
 'item_type': 'itemlist',
 'lotnum': '0',
 'close_groups': '',
 'show_closed': 'closed',
 'perpetual': ''
 }
 response = requests.post('https://maxsold.maxsold.com/api/getitems', headers=headers, data=data)
 return response.json()
 def get_item(self, item_id): #get details from one item for a given listing
 headers = {
 'User-Agent': "Erik's API wrapper",
 }
 data = {
 'item_id': item_id
 }
 response = requests.post('https://maxsold.maxsold.com/api/itemdata', headers=headers, data=data)
 return response.json()
 def get_bidding(self, item_data):
 title = item_data['title']
 description = item_data['item_description']
 starting_bid = item_data['starting_bid']
 current_bid = item_data['current_bid']
 minimum_bid = item_data['minimum_bid']
 bidding = item_data['bid_history']
 bidding_data = {}
 for bid in reversed(bidding):
 time = datetime.datetime.fromtimestamp(int(bid['time_of_bid_unix']))
 price = bid['bid']
 bidding_data[time] = price
 plt.plot(bidding_data.keys(), bidding_data.values(), linewidth=3)
 plt.title(title)
 plt.xlabel('Time')
 plt.ylabel('$ Price')
 number_of_bids = len(bidding)
 bidding_time_days = (int(notdatetime.time()) - int(bidding[-1]['time_of_bid_unix'])) / 3600 / 24
 bids_per_day = number_of_bids / bidding_time_days
 return {'title': title, 'description': description, 'starting_bid': starting_bid, 'current_bid': current_bid, 'minimum_bid': minimum_bid, 'bidding_data': bidding_data, 
 'bidding_plt': plt, 'number_of_bids': number_of_bids, 'bidding_time_days': bidding_time_days, 'bids_per_day': bids_per_day}

Question 2

In API.__init__, you're assigning a value to self.base_url. However, that value is never changed and thus is the same for every instance of API. In such cases, this should be a class field:

class API: # The parentheses are not needed if you're not inheriting from anything.
 BASE_URL = 'https://bwhj2cu1lu-dsn.algolia.net/1/indexes/hpitem/query?x-algolia-agent=Algolia%20for%20JavaScript%20(4.11.0)%3B%20Browser'

In API.set_tokens, the else in this section

 if self.test_search().status_code == 200:
 pass
 else:
 ...

is superfluous. You can simplify it to

 if self.test_search().status_code != 200:
 ...

In API.search, this line

 country_string = ' AND country:\\"{country}\\"'

is missing an f before the string.

There are several places where you make requests but don't check the status codes. What if the API changes or the site goes down? You need to gracefully inform the user of that.
Why are you renaming time to notdatetime?
In API.get_auction_items, you've got inconsistent typing for page_id. Even though Python is very loose with types, it's still good practice to maintain semi-consistent types. Why not just have 1 be the default value for page_id?
This is a matter of style but I'd run your code through an auto-formatter like black.

Question 3

5. I was running into a weird error that gets resolved when I rename time to something else

Question 4

I know you're supposed to avoid saying "thanks" but, thank you, I really appreciate you're feedback, I've implemented all of it.

Question 5

I'm very happy to help!

Question 6

What was the error?

Question 7

Payload

This code is pretty hard to read (and possibly prone to encoding errors):

data = f'{{"query":"","filters":"start_date <= {start_date} AND end_date > {end_date}{country_string}","facetFilters":["auction_phase:-cancelledAuction"],"hitsPerPage":100,"page":{page},"aroundLatLng":"{location}","aroundLatLngViaIP":false,"aroundRadius":100000}}'

Just use a plain dict for your arguments like:

 data = {
 "query":"",
 "filters": "",
 "start_date": ""
 }

And then you can either use the json or data argument in requests.post to send a properly-encoded payload depending on the type of endpoint (JSON vs multipart form). If it's really needed you can always "stringify" your dict at the end before sending the request. Reference

In fact, that's what you're doing in get_auction_items.

Harcoded values

Hardcoding the URLs in your class makes it inflexible. Consider these alternatives:

provide URLs in function arguments
using a config file

The point of writing a lib is to reuse it. So it has to be flexible and reasonably future-proof. Surely you have already imported plenty of third-party packages. And then you just start using them, you normally never have to edit the source code, unless you found a bug and you fix it.

Error handling

When you are fetching online resources you should 1) always check the status code as @Daniel Walker already mentioned 2) also verify that the data you expect in return is really present. Raise an error if those conditions are not met. Otherwise the rest of the code will not perform like it should. Maybe it will not crash but return empty data or inconsistent results. It may not even be obvious there was a problem during execution and that makes troubleshooting more difficult.

In fact, this is where your wrapper could perhaps provide some added value: by performing enhanced validation, so that the caller can have confidence that the results that are returned by your API are sound and accurate. A lib that returns garbage or unreliable data is not useful.

Formatting

Formatting can be improved. Get acquainted with PEP8, use proper indentation and line spacing, here there should be one empty line between functions inside your class.

PEP 8 suggests lines should be limited to 79 characters. I don't respect the rule absolutely strictly, but the point is to avoid excessive scrolling to read your code. Some lines are way too long because of the way you build the payloads.

Another line that is too long:

def search(self, search_type='auctions', query='', location='', country=False, page=0): #search for some listings based on query and location, leave fields blank to get all auctions

Move the comments to the next line, better yet triple-quote that section and make it a docstring. Inline comments are OK if you don't abuse them and they should be short (typically 1-2 words)

I recommend that you choose a good IDE (+ some optional plugins) to enforce code style as you write it.

Naming

Function names should be intuitive. The function test_search should rather be named test_tokens or something like that, to reflect its stated purpose. Since you already have set_tokens and generate_tokens that would bring some harmony and consistency. At least that's a start.

Likewise, you have a function get_auction_items - we can guess what it does. But the function search has a very broad name: without looking at the source code we are not sure what it's all about.

Question 8

Use the start position parameter of `.find(...)` instead of string slicing

This code uses a string slice:

end = start + response.text[start:].find('"')

It's simpler and probably more efficient to take advantage of the optional start parameter of find(...):

end = response.text.find('"', start)

Simplify the extraction of access tokens

The current logic of getting the access tokens is a little bit strange:

tokens = {'algoliaApplicationId':'', 'algoliaSearchAPIKey':''}
for token in tokens.keys():
 start = response.text.find(f'{token}:"') + len(f'{token}:"')
 end = start + response.text[start:].find('"')
 tokens[token] = response.text[start:end]
self.api_key = tokens['algoliaSearchAPIKey']
self.app_id = tokens['algoliaApplicationId']

To paraphrase what's happening:

create a dictionary with just keys and empty values
for each key, extract the token value and put it in the dictionary
set instance properties from the dictionary

This could be more straightforward:

self.api_key = extract_token(response.text, 'algoliaSearchAPIKey')
self.app_id = extract_token(response.text, 'algoliaApplicationId')

Where extract_token does essentially what the loop body did before.

Consider using regex to extract patterns

The code extracts tokens using the indexes of " from a the format name:"the wanted value". This is fine, but perhaps it would be more elegant to use regex for this:

def extract_token(text, key):
 match = re.search(f'{key}:"([^"]*)"', text)
 if match:
 return match.group(1)
 return ''

Simplify iterating over the keys of a dictionary

Instead of this:

for token in tokens.keys():

You can write simply:

for token in tokens:

Avoid repeated computations

In this code, search_type.lower() is executed twice:

if search_type.lower() not in ['auctions','items']:
 raise Exception("Search by 'auctions' (default) or 'items'")
search_type = ['hpauction', 'hpitem'][['auctions','items'].index(search_type.lower())]

It would be better to run it only once and store the result in a local variable.

Question 9

Regarding "Simplify the extraction of access tokens", would I simply create an "extract_token" function inside of the "generate_tokens" class method?

Question 10

Thank you for the feedback, it's been implemented

Daniel Walker Daniel Walker 4972 silver badges12 bronze badges · Answer 1 · 2022-09-23 02:59:45Z

In API.__init__, you're assigning a value to self.base_url. However, that value is never changed and thus is the same for every instance of API. In such cases, this should be a class field:

class API: # The parentheses are not needed if you're not inheriting from anything.
 BASE_URL = 'https://bwhj2cu1lu-dsn.algolia.net/1/indexes/hpitem/query?x-algolia-agent=Algolia%20for%20JavaScript%20(4.11.0)%3B%20Browser'

In API.set_tokens, the else in this section

 if self.test_search().status_code == 200:
 pass
 else:
 ...

is superfluous. You can simplify it to

 if self.test_search().status_code != 200:
 ...

In API.search, this line

 country_string = ' AND country:\\"{country}\\"'

is missing an f before the string.

There are several places where you make requests but don't check the status codes. What if the API changes or the site goes down? You need to gracefully inform the user of that.
Why are you renaming time to notdatetime?
In API.get_auction_items, you've got inconsistent typing for page_id. Even though Python is very loose with types, it's still good practice to maintain semi-consistent types. Why not just have 1 be the default value for page_id?
This is a matter of style but I'd run your code through an auto-formatter like black.

5. I was running into a weird error that gets resolved when I rename time to something else
I know you're supposed to avoid saying "thanks" but, thank you, I really appreciate you're feedback, I've implemented all of it.

Kate Kate 8,1089 silver badges21 bronze badges · Answer 2 · 2022-09-23 10:26:58Z

Payload

This code is pretty hard to read (and possibly prone to encoding errors):

data = f'{{"query":"","filters":"start_date <= {start_date} AND end_date > {end_date}{country_string}","facetFilters":["auction_phase:-cancelledAuction"],"hitsPerPage":100,"page":{page},"aroundLatLng":"{location}","aroundLatLngViaIP":false,"aroundRadius":100000}}'

Just use a plain dict for your arguments like:

 data = {
 "query":"",
 "filters": "",
 "start_date": ""
 }

And then you can either use the json or data argument in requests.post to send a properly-encoded payload depending on the type of endpoint (JSON vs multipart form). If it's really needed you can always "stringify" your dict at the end before sending the request. Reference

In fact, that's what you're doing in get_auction_items.

Harcoded values

Hardcoding the URLs in your class makes it inflexible. Consider these alternatives:

provide URLs in function arguments
using a config file

The point of writing a lib is to reuse it. So it has to be flexible and reasonably future-proof. Surely you have already imported plenty of third-party packages. And then you just start using them, you normally never have to edit the source code, unless you found a bug and you fix it.

Error handling

When you are fetching online resources you should 1) always check the status code as @Daniel Walker already mentioned 2) also verify that the data you expect in return is really present. Raise an error if those conditions are not met. Otherwise the rest of the code will not perform like it should. Maybe it will not crash but return empty data or inconsistent results. It may not even be obvious there was a problem during execution and that makes troubleshooting more difficult.

In fact, this is where your wrapper could perhaps provide some added value: by performing enhanced validation, so that the caller can have confidence that the results that are returned by your API are sound and accurate. A lib that returns garbage or unreliable data is not useful.

Formatting

Formatting can be improved. Get acquainted with PEP8, use proper indentation and line spacing, here there should be one empty line between functions inside your class.

PEP 8 suggests lines should be limited to 79 characters. I don't respect the rule absolutely strictly, but the point is to avoid excessive scrolling to read your code. Some lines are way too long because of the way you build the payloads.

Another line that is too long:

def search(self, search_type='auctions', query='', location='', country=False, page=0): #search for some listings based on query and location, leave fields blank to get all auctions

Move the comments to the next line, better yet triple-quote that section and make it a docstring. Inline comments are OK if you don't abuse them and they should be short (typically 1-2 words)

I recommend that you choose a good IDE (+ some optional plugins) to enforce code style as you write it.

Naming

Function names should be intuitive. The function test_search should rather be named test_tokens or something like that, to reflect its stated purpose. Since you already have set_tokens and generate_tokens that would bring some harmony and consistency. At least that's a start.

Likewise, you have a function get_auction_items - we can guess what it does. But the function search has a very broad name: without looking at the source code we are not sure what it's all about.

janos janos 113k15 gold badges154 silver badges396 bronze badges · Answer 3 · 2022-09-23 16:52:42Z

Use the start position parameter of `.find(...)` instead of string slicing

This code uses a string slice:

end = start + response.text[start:].find('"')

It's simpler and probably more efficient to take advantage of the optional start parameter of find(...):

end = response.text.find('"', start)

Simplify the extraction of access tokens

The current logic of getting the access tokens is a little bit strange:

tokens = {'algoliaApplicationId':'', 'algoliaSearchAPIKey':''}
for token in tokens.keys():
 start = response.text.find(f'{token}:"') + len(f'{token}:"')
 end = start + response.text[start:].find('"')
 tokens[token] = response.text[start:end]
self.api_key = tokens['algoliaSearchAPIKey']
self.app_id = tokens['algoliaApplicationId']

To paraphrase what's happening:

create a dictionary with just keys and empty values
for each key, extract the token value and put it in the dictionary
set instance properties from the dictionary

This could be more straightforward:

self.api_key = extract_token(response.text, 'algoliaSearchAPIKey')
self.app_id = extract_token(response.text, 'algoliaApplicationId')

Where extract_token does essentially what the loop body did before.

Consider using regex to extract patterns

The code extracts tokens using the indexes of " from a the format name:"the wanted value". This is fine, but perhaps it would be more elegant to use regex for this:

def extract_token(text, key):
 match = re.search(f'{key}:"([^"]*)"', text)
 if match:
 return match.group(1)
 return ''

Simplify iterating over the keys of a dictionary

Instead of this:

for token in tokens.keys():

You can write simply:

for token in tokens:

Avoid repeated computations

In this code, search_type.lower() is executed twice:

if search_type.lower() not in ['auctions','items']:
 raise Exception("Search by 'auctions' (default) or 'items'")
search_type = ['hpauction', 'hpitem'][['auctions','items'].index(search_type.lower())]

It would be better to run it only once and store the result in a local variable.

Regarding "Simplify the extraction of access tokens", would I simply create an "extract_token" function inside of the "generate_tokens" class method?

Stack Exchange Network

Auctionsite API Wrapper in Python

3 Answers 3

Payload

Harcoded values

Error handling

Formatting

Naming

Use the start position parameter of `.find(...)` instead of string slicing

Simplify the extraction of access tokens

Consider using regex to extract patterns

Simplify iterating over the keys of a dictionary

Avoid repeated computations

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Auctionsite API Wrapper in Python

3 Answers 3

Payload

Harcoded values

Error handling

Formatting

Naming

Use the start position parameter of .find(...) instead of string slicing

Simplify the extraction of access tokens

Consider using regex to extract patterns

Simplify iterating over the keys of a dictionary

Avoid repeated computations

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions

Use the start position parameter of `.find(...)` instead of string slicing