As part of the hiring process (QA position), I received as a homework assignment (which involves independent study) the following task:
Create a full automation test suite (in Python3), so that when our developers finish writing the code, we will all be able to immediately test the code.
Note: This task should be completed without using the actual binary file. We recommend that you create your own mock file in working on this task.
./MyBinary --add [[int]] Adds an integer to an online database that the binary is connected to. Example: ./MyBinary --add 5678
./MyBinary --clear Deletes all the integers from the online database that the binary is connected to
./MyBinary --isSorted Prints whether all the numbers in the database (since the start/last clear) were added from smallest number to biggest. Output to the standard output: "True" or "False"
./MyBinary --count [[int]] Prints how many times this number is stored in the database (since the start/last clear) Example: ./MyBinary --count 5678 Output to the standard output: "0" or "1" or "2" or .... ./MyBinary --getAll Returns all the numbers stored in the database (the number returns in a random order, not necessarily the order they were inserted). Example output to the standard output: "5 17 3 25 4 25 6 25"
In order to carry out this task, I created Application.py as a mock application, which can run as a CLI application. Then I created TestSuite.py including all the test cases with comments .
Here is what I have done. I would like to know how I might improve it.
Application.py
import click
import csv
import os
FILE = 'data.csv'
@click.command()
@click.option('--add', help='Adds an integer to an online database that the binary is connected to.')
@click.option('--clear',
help='Deletes all the integers from the online database that the binary is connected to')
@click.option('--issorted', help='Prints whether all the numbers in the database')
@click.option('--getall', help='Prints how many times this number is stored in the database')
@click.option('--count', help='Returns all the numbers stored in the database ')
def main(add, clear, issorted, getall, count):
if add:
"""
./MyBinary --add [[int]]
Adds an integer to an online database that the binary is connected to.
Example: ./MyBinary --add 5678
"""
try:
with open(FILE, 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([add])
file.close()
return
except Exception as er:
return er
elif clear:
"""
./MyBinary --clear
Deletes all the integers from the online database that the binary is connected to
"""
if os.path.exists(FILE):
os.remove(FILE)
elif issorted:
"""
./MyBinary --isSorted
Prints whether all the numbers in the database (since the start/last clear) were added
from smallest number to biggest.
Output to the standard output: "True" or "False"
"""
with open(FILE, 'r') as f:
data = f.read()
l = [int(i) for i in data.split()]
print(all(l[i] <= l[i+1] for i in range(len(l)-1)))
elif count:
"""
./MyBinary --count [[int]]
Prints how many times this number is stored in the database (since the start/last
clear)
Example: ./MyBinary --count 5678
Output to the standard output: "0" or "1" or "2" or ....
"""
try:
results = []
with open(FILE) as file:
reader = csv.reader(file)
for row in reader:
if int(count) == int(row[0]):
results.append(int(row[0]))
file.close()
print(len(results))
except Exception as er:
print(er)
elif getall:
"""
./MyBinary --getAll
Returns all the numbers stored in the database (the number returns in a random
order, not necessarily the order they were inserted).
Example output to the standard output: "5 17 3 25 4 25 6 25"
"""
with open(FILE, 'r') as f:
data = f.read()
print([int(i) for i in data.split()])
if __name__ == '__main__':
main()
TestSuite.py
import pytest
import numpy as np
import subprocess
from subprocess import check_output
class TestSuite:
def __init__(self):
"""
CLI Application path, this should be the exact path to the application executable
"""
self.applicationPath = "mockApplication/Application.py"
def test_add(self, newNumber):
""" Test Case 01
Adds an integer to an online database that the binary is connected to.
Iterate for all array elements of the random array.
No output should be there.
"""
try:
result = subprocess.run(["python", self.applicationPath, "--add", str(newNumber)])
print(result)
assert result.returncode is 0
except Exception as e:
print(e)
def test_clear(self):
""" Test Case 2
Deletes all the integers from the data store. There should not be any value after executing this function.
No output
"""
result = subprocess.run(["python", self.applicationPath, "--clear", " "])
assert result.stdout is None
def test_isSorted(self, expected):
"""
Test Case 3
Prints whether all the numbers in the database were added
from smallest number to biggest.
Output to the standard output: "True" or "False"
"""
out = check_output(["python", self.applicationPath, "--issorted", " "])
assert expected is bool(out.decode('utf-8'))
def test_count(self, number, expected):
"""
Test case 4
Prints how many times a given number is stored in the database.
"""
out = check_output(["python", self.applicationPath, "--count", str(number)])
assert expected is int(out.decode('utf-8'))
def test_getAll(self, expected):
""" Test Case 5
Returns all the numbers stored in the database ,
not necessarily the order they were inserted.
"""
out = check_output(["python", self.applicationPath, "--getall", " "])
str1 = out.decode('utf-8').replace(']', '').replace('[', '').replace("\r\n", "")
arr = str1.split(",")
l = [int(x) for x in arr]
assert expected == l
if __name__ == '__main__':
"""
Generate a random integer array with 5 numbers between 1 and 10
to be used as the test data for this test suite
"""
testData = [1, 2, 3, 4, 10, 100, 1000, 10000]
testDataFinal = [1, 2, 3, 4, 10, 100, 1000, 10000, 10]
specialNumber = 10
# initialize test suite
testSuite = TestSuite()
# clear data
testSuite.test_clear()
# add numbers
for number in testData:
testSuite.test_add(number)
# check number of occurrences for 10, there should be 1
testSuite.test_count(10, 1)
# check is sorted
testSuite.test_isSorted(True)
# add special number
testSuite.test_add(specialNumber)
# check number of occurrences for 10, there should be 2
testSuite.test_count(specialNumber, 2)
1 Answer 1
Application.py
Your mock application has a few problems. There are also some improvements to be made regarding code style and structure.
Bugs
--isSorted
and --getAll
will currently not work with the spelling specified by the problem statement.
@click.option('--issorted', help='...')
@click.option('--getall', help='...')
should become
@click.option('--isSorted', help='...')
@click.option('--getAll', help='...')
--clear
, --isSorted
and --getAll
should not expect arguments. I'm not too familiar with the click
library, so there might be a better solution than mine. I simply set is_flag
for these click.option
s:
@click.option('--clear', is_flag=True, help='...')
@click.option('--isSorted', is_flag=True, help='...')
@click.option('--getAll', is_flag=True, help='...')
--isSorted
, --count
and --getAll
will fail if FILE
does not exist (e.g. after --clear
). If FILE
does not exist:
--isSorted
should print True
--count
should print 0
--getAll
should print an empty line or nothing (not specified in your question)
--getAll
will not print the data in the specified format.
Desired output: 5 17 3 25 4 25 6 25
Current output: [5, 17, 3, 25, 4, 25, 6, 25]
To achieve the desired output, you need to unpack the list inside the print
call like this:
print(*[int(i) for i in data.split()])
# or: print(*list(map(int, data.split())))
Improvements / Suggestions
pathlib
I would recommend the pathlib
module from the standard library for handling files and file paths:
from pathlib import Path
FILE = Path('data.csv')
Checking if a file exists is as simple as:
FILE.exists()
Deleting the file is also really simple:
FILE.unlink(missing_ok=True)
Naming
Stick to either with open(FILE, ...) as file:
or with open(FILE, ...) as f:
. Consistent naming improves readability.
l
is not recommended as variable name, as it might be confused with I
or 1
depending on the font. Same goes for O
and 0
. A descriptive variable name is usually preferable to a single letter (except for well-known conventions, such as for i in range(...)
).
l = [int(i) for i in data.split()]
should become something like
numbers = [int(i) for i in data.split()]
Manually closing files
You're already using context managers. They handle closing the file for you, so there's no need for:
with open(FILE, 'a', newline='') as file:
...
file.close()
csv.writer
I'd say we don't need a csv.writer
for this simple use case. The following should suffice:
with open(FILE, 'a') as file:
print(add, file=file)
# alternative: file.write(f"{add}\n")
Exception handling
Your exception handling is not consistent. If an exception occurs when calling the script with the --add
argument, it returns the exception. When calling with --count
you simply print the exception. All other arguments do not handle exceptions at all.
Depending on the problem statement given to you, exception handling might or might not be necessary. If it is necessary, it should be consistent and provide useful information for the user.
isSorted
Iterating over an index is rather unpythonic. Instead I would recommend using zip
:
print(all(l[i] <= l[i+1] for i in range(len(l)-1)))
becomes
print(all(x <= y for x, y in zip(numbers, numbers[1:])))
read_data()
As we need to read from FILE
and convert it to a list
of int
s multiple times I would put it into its own function:
def read_data():
if not FILE.exists():
return []
with open(FILE, 'r') as f:
data = f.read()
return list(map(int, data.split()))
This makes the rest of the code simpler, shorter and easier to read.
count
There's no need to keep a results
list for counting occurences of a number in our data:
result = 0
with open(FILE) as file:
reader = csv.reader(file)
for row in reader:
if int(count) == int(row[0]):
result += 1
print(result)
Using read_data()
:
numbers = read_data()
result = 0
for number in numbers:
if number == int(count):
result += 1
print(result)
There's also no need to manually count occurences in a list
:
numbers = read_data()
print(numbers.count(int(count)))
Full code
Here's the full code without comments and docstrings. I also removed exception handling for now. As I said I recommend implementing consistent exception handling that fits the problem statement.
import click
from pathlib import Path
FILE = Path('data.csv')
def read_data():
if not FILE.exists():
return []
with open(FILE, 'r') as f:
data = f.read()
return list(map(int, data.split()))
@click.command()
@click.option('--add', help='Adds an integer to an online database that the binary is connected to.')
@click.option('--clear', is_flag=True,
help='Deletes all the integers from the online database that the binary is connected to')
@click.option('--isSorted', is_flag=True, help='Prints whether all the numbers in the database')
@click.option('--getAll', is_flag=True, help='Prints how many times this number is stored in the database')
@click.option('--count', help='Returns all the numbers stored in the database ')
def main(add, clear, issorted, getall, count):
if add:
with open(FILE, 'a') as file:
print(add, file=file)
elif clear:
FILE.unlink(missing_ok=True)
elif issorted:
numbers = read_data()
print(all(x <= y for x, y in zip(numbers, numbers[1:])))
elif count:
numbers = read_data()
print(numbers.count(int(count)))
elif getall:
numbers = read_data()
print(*numbers)
if __name__ == '__main__':
main()
-
\$\begingroup\$ Great answer! Something that stuck out to me was using
x <= y for x, y in zip(numbers, numbers[1:])
overl[i] <= l[i+1] for i in range(len(l)-1)
. Is the first version not slower since we are making a copy of the original list and slicing it? In addition I guess thezip
version fails if you only have one number. E.gx <= y for x, y in zip([0]+numbers, numbers)
should fix it ^^ \$\endgroup\$N3buchadnezzar– N3buchadnezzar2021年06月12日 10:54:26 +00:00Commented Jun 12, 2021 at 10:54 -
1\$\begingroup\$ Thanks for your suggestions! Your assumed bug does not exist, the
zip
version works lists of any size (as[][1:]
and[1][1:]
both prduce an empty list[]
). In fact, prepending[0]
would introduce a bug for sorted lists that include negative numbers. As for the runtime of iterating over indices or azip object
, it depends on the case. The index approach is only faster if the check fails quickly (i.e. very small lists or unsorted elements towards the very front of the list). At least those are the results on my machine. Why exactly that's the case, I do not know. \$\endgroup\$riskypenguin– riskypenguin2021年06月12日 12:14:16 +00:00Commented Jun 12, 2021 at 12:14