The following script is part of a continuing-education course I'm currently enrolled in.
Not all of the code was written by me (the function signatures, for example, were provided). Therefore I have enclosed the sections I wrote myself between `# --- Own Start ---` and `# --- Own End ---` comment lines.
Moreover, we were given three CSV files with business data from different cities. The imaginary business was a bike-share company.
#!/usr/local/bin/python3
import time
import pandas as pd
import numpy as np
# --- Own Start ----------------------------------------------------------
# Maps every supported city name to the CSV file that holds its trip data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Answers accepted by the interactive prompts; "all" means "apply no filter".
feasible_cities = [
    "new york city",
    "chicago",
    "washington",
]

# The order matters: load_data() and time_stats() translate between a month
# name and its calendar number through the list position (index + 1).
feasible_months = [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "all",
]

feasible_days = [
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
    "sunday",
    "all",
]
def ask_user_selection(options, prompt_message):
    """
    Prompts the user repeatedly until one of the offered options is entered.

    The raw input is stripped of surrounding whitespace and lower-cased before
    it is compared, so "  Chicago " matches the option "chicago".

    Args:
        (collection of str) options - accepted answers, written in lower case
        (str) prompt_message - text shown to the user on every attempt
    Returns:
        (str) answer - the normalized input; always a member of options
    """
    while True:
        answer = input(prompt_message).strip().lower()
        if answer in options:
            return answer
        # Anything else (including an empty line) triggers another round.
        print("Please enter one of the offered options.\n")
# -- Own END -----------------------------------------------------------------------------------
def get_filters():
    """
    Interactively collects the city, month and day filters for the analysis.

    Every answer is obtained through ask_user_selection(), which keeps asking
    until the input matches one of the offered options.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print("\n ---- Hello! Let's explore some US bikeshare data! ----\n")
    # --- Own Start ----------------------------------------------------------
    city_prompt = "Please enter: 'new york city', 'chicago' or 'washington' > "
    month_prompt = (
        "Please enter month: 'january', 'february', 'march', "
        "'april', 'may', 'june' or 'all' > "
    )
    day_prompt = (
        "Please enter day: 'monday', 'tuesday', 'wednesday', 'thursday', "
        "'friday', 'saturday', 'sunday' or 'all' > "
    )

    # The questions are asked in this fixed order: city, month, day of week.
    city = ask_user_selection(feasible_cities, city_prompt)
    month = ask_user_selection(feasible_months, month_prompt)
    day = ask_user_selection(feasible_days, day_prompt)

    separator = '-' * 40
    print(separator)
    return city, month, day
# --- Own End ----------------------------------------------------------
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Besides the columns read from the CSV file, the returned frame carries the
    helper columns "month" (calendar number), "week_day" (capitalized English
    day name), "start_hour" and "start_end" ("<start station> to <end station>").

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor an entry of feasible_months
    """
    # --- Own Start ----------------------------------------------------------
    df = pd.read_csv(CITY_DATA[city], index_col=0)

    df['Start Time'] = pd.to_datetime(df['Start Time'])  # Casting "Start Time" to datetime.
    df["month"] = df['Start Time'].dt.month              # Month-part from "Start Time" value.
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() yields the
    # same capitalized English names ("Monday", "Tuesday", ...).
    df["week_day"] = df['Start Time'].dt.day_name()      # Weekday name of the "Start Time" value.
    df["start_hour"] = df['Start Time'].dt.hour          # Hour-part from "Start Time" value.
    df["start_end"] = df['Start Station'].astype(str) + ' to ' + df['End Station']

    if month != 'all':
        # The list position plus one is the calendar number of the month.
        month_index = feasible_months.index(month) + 1
        df = df[df["month"] == month_index]

    if day != 'all':
        # User input is lower case, the column holds capitalized names.
        df = df[df["week_day"] == day.title()]

    return df
# --- Own End ----------------------------------------------------------
def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour of the given
    trips. An empty selection is reported instead of raising an error.

    Args:
        df - Pandas DataFrame with the columns "month", "week_day" and
             "start_hour" (as added by load_data)
    """
    # --- Own Start ----------------------------------------------------------
    print('\nCalculating The Most Frequent Times of Travel ... \n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is itself empty, so [0] would raise.
        print("No trips match the selected filters.")
    else:
        # display the most common month
        # (calendar number minus one is the position in feasible_months)
        month_index = df["month"].mode()[0] - 1
        most_common_month = feasible_months[month_index].title()
        print("Most common month: ", most_common_month)

        # display the most common day of week
        most_common_day = df["week_day"].mode()[0]
        print("Most common day: ", most_common_day)

        # display the most common start hour
        most_common_hour = df["start_hour"].mode()[0]
        print("Most common hour: ", most_common_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
# --- Own End ----------------------------------------------------------
def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most used start station, the most used end station and the most
    frequent start/end combination. An empty selection is reported instead of
    raising an error.

    Args:
        df - Pandas DataFrame with the columns "Start Station", "End Station"
             and "start_end" (the latter as added by load_data)
    """
    # --- Own Start ----------------------------------------------------------
    print('\nCalculating The Most Popular Stations and Trip ...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is itself empty, so [0] would raise.
        print("No trips match the selected filters.")
    else:
        # display most commonly used start station
        most_used_start = df['Start Station'].mode()[0]
        print("Most used start: ", most_used_start)

        # display most commonly used end station
        most_used_end = df['End Station'].mode()[0]
        print("Most used end: ", most_used_end)

        # display most frequent combination of start station and end station trip
        most_common_combination = df["start_end"].mode()[0]
        print("Most common used combination concerning start- and end-station: ",
              most_common_combination)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
# --- Own End ----------------------------------------------------------
def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    Args:
        df - Pandas DataFrame with a "Trip Duration" column
    """
    # --- Own Start ----------------------------------------------------------
    print("\nCalculating Trip Duration ...\n")
    started_at = time.time()

    durations = df["Trip Duration"]

    # Sum of all trip durations in the selection.
    print("Total time of travel: ", durations.sum())

    # Arithmetic mean of the trip durations.
    print("The average travel-time: ", durations.mean())

    elapsed = time.time() - started_at
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)
# --- Own End ----------------------------------------------------------
def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Prints the count per user type and, when the respective columns exist,
    the number of male/female users and the earliest, most recent and most
    common year of birth. A selection without any known birth year is
    reported instead of raising an error.

    Args:
        df - Pandas DataFrame with a "User Type" column and, optionally,
             "Gender" and "Birth Year" columns
    """
    # --- Own Start ----------------------------------------------------------
    print('\nCalculating User Stats ...\n')
    start_time = time.time()

    # Display counts of user types
    print("Count of user types: ",
          df["User Type"].value_counts())

    # Display counts of gender
    if "Gender" in df:
        gender = df["Gender"]
        print("\nCounts concerning client`s gender")
        print("Male persons: ", (gender == "Male").sum())
        print("Female persons: ", (gender == "Female").sum())

    # Display earliest, most recent, and most common year of birth
    if "Birth Year" in df:
        birth_years = df["Birth Year"].dropna()
        if birth_years.empty:
            # idxmax() on an empty value_counts() result raises ValueError.
            print("\nNo birth year data available for this selection.")
        else:
            print("\nEarliest year of birth: ", birth_years.min())
            print("Most recent year of birth: ", birth_years.max())
            print("Most common year of birth: ", birth_years.value_counts().idxmax())

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
# --- Own End ----------------------------------------------------------
def main():
    """
    Runs the interactive analysis loop.

    Each round asks for the filters, loads the matching data and prints the
    time, station, trip-duration and user statistics. The loop ends as soon as
    the user answers the restart question with anything other than "yes".
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        # --- Own Start ----------------------------------------------------------
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        # Normalize the answer the same way ask_user_selection() does, so that
        # stray whitespace around "yes" does not end the program.
        if restart.strip().lower() != 'yes':
            break
        # --- Own End ----------------------------------------------------------


if __name__ == "__main__":
    main()
Here's a screenshot of how it looks on the command line:
The script has passed the review. Nevertheless, I would appreciate other opinions.
What have I done well and should keep doing? What could I have done better, and why?
1 Answer 1
The `ask_user_selection` function could be implemented a bit more simply by using a `while True:` loop and an early return:
def ask_user_selection(options, prompt_message):
    """
    Keeps prompting until the normalized input is one of the options.

    The input is stripped and lower-cased before the comparison; the
    normalized text is what gets returned.
    """
    while True:
        choice = input(prompt_message).strip().lower()
        if choice not in options:
            print("Please enter one of the offered options.\n")
            continue
        return choice