Python script to evaluate business data

Question 1

The following script is part of a continuing-education course I'm currently enrolled in.

Not all of the code was written by me; the function signatures, for example, were provided. I have therefore enclosed the sections I wrote myself between `# --- Own Start ---` and `# --- Own End ---` comment lines.

Moreover, we were given three CSV files with business data from different cities. The imaginary business was a bike-share company.

#!/usr/local/bin/python3
import time
import pandas as pd
import numpy as np
# --- Own Start ----------------------------------------------------------
# Maps every selectable city name to the CSV file holding its trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Answers accepted by the city prompt.
feasible_cities = ["new york city", "chicago", "washington"]

# Answers accepted by the month prompt. The position of a month in this list
# plus one equals its calendar month number, which load_data and time_stats
# rely on; "all" means "do not filter by month".
feasible_months = [
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "all",
]

# Answers accepted by the weekday prompt; "all" means "do not filter by day".
feasible_days = [
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
    "sunday",
    "all",
]
def ask_user_selection(options, prompt_message):
    """
    Prompts the user until the reply is one of the offered options.

    The reply is stripped of surrounding whitespace and lower-cased before
    it is looked up in ``options``, so only lower-case options can match.

    Args:
        options - collection of accepted answers
        (str) prompt_message - text shown by input() on every attempt
    Returns:
        (str) the normalised reply, which is a member of ``options``
    """
    while True:
        choice = input(prompt_message).strip().lower()
        if choice in options:
            return choice
        # Unknown reply: tell the user and ask again.
        print("Please enter one of the offered options.\n")
# -- Own END -----------------------------------------------------------------------------------
def get_filters():
    """
    Asks the user which city, month and weekday should be analyzed.

    Each question is repeated by ask_user_selection until the reply is one
    of the entries of the matching feasible_* list.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print("\n ---- Hello! Let's explore some US bikeshare data! ----\n")
    # --- Own Start ----------------------------------------------------------
    city_prompt = "Please enter: 'new york city', 'chicago' or 'washington' > "
    month_prompt = (
        "Please enter month: 'january', 'february', 'march', "
        "'april', 'may', 'june' or 'all' > "
    )
    day_prompt = (
        "Please enter day: 'monday', 'tuesday', 'wednesday', 'thursday', "
        "'friday', 'saturday', 'sunday' or 'all' > "
    )

    # The questions are asked in the order city, month, day.
    city = ask_user_selection(feasible_cities, city_prompt)
    month = ask_user_selection(feasible_months, month_prompt)
    day = ask_user_selection(feasible_days, day_prompt)

    print('-' * 40)
    return city, month, day
    # --- Own End ----------------------------------------------------------
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Besides filtering, the returned frame gains four helper columns derived
    from the raw data: "month" (calendar month number), "week_day" (English
    day name such as "Monday"), "start_hour" (hour of the start time) and
    "start_end" (start and end station joined by " to ").

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor an entry of feasible_months
    """
    # --- Own Start ----------------------------------------------------------
    df = pd.read_csv(CITY_DATA[city], index_col=0)

    # Cast "Start Time" to datetime so its parts can be read through .dt.
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    start = df['Start Time'].dt

    df["month"] = start.month  # Month part of "Start Time" (1 = January).
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
    # replacement and yields the same capitalised English day names.
    df["week_day"] = start.day_name()
    df["start_hour"] = start.hour  # Hour part of "Start Time".
    df["start_end"] = df['Start Station'].astype(str) + ' to ' + df['End Station']

    if month != 'all':
        # List position + 1 is the calendar month number ("january" -> 1).
        month_index = feasible_months.index(month) + 1
        df = df[df["month"] == month_index]

    if day != 'all':
        # The prompt delivers lower-case names; the column holds e.g. "Monday".
        df = df[df["week_day"] == day.title()]

    return df
    # --- Own End ----------------------------------------------------------
def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Reads the "month", "week_day" and "start_hour" columns of df and prints
    the most frequent value of each, followed by the elapsed time.
    """
    # --- Own Start ----------------------------------------------------------
    print('\nCalculating The Most Frequent Times of Travel ... \n')
    start_time = time.time()

    # mode() returns the most frequent value(s); the first entry is reported.
    # Month numbers start at 1, list positions at 0, hence the "- 1".
    busiest_month_number = df["month"].mode()[0]
    print("Most common month: ", feasible_months[busiest_month_number - 1].title())

    print("Most common day: ", df["week_day"].mode()[0])

    print("Most common hour: ", df["start_hour"].mode()[0])

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)
    # --- Own End ----------------------------------------------------------
def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Reads the "Start Station", "End Station" and "start_end" columns of df
    and prints the most frequent value of each, followed by the elapsed time.
    """
    # --- Own Start ----------------------------------------------------------
    print('\nCalculating The Most Popular Stations and Trip ...\n')
    start_time = time.time()

    # mode() returns the most frequent value(s); the first entry is reported.
    print("Most used start: ", df['Start Station'].mode()[0])

    print("Most used end: ", df['End Station'].mode()[0])

    top_route = df["start_end"].mode()[0]
    print("Most common used combination concerning start- and end-station: ",
          top_route)

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)
    # --- Own End ----------------------------------------------------------
def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the "Trip Duration" column of df,
    followed by the elapsed time.
    """
    # --- Own Start ----------------------------------------------------------
    print("\nCalculating Trip Duration ...\n")
    start_time = time.time()

    durations = df["Trip Duration"]
    print("Total time of travel: ", durations.sum())
    print("The average travel-time: ", durations.mean())

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)
    # --- Own End ----------------------------------------------------------
def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Always prints the value counts of the "User Type" column. The gender
    counts and the birth-year figures are only printed when df has a
    "Gender" or "Birth Year" column respectively.
    """
    # --- Own Start ----------------------------------------------------------
    print('\nCalculating User Stats ...\n')
    start_time = time.time()

    print("Count of user types: ", df["User Type"].value_counts())

    if "Gender" in df.columns:
        genders = df["Gender"]
        print("\nCounts concerning client`s gender")
        print("Male persons: ", genders[genders == 'Male'].count())
        print("Female persons: ", genders[genders == 'Female'].count())

    if "Birth Year" in df.columns:
        birth_years = df["Birth Year"]
        print("\nEarliest year of birth: ", birth_years.min())
        print("Most recent year of birth: ", birth_years.max())
        # value_counts() tallies each year; idxmax() picks the year with the
        # highest tally.
        print("Most common year of birth: ", birth_years.value_counts().idxmax())

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)
    # --- Own End ----------------------------------------------------------
def main():
    """
    Runs the interactive analysis until the user declines a restart.

    Each round asks for the filters, loads the matching data and prints the
    four statistics reports. Any reply other than "yes" (case-insensitive)
    to the restart question ends the program.
    """
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(df)

        # --- Own Start ----------------------------------------------------------
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = restart.lower() == 'yes'
        # --- Own End ----------------------------------------------------------


if __name__ == "__main__":
    main()

Here's a screenshot of how it looks on the command line:

enter image description here

The script has passed the review. But nevertheless I would appreciate other opinions.

What have I done well and should keep doing? What could I have done better, and why?

Question 2

The ask_user_selection function could be implemented a bit more simply by using a `while True:` loop and an early return:

# Keeps prompting until the user types one of `options`, then returns that
# answer (stripped of surrounding whitespace and lower-cased).
def ask_user_selection(options, prompt_message):
 while True:
 # Normalise the reply so surrounding blanks and capitalisation are ignored.
 answer = input(prompt_message).strip().lower()
 if answer in options:
 return answer
 # Only reached for an unrecognised reply; the loop then asks again.
 print("Please enter one of the offered options.\n")

janos janos 113k15 gold badges154 silver badges396 bronze badges · Accepted Answer · 2018-12-16 22:26:39Z

The ask_user_selection function could be implemented a bit more simply by using a `while True:` loop and an early return:

# Keeps prompting until the user types one of `options`, then returns that
# answer (stripped of surrounding whitespace and lower-cased).
def ask_user_selection(options, prompt_message):
 while True:
 # Normalise the reply so surrounding blanks and capitalisation are ignored.
 answer = input(prompt_message).strip().lower()
 if answer in options:
 return answer
 # Only reached for an unrecognised reply; the loop then asks again.
 print("Please enter one of the offered options.\n")

Stack Exchange Network

Python script for to evaluate business data

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Python script for to evaluate business data

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions