Below is a script to find cheating students in a quiz in a daily moodle activity log exported in .xls. It works fine and is written in a procedural way.
Essentially the script isolates the activity log entry in a worksheet for students listed in another worksheet, identifies when they are taking a quiz (looking ahead in the activity log) and if they are taking an unusually long time or doing something different while taking a quiz (they will complete). These different scenarios are then highlighted in a different row colour in an xl file listing all activity for each student in the list. The script also writes to a .txt file, logging a summary for each student and all quizzes completed and the students listed who did not take any quizzes.
The questions are simple:
- How can I make it run faster?
- How can I write it in a OOP paradigm, and will it run faster?
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# The master data set is contained in excel file ('moodle.xls'). The toponomy for the activity is the one in a 2.5+ moodle instance. The colums in excel follow the format
# columns = module name/date,time/(Not used):e.g. IP address/name student/activity type (full)/activity name
# rows = chronological order in time period
# The class (moodlecheat) returns: a log containing a summary of cheats per students, a set of individual xl files with a log per student with quiz question highlighted.
# Each row is colour coded to facilitate detection: yellow/any valid quiz question answered | light yellow/any question which is answered in more than 2 minutes | grey/any non quiz activity during a quiz | Red/any activity taking longer than 5 minutes during a quiz
# The data listed in a log (log.tx) which details the running times per module and the activity for all students highlighting any suspected cheating
# Worksheet 1 = xl report for moodle activity log for the time period e.g. a day
# Worksheet 2 = Name of users/students whose quiz data is to be mined out
# The typical running time for the class on a 8GB Mac with dual processor OSX 10.9.3 is 31 seconds to find the quiz activity of 120 students for 3 quizzes amongst 12k students records
from mmap import mmap, ACCESS_READ
from xlrd import open_workbook, XL_CELL_TEXT, cellname, cellnameabs, colname
import xlwt
from xlwt import Workbook, easyxf
#from xlutils.styles import Styles
from operator import itemgetter
from time import gmtime, strftime, strptime, clock
from datetime import datetime, timedelta
from xlutils.copy import copy
class moodlecheat(object):
mois={'january':'jan','February':'Feb','March':'Mar','April':'Apr','May':'May','June':'Jun','July':'Jul','August':'Aug','September':'Sep','October':'Oct','November':'Nov','December':'Dec'}
def __init__(self, logfilepath, sourcefilepath):
self.sourcefilepath = sourcefilepath
self.logfilepath=logfilepath
self.studentsentries=[]
self.xlout=[]
self.validquizzes=[]
self.donequizzes=[]
self.listofquizzes=[]
self.inactivestudents=[]
self.activestudents=[]
self.stylo = {1:easyxf('pattern: pattern no_fill;' 'font: name Arial, height 260, italic True'),
2:easyxf('pattern: pattern solid_fill, fore_colour yellow;' 'font: name Arial, height 260'),
3:easyxf('pattern: pattern solid_fill, fore_colour gray50;' 'font: name Arial, height 260'),
4:easyxf('pattern: pattern solid_fill, fore_colour light_yellow;' 'font: name Arial, height 260, bold True'),
5:easyxf('pattern: pattern solid_fill, fore_colour light_orange;' 'font: name Arial, height 260')}
# ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
# 0 1 2 3 4 5
# studentsentries module name date,time IP address name student activity type (full) activity name
# xlout name student, time activity name activity type (short) time since previous question xl file colour row
# valid quizzes name student activity name number of question answered in quiz.
# done quizzes name student activity name
# done quizzes name student activity name
# list of quizzes activity name
# inactivestudents name student
# active students name student flag student taking quiz
# 0 1
# /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
# ------------------------------- Functions -------------------------
# function returning True when a student is taking a quiz
def takingquiz(self,item,series):
position = [(i, name.index(item)) for i, name in enumerate(series) if item in name]
return (series[position[0][0]][(position[0][1])+1])
# function to return a moodle date in date.time/python convention <y=01 m=11 d=1 sec=50 msec=00> <- unicode strings/moodle moodle convention <01 September 2001, 11:50AM>
def udconv(self,datein):
d=datein.split(',')[0]
d1=datein.split(',')[1]
d2=d+d1
d3 = moodlecheat.mois[d2.split(' ')[1]]
d4=d2.split(' ')[0]+' '+d3+' '+d2.split(' ')[2]+' '+d2.split(' ')[3]+d2.split(' ')[4]
d5 = datetime.strptime(d4, '%d %b %Y %I:%M%p')
return d5
# function to calculate between 2 dates in unicode string/moodle log format e.g. <'01 September 2001, 11:50AM'> -> time/python convention
def timegap(self,i,j):
self.gap = self.udconv(i) - self.udconv(j)
return self.gap
# ---------------------------- Methods -----------------------------------------------
def openworkbook(self):
self.wb = open_workbook(self.sourcefilepath+'activitylog.xlsx', on_demand=True)
def extractstudentslist(self):
# Extract the list [activestudents] of all students from the 3rd worksheet (list student names) in moodle.xlsx
self.sh = self.wb.sheet_by_index(1)
self.activestudents = [[x,False] for x in self.sh.col_values(0)]
def extractstudentsentries(self):
# Extract activity from the master dataset <studentsentries> for all the students listed in <activestudents>
self.sh = self.wb.sheet_by_index(0)
for r in range(self.sh.nrows):
if [self.sh.cell_value(r,3),False] in self.activestudents:
self.studentsentries.append(self.sh.row_values(r))
def listquizzes(self):
# list all quizes that were completed into <validquizzes>
for x in self.studentsentries:
if x[4][:10]=='quiz close' and [x[3],x[5]] not in self.validquizzes:
self.validquizzes.append([x[3],x[5],-1])
def flip_quiz(self,item,series):
# method to change the student record to show taking a quiz
position = [(i, name.index(item)) for i, name in enumerate(series) if item in name]
(series[position[0][0]][(position[0][1])+1])=not(series[position[0][0]][(position[0][1])+1])
def logtimestamp(self,message):
# writes <message> in log.txt at sourcefilepath
self.message=message
flog=open (self.logfilepath+'log.txt', "a")
flog.write('\n == '+self.message+ ' at '+strftime("%Y-%m-%d %H:%M:%S", gmtime()))
print self.message
def logmessage(self,message):
# writes <message> in log.txt at logfilepath
self.message=message
flog=open (self.logfilepath+'log.txt', "a")
flog.write('\n'+self.message)
def perstudentquizreport(self):
# writes start and completion times for each students
self.logmessage ('\n\nReport on quizzes')
self.logmessage ('=================\n')
self.oldtime = str('25 April 2014, 11:41 AM')
for m in (sorted(reversed(self.studentsentries), key=itemgetter(3))):
self.sc=self.stylo[1]
# If student starting a quiz and quiz has not been completed yet => log the entries into [quizzdb], log the quizz name into [validquizzes], flip the flag to show student is taking a quiz
if (m[4].split(' (')[0]=='quiz continue attempt' or m[4].split(' (')[0]=='quiz attempt' ) and (self.takingquiz(m[3],self.activestudents)==False) and (any(p[0]==m[3] and p[1]==m[5] for p in self.validquizzes)) and not (any(p[0]==m[3] and p[1]==m[5] for p in self.donequizzes)):
self.flip_quiz(m[3],self.activestudents)
self.logmessage (m[3]+' has made first answer in the quiz '+m[5]+ ' at '+m[1].split(', ')[1])
if (m[4].split(' (')[0]=='quiz continue attempt' or m[4].split(' (')[0]=='quiz attempt' ) and (self.takingquiz(m[3],self.activestudents)==False) and (any(p[0]==m[3] and p[1]==m[5] for p in self.validquizzes)) and (any(p[0]==m[3] and p[1]==m[5] for p in self.donequizzes)):
self.flip_quiz(m[3],self.activestudents)
self.logmessage (m[3]+' retook the quiz '+m[5]+ ' at '+m[1].split(', ')[1]+' - This is an issue if the quiz is not a practice quiz')
elif m[4][:10]=='quiz close' and (any(p[0]==m[3] and p[1]==m[5] for p in self.validquizzes)): # finished a quiz
self.donequizzes.append([m[3],m[5]])
self.flip_quiz(m[3],self.activestudents)
self.logmessage(m[3]+' has finished the quiz '+m[5]+ ' at '+m[1].split(', ')[1])
self.logmessage(('-')*130)
if (self.takingquiz(m[3],self.activestudents)==True):
if m[4][:4]!='quiz': # unusual action during the quiz
self.logmessage('!! -- '+unicode(m[4].split('(')[0])+' '+unicode(m[5])+' at '+unicode(m[1].split(', ')[1]))
self.sc = self.stylo[3]
elif (self.timegap(m[1],self.oldtime))>timedelta(minutes=1): # More than 1 minute for an action
self.sc=self.stylo[4]
else:
self.sc=self.stylo[2]# Answering quiz
if (self.timegap(m[1],self.oldtime))>timedelta(minutes=3): # More than 3 minutes for an action
self.logmessage('!! - Time gap between answers is greater than 3 minutes. '+unicode((self.timegap(m[1],self.oldtime)))+' at '+unicode(m[1].split(', ')[1]))
self.sc = self.stylo[5]
if (m[4].split(' (')[0]=='quiz continue attempt' or m[4].split(' (')[0]=='quiz attempt'): #log the number of questions answered
self.quest=[ l[2] for l in self.validquizzes if l[0]==m[3] and l[1]==m[5] ]
self.validquizzes.remove([m[3],m[5],self.quest[0]])
self.validquizzes.append([m[3],m[5],self.quest[0]+1])
self.xlout.append([m[3], m[1].split(', ')[1], m[5], m[4].split('(')[0], self.timegap(m[1],self.oldtime),self.sc]) #(self.takingquiz(m[3],self.activestudents))
self.oldtime = m[1]
def listallquizzes(self):
# report a list of quizzes
self.logmessage('\nList of quizzes submitted by the students')
self.logmessage(('=')*47)
for i in self.donequizzes:
if i[1] not in self.listofquizzes:
self.listofquizzes.append(i[1])
self.logmessage(i[1])
def listinactivestudents(self):
# report a list of students who didn't take a quiz
self.logmessage('\nList of students who did not take any quiz')
self.logmessage(('=')*37)
self.inactivestudents=[x for x in ([i[0] for i in self.activestudents]) if x not in ([j[0] for j in self.donequizzes])]
for i in self.inactivestudents:
self.logmessage(i)
# ------------------------------------- xl writing ---------------------------------------------
def createallstudentfiles(self):
# Create individual xl files for the students
for self.x in [i for i in self.activestudents if i[0] not in self.inactivestudents]:
self.wpath=self.logfilepath+str(self.x[0])+'.xls'
self.w = xlwt.Workbook(encoding="utf-8") # Creates new workbook for the students
self.sh = self.w.add_sheet('Activity')
self.w.save(self.wpath) # Create all the workbooks for that student
def fillingindividualstudentfiles(self):
# Fill in individual xl files for the students
self.name='empty'
self.row=0
for self.x in (sorted(self.xlout, key=itemgetter(0))):
self.row +=1
# row=(open_workbook(wpath).sheet_by_index(0).nrows) use if studentsentries or xlout is not ordered by name in the loop
for self.y in range(1,5):
self.w.get_sheet(0).write(self.row, self.y-1, str(self.x[self.y]),self.x[5])
self.w.get_sheet(0).col(0).width = 256*19
self.w.get_sheet(0).col(1).width = 256*90
self.w.get_sheet(0).col(2).width = 256*30
self.w.get_sheet(0).col(3).width = 256*30
if self.x[0]!=self.name or self.xlout.index(self.x)==(len(self.xlout)-1): #next student in the list or student last in the list
self.w.save(self.wpath)
self.name=self.x[0]
self.row=0
self.wpath=self.logfilepath+str(self.x[0])+'.xls'
self.w=copy(open_workbook(self.wpath, formatting_info=True, on_demand=True)) #Re-open existing workbook for student and makes a copy in w for editing
def main(self):
self.logtimestamp('Importing the data from the workbook started')
self.openworkbook()
self.logtimestamp('Data uploaded')
self.logtimestamp('Uploading students list')
self.extractstudentslist()
self.logtimestamp('Students list uploaded')
self.logtimestamp('Dumping data in holding table started')
self.extractstudentsentries()
self.logtimestamp('Data dumped ')
self.logtimestamp('Creating a list of quizzes completed started')
self.listquizzes()
self.logtimestamp('List of quizzes completed dumped')
self.logtimestamp('Data processing started')
self.perstudentquizreport()
self.logtimestamp('Data processing complete')
self.logtimestamp('Finding missing students and listing quizzes')
self.listallquizzes()
self.listinactivestudents()
self.logtimestamp('Found missing students and listed quizzes')
self.logtimestamp('Writing individual student xl files')
self.createallstudentfiles()
self.fillingindividualstudentfiles()
self.logtimestamp('Students files written')
moo=moodlecheat ('/Users/macadmin1/Desktop/moodle reports/Reports/','/Users/macadmin1/Desktop/moodle reports/')
moo.main()
Any feedback is welcome, especially about defining data structures and classes that would supplant list. I think I understand (to an extent) how to write a simple procedural script like this one in Python and I really want to move onto OOP now.
3 Answers 3
There is quite a bit of code there. I would start with PEP8 formatting as it makes it much easier to read. The other thing that came to mind is using namedtuple() rather than using the indexing all the time, this may help with readability.
As far as performance goes, the best way to confirm may be to do some profiling....
Since I went through much of the PEP8 formatting just to try and see what was going on, I have inlined it here along with the profiling setup at the bottom:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# The master data set is contained in excel file ('moodle.xls'). The toponomy
# for the activity is the one in a 2.5+ moodle instance. The colums in excel
# follow the format
# columns = module name/date,time/(Not used):e.g. IP address/name
# student/activity type (full)/activity name
# rows = chronological order in time period
# The class (moodlecheat) returns: a log containing a summary of cheats per
# students, a set of individual xl files with a log per student with quiz
# question highlighted.
# Each row is colour coded to facilitate detection: yellow/any valid quiz
# question answered | light yellow/any question which is answered in more than
# 2 minutes | grey/any non quiz activity during a quiz | Red/any activity
# taking longer than 5 minutes during a quiz
# The data listed in a log (log.tx) which details the running times per module
# and the activity for all students highlighting any suspected cheating
# Worksheet 1 = xl report for moodle activity log for the time period
# - e.g. a day
# Worksheet 2 = Name of users/students whose quiz data is to be mined out
# The typical running time for the class on a 8GB Mac with dual processor
# OSX 10.9.3 is 31 seconds to find the quiz activity of 120 students for 3
# quizzes amongst 12k students records
from __future__ import print_function
import sys
# from mmap import mmap, ACCESS_READ
from xlrd import open_workbook
# , XL_CELL_TEXT, cellname, cellnameabs, colname
import xlwt
from xlwt import easyxf
# , Workbook
# from xlutils.styles import Styles
from operator import itemgetter
from time import gmtime, strftime
# , strptime, clock
from datetime import datetime, timedelta
from xlutils.copy import copy
if sys.version > '3':
unicode = str
class moodlecheat(object):
mois = {'january': 'jan',
'February': 'Feb',
'March': 'Mar',
'April': 'Apr',
'May': 'May',
'June': 'Jun',
'July': 'Jul',
'August': 'Aug',
'September': 'Sep',
'October': 'Oct',
'November': 'Nov',
'December': 'Dec'}
def __init__(self, logfilepath, sourcefilepath):
self.sourcefilepath = sourcefilepath
self.logfilepath = logfilepath
self.studentsentries = []
self.xlout = []
self.validquizzes = []
self.donequizzes = []
self.listofquizzes = []
self.inactivestudents = []
self.activestudents = []
self.stylo = {
1: easyxf('pattern: pattern no_fill;'
+ 'font: name Arial, height 260, italic True'),
2: easyxf('pattern: pattern solid_fill, fore_colour yellow;'
+ 'font: name Arial, height 260'),
3: easyxf('pattern: pattern solid_fill, fore_colour gray50;'
+ 'font: name Arial, height 260'),
4: easyxf('pattern: pattern solid_fill, '
+ 'fore_colour light_yellow;'
+ 'font: name Arial, height 260, bold True'),
5: easyxf('pattern: pattern solid_fill, '
+ 'fore_colour light_orange;'
+ 'font: name Arial, height 260')}
# ------------------------------- Functions -------------------------
# function returning True when a student is taking a quiz
def takingquiz(self, item, series):
position = [(i, name.index(item)) for i, name in enumerate(series)
if item in name]
return (series[position[0][0]][(position[0][1]) + 1])
# function to return a moodle date in date.time/python convention
# <y=01 m=11 d=1 sec=50 msec=00> <- unicode strings/moodle moodle
# convention <01 September 2001, 11:50AM>
def udconv(self, datein):
d = datein.split(',')[0]
d1 = datein.split(',')[1]
d2 = d + d1
d3 = moodlecheat.mois[d2.split(' ')[1]]
d4 = (d2.split(' ')[0] + ' ' + d3 + ' ' + d2.split(' ')[2] + ' '
+ d2.split(' ')[3] + d2.split(' ')[4])
d5 = datetime.strptime(d4, '%d %b %Y %I:%M%p')
return d5
# function to calculate between 2 dates in unicode string/moodle log
# format e.g. <'01 September 2001, 11:50AM'> -> time/python convention
def timegap(self, i, j):
self.gap = self.udconv(i) - self.udconv(j)
return self.gap
# ---------------------------- Methods ------------------------------------
def openworkbook(self):
self.wb = open_workbook(
self.sourcefilepath + 'activitylog.xlsx',
on_demand=True)
def extractstudentslist(self):
# Extract the list [activestudents] of all students from the 3rd
# worksheet (list student names) in moodle.xlsx
self.sh = self.wb.sheet_by_index(1)
self.activestudents = [[x, False] for x in self.sh.col_values(0)]
def extractstudentsentries(self):
# Extract activity from the master dataset <studentsentries>
# for all the students listed in <activestudents>
self.sh = self.wb.sheet_by_index(0)
for r in range(self.sh.nrows):
if [self.sh.cell_value(r, 3), False] in self.activestudents:
self.studentsentries.append(self.sh.row_values(r))
def listquizzes(self):
# list all quizes that were completed into <validquizzes>
for x in self.studentsentries:
if(x[4][:10] == 'quiz close'
and [x[3], x[5]] not in self.validquizzes):
self.validquizzes.append([x[3], x[5], -1])
def flip_quiz(self, item, series):
# method to change the student record to show taking a quiz
position = [(i, name.index(item)) for i, name in enumerate(series)
if item in name]
series[position[0][0]][(position[0][1]) + 1] = (
not(series[position[0][0]][(position[0][1]) + 1]))
def logtimestamp(self, message):
# writes <message> in log.txt at sourcefilepath
self.message = message
flog = open(self.logfilepath + 'log.txt', "a")
flog.write('\n == ' + self.message + ' at '
+ strftime("%Y-%m-%d %H:%M:%S", gmtime()))
print(self.message)
def logmessage(self, message):
# writes <message> in log.txt at logfilepath
self.message = message
flog = open(self.logfilepath + 'log.txt', "a")
flog.write('\n' + self.message)
def perstudentquizreport(self):
# writes start and completion times for each students
self.logmessage('\n\nReport on quizzes')
self.logmessage('=================\n')
self.oldtime = str('25 April 2014, 11:41 AM')
for m in sorted(reversed(self.studentsentries), key=itemgetter(3)):
self.sc = self.stylo[1]
# If student starting a quiz and quiz has not been completed yet
# => log the entries into [quizzdb],
# log the quizz name into [validquizzes],
# flip the flag to show student is taking a quiz
if((m[4].split(' (')[0] == 'quiz continue attempt'
or m[4].split(' (')[0] == 'quiz attempt')
and self.takingquiz(m[3], self.activestudents) is False
and any(p[0] == m[3]
and p[1] == m[5] for p in self.validquizzes)
and not any(p[0] == m[3]
and p[1] == m[5] for p in self.donequizzes)):
self.flip_quiz(m[3], self.activestudents)
self.logmessage(m[3] + ' has made first answer in the quiz '
+ m[5] + ' at ' + m[1].split(', ')[1])
if((m[4].split(' (')[0] == 'quiz continue attempt'
or m[4].split(' (')[0] == 'quiz attempt')
and self.takingquiz(m[3], self.activestudents) is False
and any(p[0] == m[3]
and p[1] == m[5] for p in self.validquizzes)
and any(p[0] == m[3]
and p[1] == m[5] for p in self.donequizzes)):
self.flip_quiz(m[3], self.activestudents)
self.logmessage(m[3] + ' retook the quiz ' + m[5] + ' at '
+ m[1].split(', ')[1]
+ ' - This is an issue if the quiz is not a practice quiz')
# finished a quiz
elif(m[4][:10] == 'quiz close'
and (any(p[0] == m[3]
and p[1] == m[5] for p in self.validquizzes))):
self.donequizzes.append([m[3], m[5]])
self.flip_quiz(m[3], self.activestudents)
self.logmessage(m[3] + ' has finished the quiz ' + m[5]
+ ' at ' + m[1].split(', ')[1])
self.logmessage(('-') * 130)
if (self.takingquiz(m[3], self.activestudents) is True):
# unusual action during the quiz
if m[4][:4] != 'quiz':
self.logmessage('!! -- ' + unicode(m[4].split('(')[0])
+ ' ' + unicode(m[5]) + ' at '
+ unicode(m[1].split(', ')[1]))
self.sc = self.stylo[3]
# More than 1 minute for an action
elif((self.timegap(m[1], self.oldtime))
> timedelta(minutes=1)):
self.sc = self.stylo[4]
# Answering quiz
else:
self.sc = self.stylo[2]
# Answering quiz
if (self.timegap(m[1], self.oldtime)) > timedelta(minutes=3):
self.logmessage('!! - Time gap between answers is greater'
+ 'than 3 minutes. '
+ unicode((self.timegap(m[1], self.oldtime)))
+ ' at ' + unicode(m[1].split(', ')[1]))
self.sc = self.stylo[5]
# Answering quiz
if(m[4].split(' (')[0] == 'quiz continue attempt'
or m[4].split(' (')[0] == 'quiz attempt'):
self.quest = [l[2] for l in self.validquizzes
if l[0] == m[3] and l[1] == m[5]]
self.validquizzes.remove([m[3], m[5], self.quest[0]])
self.validquizzes.append([m[3], m[5], self.quest[0] + 1])
self.xlout.append([m[3], m[1].split(', ')[1],
m[5], m[4].split('(')[0],
self.timegap(m[1], self.oldtime), self.sc])
# (self.takingquiz(m[3], self.activestudents))
self.oldtime = m[1]
def listallquizzes(self):
# report a list of quizzes
self.logmessage('\nList of quizzes submitted by the students')
self.logmessage(('=') * 47)
for i in self.donequizzes:
if i[1] not in self.listofquizzes:
self.listofquizzes.append(i[1])
self.logmessage(i[1])
def listinactivestudents(self):
# report a list of students who didn't take a quiz
self.logmessage('\nList of students who did not take any quiz')
self.logmessage(('=') * 37)
self.inactivestudents = [x for x in
([i[0] for i in self.activestudents])
if x not in ([j[0] for j in self.donequizzes])]
for i in self.inactivestudents:
self.logmessage(i)
# ------------------------------------- xl writing -----------------------
def createallstudentfiles(self):
# Create individual xl files for the students
for self.x in [i for i in self.activestudents
if i[0] not in self.inactivestudents]:
self.wpath = self.logfilepath + str(self.x[0]) + '.xls'
# Creates new workbook for the students
self.w = xlwt.Workbook(encoding="utf-8")
self.sh = self.w.add_sheet('Activity')
# Create all the workbooks for that student
self.w.save(self.wpath)
def fillingindividualstudentfiles(self):
# Fill in individual xl files for the students
self.name = 'empty'
self.row = 0
for self.x in (sorted(self.xlout, key=itemgetter(0))):
self.row += 1
# row=(open_workbook(wpath).sheet_by_index(0).nrows) use if
# studentsentries or xlout is not ordered by name in the loop
for self.y in range(1, 5):
self.w.get_sheet(0).write(self.row, self.y - 1,
str(self.x[self.y]), self.x[5])
self.w.get_sheet(0).col(0).width = 256 * 19
self.w.get_sheet(0).col(1).width = 256 * 90
self.w.get_sheet(0).col(2).width = 256 * 30
self.w.get_sheet(0).col(3).width = 256 * 30
# next student in the list or student last in the list
if(self.x[0] != self.name
or self.xlout.index(self.x) == (len(self.xlout) - 1)):
self.w.save(self.wpath)
self.name = self.x[0]
self.row = 0
self.wpath = self.logfilepath + str(self.x[0]) + '.xls'
# Re-open existing workbook for student
# and makes a copy in w for editing
self.w = copy(open_workbook(
self.wpath, formatting_info=True, on_demand=True))
def main(self):
self.logtimestamp('Importing the data from the workbook started')
self.openworkbook()
self.logtimestamp('Data uploaded')
self.logtimestamp('Uploading students list')
self.extractstudentslist()
self.logtimestamp('Students list uploaded')
self.logtimestamp('Dumping data in holding table started')
self.extractstudentsentries()
self.logtimestamp('Data dumped ')
self.logtimestamp('Creating a list of quizzes completed started')
self.listquizzes()
self.logtimestamp('List of quizzes completed dumped')
self.logtimestamp('Data processing started')
self.perstudentquizreport()
self.logtimestamp('Data processing complete')
self.logtimestamp('Finding missing students and listing quizzes')
self.listallquizzes()
self.listinactivestudents()
self.logtimestamp('Found missing students and listed quizzes')
self.logtimestamp('Writing individual student xl files')
self.createallstudentfiles()
self.fillingindividualstudentfiles()
self.logtimestamp('Students files written')
if __name__ == '__main__':
import cProfile
moo = moodlecheat('/Users/macadmin1/Desktop/moodle reports/Reports/',
'/Users/macadmin1/Desktop/moodle reports/')
# moo.main()
cProfile.run('moo.main()')
-
\$\begingroup\$ Thank you so much for this. I will follow PEP8 from now on. I'll tuplelise the script where possible and post it back here. \$\endgroup\$user2718378– user27183782014年06月10日 07:20:41 +00:00Commented Jun 10, 2014 at 7:20
As the other answer mentioned, PEP8 will help your readability. Because you already have been informed about PEP8, I (hopefully) won't spend too long on it. Instead I'll just list some points to really look for in that document:
- Naming conventions:
PascalCase
for classes andunderscores_in_names
for basically everything else. - Descriptive variable names. Even if they are temporary variables, give them relatively descriptive names.
- Space is your friend: a single space between methods and to split up logical sections of code. However, take care not to overdo it.
- Try to stick to 80 characters per line. This is probably the least necessary of the style suggestions. It really is only matters for people who have really small screens.
There are more style suggestions I can make, but as you indicated in a comment above, you plan on reading and following PEP8, so I won't inundate you with its conventions right away.
Onto, improvements.
ALWAYS USE
with
when dealing with files. You currently useopen
. This technically is fine, however it much more prone to bugs. You are actually guilty of one of the bugs: youopen
a file pointer, but do not callclose
on it:def logmessage(self,message): # writes <message> in log.txt at logfilepath self.message=message flog=open (self.logfilepath+'log.txt', "a") flog.write('\n'+self.message)
Using
with
yourlogmessage
function (PEP8-ed) looks as such:def log_message(self, message): self.message = message with open(os.path.join(self.log_filepath, 'log.txt'), 'a') as file: file.write('\n' + self.message)
In the point above, notice how I used
os.path.join
. Use this function when you are creating filepaths as it automatically uses the OS-specific separator when joining the arguments.Use docstrings. You have quite a few comments that describe your methods and what they do. This is nice. However, if you use docstrings, you get some added functionality:
>>>def foo(): ... '''This is a sample docstring.''' ... print('Hello World!') >>>help(foo) Help on function foo in module __main__: foo() This is a sample docstring
You do a lot of string concatenation. While the performance of concatenation against string formatting is debatable, the Pythonic way to embed variable information into string is to use the
str.format
function. Example:# Here is how you do this. self.logmessage (m[3]+' retook the quiz '+m[5]+ ' at '+m[1].split(', ')[1]+' - This is an issue if the quiz is not a practice quiz') # Here is how `format` does this. self.logmessage('{} retook the quiz {} at {} - This is an issue if the quiz is not a practice quiz'.format(m[3], m[5], m[1]))
Also, when printing out lists and the delimiter stays the same between each element, use the
join
function:# Here is how you do this. d4=d2.split(' ')[0]+' '+d3+' '+d2.split(' ')[2]+' '+d2.split(' ')[3]+d2.split(' ')[4] # Here is how `join` does this. d4 = ' '.join(d2.split())
Variables are nice. You do a lot of this:
foo.split()[0] + ' ' + foo.split()[1] + ' ' ...
This can get quite intensive because you, theoretically, are splitting your string
n
times, wheren
is the number of spaces in your string. Instead of splitting the string each time you are accessing a new index, simply store the list once:split_foo = foo.split()
As for OOP suggestions, take a look at your code and anywhere you use specific indices in a list to represent a constant form of data, create (or merge it into) a class.
Example: In your activestudents
list you hold a list of two values: the student's name and a flag that says if they are doing an activity. This can be combined with your inactivestudents
list into a single class:
class Student(object):
def __init__(self, name, active=True, taking_quiz=False):
self._name = name
self._active = active
self._taking_quiz = taking_quiz
def is_active(self):
return self._active
def is_taking_quiz(self):
return self._taking_quiz
Now you can have a single list of students and to work on only the active ones simply do:
active_students = [student for student in self.students if student.is_active()]
You could also create a Quiz
class and have the Student
class hold a list of Quiz
es with the number of questions they have completed.
-
\$\begingroup\$ Great. Thank you. This is incredibly useful. Give me a few days to work on this. I Like the flip-flop class a lot. \$\endgroup\$user2718378– user27183782014年06月10日 17:07:21 +00:00Commented Jun 10, 2014 at 17:07
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import sys
import os
import re
from xlrd import open_workbook
import xlwt
from xlwt import easyxf
from operator import itemgetter, attrgetter
from time import gmtime, strftime
from datetime import datetime, timedelta
from xlutils.copy import copy
if sys.version > '3':
unicode = str
class MoodleCheat(object):
def __init__(self, log_filepath, source_filepath):
self.source_filepath = source_filepath
self.log_filepath = log_filepath
self.students_entries = []
self.xl_out = []
self.valid_quizzes = []
self.done_quizzes = []
self.list_of_quizzes = []
self.inlisted_students = []
self.listed_students = []
self.stylo = {
'normal': easyxf('pattern: pattern no_fill;'
+ 'font: name Arial,'
+ 'height 260, italic True'
),
'answer': easyxf('pattern: pattern solid_fill,'
+ 'fore_colour yellow;'
+ 'font: name Arial,'
+ 'height 260'
),
'problem': easyxf('pattern: pattern solid_fill,'
+ 'fore_colour gray50;'
+ 'font: name Arial,'
+ 'height 260'
),
'too long': easyxf('pattern: pattern solid_fill, '
+ 'fore_colour light_yellow;'
+ 'font: name Arial,'
+ 'height 260,'
+ 'bold True'
),
'way too long': easyxf('pattern: pattern solid_fill, '
+ 'fore_colour light_orange;'
+ 'font: name Arial,'
+ 'height 260'
)}
def moodledateconvert(self, date_in):
""" function to return a moodle date in date.time/python convention """
split_date = ' '.join(re.findall(r"[\w']+", date_in))
return datetime.strptime(split_date, '%d %B %Y %I %M %p')
def timegap(self, time_late, time_early):
""" calculates between the difference between 2 dates
format e.g. <'01 September 2001, 11:50AM'> -> time/python convention"""
return self.moodledateconvert(time_late) - self.moodledateconvert(time_early)
def openworkbook(self):
""" open the workbook containing the activity entries. """
self.work_book = open_workbook(os.path.join(self.source_filepath + 'activitylog.xlsx'),
on_demand=True)
def extractstudentslist(self):
""" extract the list of all students from the 2nd worksheet.
Keyword arguments:
listed_students -- list of students, a flag showing taking a quiz
"""
self.sheet = self.work_book.sheet_by_index(1)
self.listed_students = [Student(_student) for _student in self.sheet.col_values(0)]
self.listed_students.sort(key=attrgetter('_name'))
def extractstudents_entries(self):
""" Extract all entries for the students listed in [listed_students].
Keyword arguments:
students_entries -- all entries for the students listed
row -- row number in the data entries worksheet
"""
self.sheet = self.work_book.sheet_by_index(0)
for row in range(self.sheet.nrows):
for x in self.listed_students:
if x._name == self.sheet.cell_value(row, 3):
self.students_entries.append(Entry(*self.sheet.row_values(row)))
break
self.students_entries = sorted(self.students_entries,
key=attrgetter('_name'))
self.students_entries.reverse()
def listquizzes(self):
""" list all quizes that were completed.
Keyword arguments:
valid_quizzes -- list of tuples (student name, quizzes completed)
_entry -- an entry in the [students_entries] list
"""
for _entry in self.students_entries:
if ( _entry._action[:10] == 'quiz close'
and (_entry._name, _entry._info)
not in self.valid_quizzes
):
self.valid_quizzes.append((_entry._name, _entry._info))
def fliptaking(self, _searched_student):
""" Changes the attribute for the student class to show name is taking quiz."""
for _student in self.listed_students:
if _student._name ==_searched_student:
_student._taking_quiz = not (_student._taking_quiz)
break
def flipactive(self, _searched_student):
""" Changes the flag for the student to show is taking quiz."""
for _student in self.listed_students:
if _student._name ==_searched_student:
_student._active = True
break
def istaking(self, _searched_student):
""" Changes the flag for the student to show has completed a quiz."""
for _student in self.listed_students:
if _student._name ==_searched_student:
return _student._taking_quiz
def logtimestamp(self, message):
""" Writes a message with time stamp in sourcefilepath/log.txt."""
self.message = message
with open(os.path.join(self.log_filepath, 'log.txt'), 'a') as file:
file.write('\n == {} at {}'.
format(
self.message,
strftime('%Y-%m-%d %H:%M:%S',gmtime())
))
def logmessage(self, message):
""" Writes a message in log.txt in sourcefilepath/log.txt."""
self.message = message
with open(os.path.join(self.log_filepath, 'log.txt'), 'a') as file:
file.write('\n' + self.message)
def p(self, name):
""" for test, returns the value of .taking_quiz attribute \
for _searched_student.
"""
for x in self.listed_students:
if x._name == n._name:
print (x._name, x._taking_quiz)
def perstudentquizreport(self):
""" logs quiz information for each student and xl ouput table.
Keyword arguments:
xl_out -- list of tuples (student name, quizzes completed)
_entry -- an entry in the [students_entries] list
"""
self.logmessage('\n\nReport on quizzes')
self.logmessage('=================\n')
self.old_time = str('25 April 2014, 11:41 AM')
# Main loop
for _record in self.students_entries:
# log messages and set flags for each entry when in quiz
self.sc = self.stylo['normal']
if (( _record._action.split(' (')[0] == 'quiz continue attempt' \
or _record._action.split(' (')[0] == 'quiz attempt')) \
and (_record._name, _record._info) not in self.done_quizzes \
and self.istaking(_record._name) == False \
and ((_record._name, _record._info) in self.valid_quizzes
):
self.fliptaking(_record._name)
self.logmessage( '{} has made first answer in the quiz {} at {} '
.format(
_record._name,
_record._info,
_record._time.split(',')[1]
))
if ( (_record._action.split(' (')[0] == 'quiz continue attempt' \
or _record._action.split(' (')[0] == 'quiz attempt'
)) \
and (_record._name, _record._info) in self.done_quizzes \
and self.istaking(_record._name) == False \
and ((_record._name, _record._info) in self.valid_quizzes
):
self.fliptaking(_record._name)
self.logmessage('{} retook the quiz {} at {} -This is an issue' +
' if the quiz is not a practice quiz'
.format(
_record._name,
_record._info,
_record._time.split(',')[1]
))
elif _record._action[:10] == 'quiz close':
self.done_quizzes.append((_record._name, _record._info))
self.fliptaking(_record._name)
self.flipactive(_record._name)
self.logmessage('{} has finished the quiz {} at {}'
.format(
_record._name,
_record._info,
_record._time.split(',')[1]
))
self.logmessage(('-') * 130)
# unusual actions during a valid quiz
if self.istaking(_record._name) == True:
if _record._action[:4] != 'quiz':
self.logmessage('!! -- {} {} at {}'
.format(
_record._action.split('('),
_record._info,
_record._time.split(',')[1]
))
self.sc = self.stylo['problem']
elif ((self.timegap(_record._time, self.old_time))
> timedelta(minutes=1)):
self.sc = self.stylo['too long']
else:
self.sc = self.stylo['answer']
if (self.timegap(_record._time, self.old_time)) > timedelta(minutes=3):
self.logmessage('!! - Time gap between answers is greater' +
' than 3 minutes. ({}) at {}'
.format(
self.timegap(_record._time, self.old_time),
_record._time.split(',')[1]
))
self.sc = self.stylo['way too long']
self.xl_out.append([_record._name,
_record._time.split(', ')[1],
_record._info,
_record._action.split('(')[0],
self.timegap(_record._time, self.old_time),
self.sc]
)
self.old_time = _record._time
def listallquizzes(self):
""" Creates a list of all quizzes taken by students."""
self.logmessage('\nList of quizzes submitted by the students')
self.logmessage(('=') * 47)
for i in self.done_quizzes:
if i[1] not in self.list_of_quizzes:
self.list_of_quizzes.append(i[1])
self.logmessage(i[1])
def listinlisted_students(self):
""" Write the names of students who didn't take a quiz in log.txt. """
self.logmessage('\nList of students who did not take any quiz')
self.logmessage(('=') * 37)
for x in self.listed_students:
if x._active == False:
self.logmessage(x._name)
def createallstudentfiles(self):
""" Creates an excel file for each student. """
for self.x in self.listed_students:
self.wpath = os.path.join((self.log_filepath),str(self.x._name) +'.xls')
# Creates a workbook w in memory for a student
self.w = xlwt.Workbook(encoding="utf-8")
self.sheet = self.w.add_sheet('Activity')
# Writes the workbook w at wpath file for that student
self.w.save(self.wpath)
def fillingindividualstudentfiles(self):
""" Fill in each workbook created for a student with xl_out data. """
self.name = 'empty'
self.row = 0
for self.x in self.xl_out:
self.row += 1
for self.y in range(1, 5):
self.w.get_sheet(0).write(
self.row, self.y - 1,
str(self.x[self.y]), self.x[5]
)
self.w.get_sheet(0).col(0).width = 256 * 19
self.w.get_sheet(0).col(1).width = 256 * 90
self.w.get_sheet(0).col(2).width = 256 * 30
self.w.get_sheet(0).col(3).width = 256 * 30
# next student in the list or last student in the list?
if(self.x[0] != self.name \
or self.xl_out.index(self.x) == (len(self.xl_out) - 1)):
self.w.save(self.wpath)
self.name = self.x[0]
self.row = 0
self.wpath = os.path.join((self.log_filepath),
str(self.x[0]) +'.xls'
)
# re-open existing workbook for student
self.w = copy(open_workbook(self.wpath,
formatting_info=True,
on_demand=True
))
def main(self):
self.logtimestamp('Importing the data from the workbook started')
self.openworkbook()
self.logtimestamp('Data uploaded')
self.logtimestamp('Uploading students list')
self.extractstudentslist()
self.logtimestamp('Students list uploaded')
self.logtimestamp('Dumping data in holding table started')
self.extractstudents_entries()
self.logtimestamp('Data dumped ')
self.logtimestamp('Creating a list of quizzes completed started')
self.listquizzes()
self.logtimestamp('List of quizzes completed dumped')
self.logtimestamp('Data processing started')
self.perstudentquizreport()
self.logtimestamp('Data processing complete')
self.logtimestamp('Finding missing students and listing quizzes')
self.listallquizzes()
self.listinlisted_students()
self.logtimestamp('Found missing students and listed quizzes')
self.logtimestamp('Writing individual student xl files')
self.createallstudentfiles()
self.fillingindividualstudentfiles()
self.logtimestamp('Students files written')
class Student(MoodleCheat):
""" class describing individual students with name and flags. """
def __init__(self, name, active=False, taking_quiz=False):
self._name = name
self._active = active
self._taking_quiz = taking_quiz
## For further optimisations
## def is_active(self):
## return self._active
##
## def is_taking_quiz(self):
## return self._taking_quiz
class Entry(MoodleCheat):
""" class describing master data set in source excel file. """
def __init__(self, module, time, ip_addr, name, action, info):
self._module = module
self._time = time
self._ip_addr = ip_addr
self._name = name
self._action = action
self._info = info
## For optimisation
##if __name__ == '__main__':
## import cProfile
moo = MoodleCheat('/Users/macadmin1/Desktop/moodle reports/Reports/',
'/Users/macadmin1/Desktop/moodle reports/')
moo.main()
I have tweaked to make more OOP/Pythonic (so i hope) with 2 new classes to deal with the data. I have tried to follow PEP. Any advice on more optimisation, comments or help would be greatly appreciated. Thank you again to all for all this.
Explore related questions
See similar questions with these tags.
main
and not the detail lines. \$\endgroup\$