I have to implement a "filter" in a Excel file which deletes/shows particular strings found in specified columns. I save the rows numbers that have to be deleted in a list. I have my filters in several dictionaries containing a list. The column index is mapped to a list containing every string that has to be filtered (in that column, obviously). So I scan every defaultdict(list)
to populate the self.row_nums_to_delete
list.
Here's the variables definition snippet:
self.row_nums_to_delete = []
self.col_filter_delete_strict = defaultdict(list)
self.col_filter_show_strict = defaultdict(list)
self.col_filter_show_loose = defaultdict(list)
self.col_filter_delete_loose = defaultdict(list)
Here's how I fill self.row_nums_to_delete
(I know that this is done horribly!!):
def populaterownumstodelete(self):
# checking user selected rows
if len(self.col_filter_delete_strict) > 0:
for row in range(1, self.sheet.nrows):
for col in range(0, self.sheet.ncols):
if col in self.col_filter_delete_strict and self.parseandgetcellvalue(row, col) in \
self.col_filter_delete_strict[col]:
self.row_nums_to_delete.append(row)
# checking strict filter
if len(self.col_filter_show_strict) > 0:
for row in range(1, self.sheet.nrows):
for col in range(0, self.sheet.ncols):
if col in self.col_filter_show_strict and not self.parseandgetcellvalue(row, col) in \
self.col_filter_show_strict[
col]:
self.row_nums_to_delete.append(row)
if len(self.col_filter_show_loose) > 0:
for row in range(1, self.sheet.nrows):
for col in range(0, self.sheet.ncols):
if col in self.col_filter_show_loose:
for name in self.col_filter_show_loose[col]:
if name not in self.parseandgetcellvalue(row, col):
self.row_nums_to_delete.append(row)
if len(self.col_filter_delete_loose) > 0:
for row in range(1, self.sheet.nrows):
for col in range(0, self.sheet.ncols):
if col in self.col_filter_delete_loose:
for name in self.col_filter_delete_loose[col]:
if name in self.parseandgetcellvalue(row, col):
self.row_nums_to_delete.append(row)
EDIT:
This is how the code is right now:
def populaterownumstodelete(self):
if self.col_filter_delete_strict:
for row in range(1, self.sheet.nrows):
for col in (cols for cols in range(self.sheet.ncols) if
cols in self.col_filter_delete_strict and self.parseandgetcellvalue(row, cols) in
self.col_filter_delete_strict[cols]):
self.row_nums_to_delete.append(row)
if self.col_filter_show_strict:
for row in range(1, self.sheet.nrows):
for col in (cols for cols in range(self.sheet.ncols) if
cols in self.col_filter_show_strict and not self.parseandgetcellvalue(row, cols) in
self.col_filter_show_strict[cols]):
self.row_nums_to_delete.append(row)
if self.col_filter_show_loose:
for row in range(1, self.sheet.nrows):
for col in (cols for cols in range(self.sheet.ncols) if cols in self.col_filter_show_loose):
for string in (strings for strings in self.col_filter_show_loose[col] if
strings not in self.parseandgetcellvalue(row, col)):
self.row_nums_to_delete.append(row)
if self.col_filter_delete_loose:
for row in range(1, self.sheet.nrows):
for col in (cols for cols in range(self.sheet.ncols) if cols in self.col_filter_delete_loose):
for string in (strings for strings in self.col_filter_delete_loose[col] if
strings in self.parseandgetcellvalue(row, col)):
self.row_nums_to_delete.append(row)
and here's parseandgetcellvalue()
:
def parseandgetcellvalue(self, row, col):
cell = self.sheet.cell(row, col)
cell_value = cell.value
# checks whether an value may be a int value and converts it (e.g 2.0 -> 2)
if cell.ctype in (2, 3) and int(cell_value) == cell_value:
cell_value = int(cell_value)
return unicode(cell_value)
EDIT 2:
Here's the code now:
def must_delete(self, value, col):
if self.col_filter_delete_strict:
if value in self.col_filter_delete_strict[col]:
return True
if self.col_filter_show_strict:
if value not in self.col_filter_show_strict[col]:
return True
# CHECKING LOOSE SEARCH
if self.col_filter_show_loose or self.col_filter_delete_loose:
for string in self.col_filter_delete_loose[col]:
if len(value.lower().split(string.lower())) > 1:
return True
for string in self.col_filter_show_loose[col]:
if len(value.lower().split(string.lower())) == 1:
return True
return False
def populaterownumstodelete(self):
cols_to_check = (set(self.col_filter_delete_strict)
.union(self.col_filter_show_strict)
.union(self.col_filter_show_loose)
.union(self.col_filter_delete_loose)
.intersection(range(self.sheet.ncols)))
for row in range(1, self.sheet.nrows):
for col in cols_to_check:
value = self.parseandgetcellvalue(row, col)
if self.must_delete(value, col):
self.row_nums_to_delete.append(row)
I used the split()
method to check whether a string is contained in another one since string in list
didn't work well (sometimes it got single characters and not "strings").
By the way, my full application is hosted in this Github Repo just if someone would like to help me clean my code.
-
\$\begingroup\$ Why not use Excel's built in filter? Surely it would be more efficient than rolling your own. \$\endgroup\$RubberDuck– RubberDuck2014年12月07日 16:07:46 +00:00Commented Dec 7, 2014 at 16:07
1 Answer 1
- If
parseandgetcellvalue
is an expensive operation, it would be good to organize code so that it is called at most once for each cell. - Before any looping, you could figure out which columns you need to check. This is easy using set operations.
- If you do all the checks in a single loop over rows and columns, you may be able to avoid some checking because you only need to process each row until you find one reason to delete it.
The above suggest this code for the looping. The actual checking would be in another function must_delete
.
def populaterownumstodelete(self):
cols_to_check = (set(self.col_filter_delete_strict)
.union(self.col_filter_show_strict)
.union(self.col_filter_show_loose)
.union(self.col_filter_delete_loose)
.intersection(range(self.sheet.ncols)))
for row in range(1, self.sheet.nrows):
for col in cols_to_check:
value = self.parseandgetcellvalue(row, col)
if must_delete(value, col):
self.row_nums_to_delete.append(row)
break
-
\$\begingroup\$ FWIW,
set
'sunion
is variadic. You only need to call it once. \$\endgroup\$Veedrac– Veedrac2014年12月08日 14:40:33 +00:00Commented Dec 8, 2014 at 14:40 -
\$\begingroup\$ @Veedrac Yes, good point. I think I'll leave it like it is, though, for readability's sake. \$\endgroup\$Janne Karila– Janne Karila2014年12月08日 16:20:32 +00:00Commented Dec 8, 2014 at 16:20
-
\$\begingroup\$ Thank you for your answer, I've just edited my question so maybe my code's got better now. \$\endgroup\$peperunas– peperunas2014年12月10日 13:24:23 +00:00Commented Dec 10, 2014 at 13:24