I am deleting rows based on a date.
I am loading the entire sheet into an array and doing the evaluation and building a string containing the rows I want to delete.
Option Explicit
Sub arrmaker()
ThisWorkbook.Activate
Dim i_LastRow, i_LastColumn, i_FirstRow, ictr, ictr2, i_row, i_col As Long
Dim Da_Array(), Da_Other_Array(), Da_Third_Array, Da_Dates(), da_Names() As Variant
Dim Da_Builder() As String
Dim splitter() As String
Dim s_rows As String
Dim v_builder As Variant
Dim da_Range, rng_Deletes As Range
Dim prodhold As Long
Dim i_init, i_sec, i_Main As Long
Dim i_hold1, i_hold2, i_hold3 As Long
Sheets(1).Select
Dim da_date As Date
da_date = Date
ictr = 16
Sheets(1).Range("_FilterDatabase").Select
i_FirstRow = Sheets(1).Range("_FilterDatabase").Row
i_LastRow = Last(1, Sheets(1).Cells)
i_LastColumn = Last(2, Sheets(1).Cells)
i_FirstRow = Sheets(1).Range("_FilterDatabase").Row
i_LastColumn = Last(2, Sheets(1).Cells)
Sheets(1).Activate
Call Sort_Em(i_LastRow)
Da_Array = Range(Cells(i_FirstRow, 1), Cells(i_LastRow, i_LastColumn))
'i_FirstRow
s_rows = ""
For ictr = LBound(Da_Array) To UBound(Da_Array) - 1
If ictr > i_FirstRow And Da_Array(ictr, 42) = Da_Array(ictr + 1, 42) And Da_Array(ictr, 1) = Da_Array(ictr + 1, 1) _
And Da_Array(ictr, 40) <> Da_Array(ictr + 1, 40) Then
splitter = Split(Da_Array(ictr, i_LastColumn), "-")
If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < da_date Then
s_rows = s_rows & ictr + i_FirstRow & ":" & ictr + i_FirstRow & ","
End If
End If
Next ictr
s_rows = Left(s_rows, Len(s_rows) - 1)
v_builder = strChk(s_rows)
Set rng_Deletes = Range(v_builder(LBound(v_builder)))
For ictr = LBound(v_builder) + 1 To UBound(v_builder)
Set rng_Deletes = Union(rng_Deletes, Range(v_builder(ictr)))
Next ictr
rng_Deletes.Select
Selection.Delete
Da_Array = Range(Cells(i_FirstRow, 1), Cells(i_LastRow, i_LastColumn))
Da_Other_Array() = Application.WorksheetFunction.Index(Da_Array, 0, 42)
Sheets("tmp").Range(Cells(1, 1).Address, Cells(UBound(Da_Other_Array), 1).Address).Value = Da_Other_Array
Sheets("tmp").Range("$A1ドル:$A$" & UBound(Da_Other_Array)).RemoveDuplicates Columns:=1
i_LastRow = Last(1, Sheets("tmp").Cells)
Sheets("tmp").Activate
Da_Third_Array = Range(Cells(2, 1), Cells(i_LastRow, 1))
ReDim Da_Builder(LBound(Da_Array) To UBound(Da_Array), 1 To UBound(Da_Array, 2) + 2 * UBound(Da_Third_Array))
For i_init = 1 To UBound(Da_Array, 2)
Da_Builder(1, i_init) = Da_Array(1, i_init)
Next i_init
'MsgBox UBound(Da_Builder, 2)
i_hold1 = 0
For i_sec = 1 To UBound(Da_Third_Array)
i_hold2 = UBound(Da_Array, 2)
Da_Builder(1, i_sec + i_hold1 + i_hold2) = Da_Third_Array(i_sec, 1)
Da_Builder(1, i_sec + i_hold1 + 1 + i_hold2) = "cur"
i_hold1 = i_hold1 + 1
Next i_sec
Sheets("tmp2").Range(Cells(1, 1).Address, Cells(1, UBound(Da_Builder, 2)).Address).Value = Da_Builder
i_LastRow = Last(1, Sheets(2).Cells)
Dim rng As Range
Set rng = Sheets(2).Range(Cells(2, 1), Cells(i_LastRow, 1))
da_Names = rng.Value
Sheets("hold").Select
Sheets("hold").Range(Cells(1, 1).Address, Cells(UBound(Da_Array), UBound(Da_Array, 2)).Address) = Da_Array
Columns("A:A").Select
ActiveSheet.Range("$A1ドル:$BC25525ドル").RemoveDuplicates Columns:=1, Header:=xlYes
Sheets("hold").Range(Cells(1, 1).Address, Cells(1, UBound(Da_Builder, 2)).Address).Value = Da_Builder
i_LastRow = Last(1, Sheets("hold").Cells)
ictr2 = 2
prodhold = Cells(ictr2, 1)
For ictr = LBound(Da_Array) + 1 To UBound(Da_Array)
If CLng(Da_Array(ictr, 1)) <> prodhold Then
ictr2 = ictr2 + 1
prodhold = Cells(ictr2, 1)
End If
Set rng = Worksheets("hold").UsedRange.Find(Da_Array(ictr, 42), LookIn:=xlValues, lookat:=xlWhole)
' Set rng = Sheets("hold").Range("A1:A" & UBound(Da_Builder, 2)).Find(Da_Array(ictr, 42), LookIn:=xlValues, lookat:=xlWhole)
Cells(ictr2, rng.Column) = Da_Array(ictr, 48)
Cells(ictr2, rng.Column + 1) = Da_Array(ictr, 49)
Next ictr
End Sub
This is fed to break the string into an array of strings, each less than 255:
Function strChk(s_rows) As Variant
Dim l_point, l_ctr, l_hold, l_start As Long
Dim OutArray() As String
l_ctr = 0
l_start = 1
l_point = 1
'ReDim Preserve OutArray(l_ctr)
Do While l_point <> 0
If l_point - l_start > 255 Then
ReDim Preserve OutArray(l_ctr)
l_point = l_hold
OutArray(l_ctr) = Mid(s_rows, l_start, l_point - l_start)
l_ctr = l_ctr + 1
l_start = l_point + 1
Else
l_hold = l_point
End If
l_point = InStr(l_point + 1, s_rows, ",")
Loop
strChk = OutArray
End Function
Is there a more efficient way to build a range for deletion, and if so, how would I go about it?
More on the delete criteria. If the date in column 55 is less than the current date, it needs to be deleted.
Note:
I cannot simply sort by date because there is an additional criterion. I delete the row only if the current date is greater than the date on the sheet, and the version in column 42 has no previous versions. So, if 42 has no previous version, then keep it even if expired.
-
5\$\begingroup\$ Please explain in more detail what you mean by "deleting rows based on a date". \$\endgroup\$200_success– 200_success2016年03月16日 14:12:04 +00:00Commented Mar 16, 2016 at 14:12
-
2\$\begingroup\$ Is the sheet sorted by date so that all the rows to be deleted are contiguous? If so, then you could avoid creating the array by finding the first and last occurrences of what needs to be deleted and create your range with the first and last items. \$\endgroup\$pacmaninbw– pacmaninbw ♦2016年03月16日 14:29:55 +00:00Commented Mar 16, 2016 at 14:29
-
2\$\begingroup\$ @Raystafarian Original sorting before I took over: 8+ hours. First iteration of my code: 15 minutes. (sorting/deleting on the spreadsheet). Second iteration of my code: five minutes (identifying rows on the sheet and building the range to delete) current time is about 50 seconds. \$\endgroup\$user97873– user978732016年03月16日 14:38:06 +00:00Commented Mar 16, 2016 at 14:38
-
2\$\begingroup\$ How are you telling if there's a previous version? \$\endgroup\$Raystafarian– Raystafarian2016年03月16日 15:17:53 +00:00Commented Mar 16, 2016 at 15:17
-
2\$\begingroup\$ That would be the way I'd describe it in the question then. It answers a lot about the structure of the cells and what other approaches are feasible. Unless you want to stick with the string of rows. \$\endgroup\$Raystafarian– Raystafarian2016年03月16日 16:07:05 +00:00Commented Mar 16, 2016 at 16:07
3 Answers 3
Rule #1:
Be explicit with your declarations:
You already have Option explicit
which forces you to be explicit about variable names but not types. You have to declare a type for every variable.
This:
Dim i_LastRow, i_LastColumn, i_FirstRow, ictr, ictr2, i_row, i_col As Long
Does not declare 7 Long
Variables. It is actually doing this:
Dim i_LastRow [As Variant], i_LastColumn [As Variant], i_FirstRow [As Variant], ictr [As Variant], ictr2 [As Variant], i_row [As Variant], i_col As Long
Rule #2:
Naming
Names should be Descriptive, Clear, Unambiguous and Concise. In that order.
Your prefixes tell me nothing about your variables.
i_LastRow
- what does the i_
mean? I think you meant to use l_
for Long
but frankly, it's just distracting. If I see a variable called lastRow
I already know it's going to be a row number.
Read This article on variable naming and why type-prefixes are a terrible idea.
Stick to standard naming conventions
Namely:
Local Variables: Written in
camelCase
.
Dim localVariable As String
includes method arguments.Module / Global Variables: Written in
PascalCase
.
Private ModuleVariable As String
Global PublicVariable As Long
Method Names: Verbs. Written in
PascalCase
Private Function ReturnThisValue() As Long
Public Sub DoThisThing()
Constants: Written in
SHOUTY_SNAKE_CASE
Public Const CONSTANT_VALUE As String = "This Value Never Changes"
Some suggested substitutions:
ictr
--> counter
or just the standard i, j, k
Da_Array()
--> sheetArray()
or maybe sheetData()
splitter
--> splitHolder()
rng_deletes
--> deleteRange
any time you find yourself writing variable, variable2, variable3
etc. It's a great sign that you need to rethink your structure. Probably involving some kind of loop.
Rule 3:
Declare your variables as close to their usage as possible
In general, try to get as much information as possible as close to where it is used / applicable.
By the time I've finished reading the last of your variable declarations I've already forgotten most of the list. I don't want to have to keep scrolling back to the start of your procedure to find what things are. Just move them to where they'll actually be used:
Dim currentDate As Date
currentDate = Date
Dim dataSheet As Worksheet
Set dataSheet = Sheets(1)
dataSheet.Activate
Dim firstRow As Long, lastRow As Long, lastCol As Long
firstRow = dataSheet.Range("_FilterDatabase").Row
lastRow = Last(1, dataSheet.Cells)
lastCol = Last(2, dataSheet.Cells)
Added bonus: the more you move things to where they're actually used, the more your code naturally separates into loosely-connected "sections" which can then easily be refactored into their own Subs/Functions.
Refactoring
Take all the above (and the refactoring for your row deletion in my previous answer) and the first half of your code now looks like this:
ThisWorkbook.Activate
Dim currentDate As Date
currentDate = Date
Dim dataSheet As Worksheet
Set dataSheet = Sheets(1)
dataSheet.Activate
Dim firstRow As Long, lastRow As Long, lastCol As Long
firstRow = dataSheet.Range("_FilterDatabase").Row
lastRow = Last(1, dataSheet.Cells)
lastCol = Last(2, dataSheet.Cells)
Dim ix As Long
Dim deleteRange As Range, rCell As Range
For ix = LBound(sheetarray, 1) To UBound(sheetarray, 1) - 1 '/ -1 because of Look-Ahead conditions in DeleteRow()
If DeleteRow(sheetarray, ix, firstRow, currentDate) Then
Set rCell = Cells(firstRow + ix, 1)
If deleteRange Is Nothing Then Set deleteRange = rCell Else Set deleteRange = Union(deleteRange, rCell)
End If
Next ix
deleteRange.EntireRow.Delete
hmm. suddenly this looks like its' own, completely separate thing.
Maybe we should refactor it into its' own sub?
Private Sub DeleteUndesiredRows()
ThisWorkbook.Activate
Dim currentDate As Date
currentDate = Date
Dim dataSheet As Worksheet
Set dataSheet = Sheets(1)
dataSheet.Activate
Dim firstRow As Long, lastRow As Long, lastCol As Long
firstRow = dataSheet.Range("_FilterDatabase").Row
lastRow = Last(1, dataSheet.Cells)
lastCol = Last(2, dataSheet.Cells)
Dim ix As Long
Dim deleteRange As Range, rCell As Range
For ix = LBound(sheetarray, 1) To UBound(sheetarray, 1) - 1 '/ -1 because of Look-Ahead conditions in DeleteRow()
If DeleteRow(sheetarray, ix, firstRow, currentDate) Then
Set rCell = Cells(firstRow + ix, 1)
If deleteRange Is Nothing Then Set deleteRange = rCell Else Set deleteRange = Union(deleteRange, rCell)
End If
Next ix
deleteRange.EntireRow.Delete
End Sub
And now we can just call that at the start of our main sub and, should we need to change the row-deletion section specifically, we know exactly where to find it.
-
\$\begingroup\$ I would love to add more, but I have to sign off for today. \$\endgroup\$Kaz– Kaz2016年03月16日 17:03:34 +00:00Commented Mar 16, 2016 at 17:03
-
\$\begingroup\$ great advice. I date back to the mainframe days, and we were taught to declare all of our variables at the top. Thank you for your detailed and very thorough explanations of everything. \$\endgroup\$user97873– user978732016年03月16日 17:38:59 +00:00Commented Mar 16, 2016 at 17:38
Note: I answered prior to edit 5 so some of this might be obsolete.
Let's talk about some things first -
You didn't provide a sub
that your procedure is in, but I will assume it's in a 'sub'. That being said, it doesn't seem that Option Explicit
is turned on because you aren't declaring your variables.
Always turn on Option Explicit
. You can have it automatically by going to Tools -> Options in the VBE and checking the Require Variable Declaration option. This way if you have any variables not defined, the compiler will let you know.
When you don't define your variable, VBA will declare it as a Variant, which are objects:
Performance. A variable you declare with the Object type is flexible enough to contain a reference to any object. However, when you invoke a method or property on such a variable, you always incur late binding (at run time). To force early binding (at compile time) and better performance, declare the variable with a specific class name, or cast it to the specific data type.
By not declaring variables, you could possibly be paying a penalty.
Speaking of variables - your naming scheme is confusing. Give your variables meaningful names. Something like ictr
isn't very descriptive. It could just be counter
. The same goes for s_rows
and v_builder
- I can see that they are supposed to be a string and a variant, but the name gives me no indication as to what they do. Why not concatenatedRows
and deletionArray
.
Standard VBA naming conventions have camelCase
for local variables and PascalCase
for other variables and names.
It would end up more explanatory like
Option Explicit
Sub LoadData()
Dim counter As Long
Dim dataArray As Variant
Dim firstRow As Long
Dim lastColumn As Long
Dim splitter As String
Dim rangeToDelete As Range
Dim splitRows As String
Dim builtArray As Variant
Dim myDate As Date
For counter = LBound(dataArray) To UBound(dataArray) - 1
If counter > firstRow And dataArray(counter, 42) = dataArray(counter + 1, 42) _
And dataArray(counter, 1) = dataArray(counter + 1, 1) _
And dataArray(counter, 40) <> dataArray(counter + 1, 40) Then
splitter = Split(dataArray(counter, lastColumn), "-")
If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < myDate Then
splitRows = splitRows & counter + firstRow & ":" & counter + firstRow & ","
End If
End If
Next counter
splitRows = Left(splitRows, Len(splitRows) - 1)
builtArray = strChk(splitRows)
Set rangeToDelete = Range(builtArray(LBound(builtArray)))
For counter = LBound(builtArray) + 1 To UBound(builtArray)
Set rangeToDelete = Union(rangeToDelete, Range(builtArray(counter)))
Next counter
rangeToDelete.Select
Selection.Delete
End Sub
You'll also note I changed the code structure. It's good practice to indent all of your code that way Labels
will stick out as obvious.
Now I see that you are selecting the range to delete. Why not just rangeToDelete.Delete
.
Be sure to avoid things like .Select
- it just slows the code down by needing to fiddle with the spreadsheet while doing everything else behind the scenes. There's a good question on StackOverflow addressing this - https://stackoverflow.com/questions/10714251/how-to-avoid-using-select-in-excel-vba-macros .
For your function, you are requiring an input but it's being passed byRef by default. I don't see that it needs to go back, so you can just pass it byVal
for increased speed.
Function CheckStringValue(ByVal splitRows as String) As Variant
The function - does it need to be a variant or could it be an array of strings?
Another good way to speed up your macro is to use Application.Screenupdating = False
and Application.Calculation = xlManual
and Application.EnableEvents = False
. Just be sure to return them to True
and xlAutomatic
and True
before exiting the sub.
Within your function you put Dim l_point, l_ctr, l_hold, l_start As Long
- this is only declaring l_start
as long
type. In VBA everything needs to be given a type -
Private Function strChk(splitRows) As String
Dim l_point As Long
Dim l_ctr As Long
Dim l_hold As Long
Dim l_start As Long
Dim OutArray() As String
Your function doesn't appear to need a larger scope, so make it private. The other variables l_
- I already went over naming so I won't beat a dead horse.
You are also using a delimiter string, why not make it a constant?
Private Function CheckStringValue(ByVal splitRows As String) As String
Dim delimitPoint As Long
Dim arrayCounter As Long
Dim hold As Long
Dim startPoint As Long
Dim OutArray() As String
Const DELIMITER As String = ","
arrayCounter = 0
startPoint = 1
delimitPoint = 1
'ReDim Preserve OutArray(arrayCounter)
Do While delimitPoint <> 0
If delimitPoint - startPoint > 255 Then
ReDim Preserve OutArray(arrayCounter)
delimitPoint = hold
OutArray(arrayCounter) = Mid(splitRows, startPoint, delimitPoint - startPoint)
arrayCounter = arrayCounter + 1
startPoint = delimitPoint + 1
Else
hold = delimitPoint
End If
delimitPoint = InStr(delimitPoint + 1, splitRows, DELIMITER)
Loop
CheckStringValue = OutArray
End Function
Now everything is much more legible and speed has been increased if you've changed the Application
attributes.
It looks like in the main sub you're using another function splitter
- what is that? I also don't see where da_Date
is assigned?
-
\$\begingroup\$ Option explicit is on, just didn't show that part, the whole procedure is about 4 times as long as what I posted. \$\endgroup\$user97873– user978732016年03月16日 14:50:12 +00:00Commented Mar 16, 2016 at 14:50
-
\$\begingroup\$ It could be an array of strings. The previous code was taking 8+ hours. I threw this together to address an emergency, so it's not my best. \$\endgroup\$user97873– user978732016年03月16日 14:59:30 +00:00Commented Mar 16, 2016 at 14:59
-
\$\begingroup\$ No worries, I've definitely had worse. \$\endgroup\$Raystafarian– Raystafarian2016年03月16日 15:10:37 +00:00Commented Mar 16, 2016 at 15:10
-
\$\begingroup\$ @RichardU If It's only 4 times as long, you could just post the whole thing. \$\endgroup\$Kaz– Kaz2016年03月16日 15:34:39 +00:00Commented Mar 16, 2016 at 15:34
Note: I answered prior to edit 5 so some of this might be obsolete.
@Raystafarian has touched on the horribly obfuscated naming, so I won't repeat that advice.
This whole building a string array of range references is entirely unnecesary. Just build your rng_deletes
union as you go:
'/ Note the conversion of "ictr" to "ix". Personally I like ix, iy, iz etc. for index iterators but that's just a personal preference.
'/ Also, "Da_Array" --> "sheetArray", "i_FirstRow" --> "firstRow"
For ix = LBound(sheetArray) To UBound(sheetArray) - 1
If ix > firstRow And sheetArray(ix, 42) = sheetArray(ix + 1, 42) And sheetArray(ix, 1) = sheetArray(ix + 1, 1) _
And sheetArray(ix, 40) <> sheetArray(ix + 1, 40) Then
splitter = Split(sheetArray(ix, i_LastColumn), "-")
If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < da_date Then
If rng_Deletes Is Nothing Then Set rng_Deletes = Cells(firstRow + ix, 1) Else Set rng_Deletes = Union(rng_Deletes, Cells(firstRow + ix, 1))
End If
End If
Next ix
rng_Deletes.EntireRow.Delete
And immediately the other half of you code can be removed.
LBound(sheetArray)
This is implicitly doing Lbound(sheetArray, 1)
. Never let your code do things implicitly, be explicit:
For ix = LBound(sheetArray, 1) To UBound(sheetArray, 1) - 1
Now, why - 1
? Off-by-one errors are some of the most ubiquitous, difficult programming problems. If you need to add +-1
modifiers, always leave a note explaining why:
For ix = LBound(sheetArray) To UBound(sheetArray) - 1 '/ -1 because of Look-Ahead conditions
It takes 2 seconds but will save you (or somebody else) from making costly mistakes and errors down the line.
Now, this:
If ix > firstRow And sheetArray(ix, 42) = sheetArray(ix + 1, 42) And sheetArray(ix, 1) = sheetArray(ix + 1, 1) _ And sheetArray(ix, 40) <> sheetArray(ix + 1, 40) Then splitter = Split(sheetArray(ix, i_LastColumn), "-") If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < da_date Then
is messy and difficult to read/understand. You should have your delete check as a separate piece of business logic. Because you need all of these conditions to be met for a row to be deleted, you can also short-circuit after each check if it's false for a performance increase:
For ix = LBound(sheetarray) To UBound(sheetarray) - 1
If DeleteRow(sheetarray, ix, firstRow, thresholdDate) Then
If rng_Deletes Is Nothing Then Set rng_Deletes = Cells(firstRow + ix, 1) Else Set rng_Deletes = Union(rng_Deletes, Cells(firstRow + ix, 1))
End If
Next ix
rng_Deletes.EntireRow.Delete
With:
Private Function DeleteRow(ByRef dataArray As Variant, ByVal ix As Long, ByVal firstRow As Long, ByVal thresholdDate As Date) As Boolean
Dim result As Boolean
Dim splitter As Variant
result = False
If ix > firstRow Then
If sheetarray(ix, 42) = sheetarray(ix + 1, 42) Then
If sheetarray(ix, 1) = sheetarray(ix + 1, 1) Then
If sheetarray(ix, 40) <> sheetarray(ix + 1, 40) Then
splitter = Split(sheetarray(ix, i_LastColumn), "-")
If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < thresholdDate Then
result = True
End If
End If
End If
End If
End If
DeleteRow = result
End Function
-
\$\begingroup\$ Thank you for your help. Yes, it's messy, I'm in the process of cleaning it up. \$\endgroup\$user97873– user978732016年03月16日 16:31:48 +00:00Commented Mar 16, 2016 at 16:31