4
\$\begingroup\$

I am deleting rows based on a date.

I am loading the entire sheet into an array and doing the evaluation and building a string containing the rows I want to delete.

Option Explicit
Sub arrmaker()
ThisWorkbook.Activate
Dim i_LastRow, i_LastColumn, i_FirstRow, ictr, ictr2, i_row, i_col As Long
Dim Da_Array(), Da_Other_Array(), Da_Third_Array, Da_Dates(), da_Names() As Variant
Dim Da_Builder() As String
Dim splitter() As String
Dim s_rows As String
Dim v_builder As Variant
Dim da_Range, rng_Deletes As Range
Dim prodhold As Long
Dim i_init, i_sec, i_Main As Long
Dim i_hold1, i_hold2, i_hold3 As Long
Sheets(1).Select
Dim da_date As Date
da_date = Date
ictr = 16
Sheets(1).Range("_FilterDatabase").Select
i_FirstRow = Sheets(1).Range("_FilterDatabase").Row
i_LastRow = Last(1, Sheets(1).Cells)
i_LastColumn = Last(2, Sheets(1).Cells)
i_FirstRow = Sheets(1).Range("_FilterDatabase").Row
i_LastColumn = Last(2, Sheets(1).Cells)
Sheets(1).Activate
Call Sort_Em(i_LastRow)
Da_Array = Range(Cells(i_FirstRow, 1), Cells(i_LastRow, i_LastColumn))
'i_FirstRow
s_rows = ""
For ictr = LBound(Da_Array) To UBound(Da_Array) - 1
 If ictr > i_FirstRow And Da_Array(ictr, 42) = Da_Array(ictr + 1, 42) And Da_Array(ictr, 1) = Da_Array(ictr + 1, 1) _
 And Da_Array(ictr, 40) <> Da_Array(ictr + 1, 40) Then
 splitter = Split(Da_Array(ictr, i_LastColumn), "-")
 If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < da_date Then
 s_rows = s_rows & ictr + i_FirstRow & ":" & ictr + i_FirstRow & ","
 End If
 End If
Next ictr
s_rows = Left(s_rows, Len(s_rows) - 1)
v_builder = strChk(s_rows)
Set rng_Deletes = Range(v_builder(LBound(v_builder)))
For ictr = LBound(v_builder) + 1 To UBound(v_builder)
 Set rng_Deletes = Union(rng_Deletes, Range(v_builder(ictr)))
Next ictr
rng_Deletes.Select
Selection.Delete
Da_Array = Range(Cells(i_FirstRow, 1), Cells(i_LastRow, i_LastColumn))
Da_Other_Array() = Application.WorksheetFunction.Index(Da_Array, 0, 42)
Sheets("tmp").Range(Cells(1, 1).Address, Cells(UBound(Da_Other_Array), 1).Address).Value = Da_Other_Array
Sheets("tmp").Range("$A1ドル:$A$" & UBound(Da_Other_Array)).RemoveDuplicates Columns:=1
i_LastRow = Last(1, Sheets("tmp").Cells)
Sheets("tmp").Activate
Da_Third_Array = Range(Cells(2, 1), Cells(i_LastRow, 1))
ReDim Da_Builder(LBound(Da_Array) To UBound(Da_Array), 1 To UBound(Da_Array, 2) + 2 * UBound(Da_Third_Array))
For i_init = 1 To UBound(Da_Array, 2)
 Da_Builder(1, i_init) = Da_Array(1, i_init)
Next i_init
'MsgBox UBound(Da_Builder, 2)
i_hold1 = 0
For i_sec = 1 To UBound(Da_Third_Array)
i_hold2 = UBound(Da_Array, 2)
 Da_Builder(1, i_sec + i_hold1 + i_hold2) = Da_Third_Array(i_sec, 1)
 Da_Builder(1, i_sec + i_hold1 + 1 + i_hold2) = "cur"
 i_hold1 = i_hold1 + 1
Next i_sec
Sheets("tmp2").Range(Cells(1, 1).Address, Cells(1, UBound(Da_Builder, 2)).Address).Value = Da_Builder
i_LastRow = Last(1, Sheets(2).Cells)
Dim rng As Range
Set rng = Sheets(2).Range(Cells(2, 1), Cells(i_LastRow, 1))
da_Names = rng.Value
Sheets("hold").Select
Sheets("hold").Range(Cells(1, 1).Address, Cells(UBound(Da_Array), UBound(Da_Array, 2)).Address) = Da_Array
 Columns("A:A").Select
 ActiveSheet.Range("$A1ドル:$BC25525ドル").RemoveDuplicates Columns:=1, Header:=xlYes
Sheets("hold").Range(Cells(1, 1).Address, Cells(1, UBound(Da_Builder, 2)).Address).Value = Da_Builder
i_LastRow = Last(1, Sheets("hold").Cells)
ictr2 = 2
prodhold = Cells(ictr2, 1)
For ictr = LBound(Da_Array) + 1 To UBound(Da_Array)
 If CLng(Da_Array(ictr, 1)) <> prodhold Then
 ictr2 = ictr2 + 1
 prodhold = Cells(ictr2, 1)
 End If
 Set rng = Worksheets("hold").UsedRange.Find(Da_Array(ictr, 42), LookIn:=xlValues, lookat:=xlWhole)
 ' Set rng = Sheets("hold").Range("A1:A" & UBound(Da_Builder, 2)).Find(Da_Array(ictr, 42), LookIn:=xlValues, lookat:=xlWhole)
 Cells(ictr2, rng.Column) = Da_Array(ictr, 48)
 Cells(ictr2, rng.Column + 1) = Da_Array(ictr, 49)
Next ictr
End Sub

This is fed to break the string into an array of strings, each less than 255:

Function strChk(s_rows) As Variant
Dim l_point, l_ctr, l_hold, l_start As Long
Dim OutArray() As String
l_ctr = 0
l_start = 1
l_point = 1
'ReDim Preserve OutArray(l_ctr)
Do While l_point <> 0
 If l_point - l_start > 255 Then
 ReDim Preserve OutArray(l_ctr)
 l_point = l_hold
 OutArray(l_ctr) = Mid(s_rows, l_start, l_point - l_start)
 l_ctr = l_ctr + 1
 l_start = l_point + 1
 Else
 l_hold = l_point
 End If
 l_point = InStr(l_point + 1, s_rows, ",")
Loop
strChk = OutArray
End Function

Is there a more efficient way to build a range for deletion, and if so, how would I go about it?

More on the delete criteria. If the date in column 55 is less than the current date, it needs to be deleted.

Note:

I cannot simply sort by date because there is an additional criterion. I delete the row only if the current date is greater than the date on the sheet, and the version in column 42 has no previous versions. So, if 42 has no previous version, then keep it even if expired.

asked Mar 16, 2016 at 13:59
\$\endgroup\$
11
  • 5
    \$\begingroup\$ Please explain in more detail what you mean by "deleting rows based on a date". \$\endgroup\$ Commented Mar 16, 2016 at 14:12
  • 2
    \$\begingroup\$ Is the sheet sorted by date so that all the rows to be deleted are contiguous? If so, then you could avoid creating the array by finding the first and last occurrences of what needs to be deleted and create your range with the first and last items. \$\endgroup\$ Commented Mar 16, 2016 at 14:29
  • 2
    \$\begingroup\$ @Raystafarian Original sorting before I took over: 8+ hours. First iteration of my code: 15 minutes. (sorting/deleting on the spreadsheet). Second iteration of my code: five minutes (identifying rows on the sheet and building the range to delete) current time is about 50 seconds. \$\endgroup\$ Commented Mar 16, 2016 at 14:38
  • 2
    \$\begingroup\$ How are you telling if there's a previous version? \$\endgroup\$ Commented Mar 16, 2016 at 15:17
  • 2
    \$\begingroup\$ That would be the way I'd describe it in the question then. It answers a lot about the structure of the cells and what other approaches are feasible. Unless you want to stick with the string of rows. \$\endgroup\$ Commented Mar 16, 2016 at 16:07

3 Answers 3

2
\$\begingroup\$

Rule #1:

Be explicit with your declarations:

You already have Option explicit which forces you to be explicit about variable names but not types. You have to declare a type for every variable.

This:

 Dim i_LastRow, i_LastColumn, i_FirstRow, ictr, ictr2, i_row, i_col As Long

Does not declare 7 Long Variables. It is actually doing this:

 Dim i_LastRow [As Variant], i_LastColumn [As Variant], i_FirstRow [As Variant], ictr [As Variant], ictr2 [As Variant], i_row [As Variant], i_col As Long

Rule #2:

Naming

Names should be Descriptive, Clear, Unambiguous and Concise. In that order.


Your prefixes tell me nothing about your variables.

i_LastRow - what does the i_ mean? I think you meant to use l_ for Long but frankly, it's just distracting. If I see a variable called lastRow I already know it's going to be a row number.

Read This article on variable naming and why type-prefixes are a terrible idea.


Stick to standard naming conventions

Namely:

Local Variables: Written in camelCase.

Dim localVariable As String
includes method arguments.

Module / Global Variables: Written in PascalCase.

Private ModuleVariable As String
Global PublicVariable As Long

Method Names: Verbs. Written in PascalCase

Private Function ReturnThisValue() As Long
Public Sub DoThisThing()

Constants: Written in SHOUTY_SNAKE_CASE

Public Const CONSTANT_VALUE As String = "This Value Never Changes"


Some suggested substitutions:

ictr --> counter or just the standard i, j, k
Da_Array() --> sheetArray() or maybe sheetData()
splitter --> splitHolder()
rng_deletes --> deleteRange

any time you find yourself writing variable, variable2, variable3 etc. It's a great sign that you need to rethink your structure. Probably involving some kind of loop.


Rule 3:

Declare your variables as close to their usage as possible

In general, try to get as much information as possible as close to where it is used / applicable.

By the time I've finished reading the last of your variable declarations I've already forgotten most of the list. I don't want to have to keep scrolling back to the start of your procedure to find what things are. Just move them to where they'll actually be used:

 Dim currentDate As Date
 currentDate = Date
 Dim dataSheet As Worksheet
 Set dataSheet = Sheets(1)
 dataSheet.Activate
 Dim firstRow As Long, lastRow As Long, lastCol As Long
 firstRow = dataSheet.Range("_FilterDatabase").Row
 lastRow = Last(1, dataSheet.Cells)
 lastCol = Last(2, dataSheet.Cells)

Added bonus: the more you move things to where they're actually used, the more your code naturally separates into loosely-connected "sections" which can then easily be refactored into their own Subs/Functions.

Refactoring

Take all the above (and the refactoring for your row deletion in my previous answer) and the first half of your code now looks like this:

 ThisWorkbook.Activate
 Dim currentDate As Date
 currentDate = Date
 Dim dataSheet As Worksheet
 Set dataSheet = Sheets(1)
 dataSheet.Activate
 Dim firstRow As Long, lastRow As Long, lastCol As Long
 firstRow = dataSheet.Range("_FilterDatabase").Row
 lastRow = Last(1, dataSheet.Cells)
 lastCol = Last(2, dataSheet.Cells)
 Dim ix As Long
 Dim deleteRange As Range, rCell As Range
 For ix = LBound(sheetarray, 1) To UBound(sheetarray, 1) - 1 '/ -1 because of Look-Ahead conditions in DeleteRow()
 If DeleteRow(sheetarray, ix, firstRow, currentDate) Then
 Set rCell = Cells(firstRow + ix, 1)
 If deleteRange Is Nothing Then Set deleteRange = rCell Else Set deleteRange = Union(deleteRange, rCell)
 End If
 Next ix
 deleteRange.EntireRow.Delete

hmm. suddenly this looks like its' own, completely separate thing.
Maybe we should refactor it into its' own sub?

Private Sub DeleteUndesiredRows()
 ThisWorkbook.Activate
 Dim currentDate As Date
 currentDate = Date
 Dim dataSheet As Worksheet
 Set dataSheet = Sheets(1)
 dataSheet.Activate
 Dim firstRow As Long, lastRow As Long, lastCol As Long
 firstRow = dataSheet.Range("_FilterDatabase").Row
 lastRow = Last(1, dataSheet.Cells)
 lastCol = Last(2, dataSheet.Cells)
 Dim ix As Long
 Dim deleteRange As Range, rCell As Range
 For ix = LBound(sheetarray, 1) To UBound(sheetarray, 1) - 1 '/ -1 because of Look-Ahead conditions in DeleteRow()
 If DeleteRow(sheetarray, ix, firstRow, currentDate) Then
 Set rCell = Cells(firstRow + ix, 1)
 If deleteRange Is Nothing Then Set deleteRange = rCell Else Set deleteRange = Union(deleteRange, rCell)
 End If
 Next ix
 deleteRange.EntireRow.Delete
End Sub

And now we can just call that at the start of our main sub and, should we need to change the row-deletion section specifically, we know exactly where to find it.

answered Mar 16, 2016 at 17:00
\$\endgroup\$
2
  • \$\begingroup\$ I would love to add more, but I have to sign off for today. \$\endgroup\$ Commented Mar 16, 2016 at 17:03
  • \$\begingroup\$ great advice. I date back to the mainframe days, and we were taught to declare all of our variables at the top. Thank you for your detailed and very thorough explanations of everything. \$\endgroup\$ Commented Mar 16, 2016 at 17:38
3
\$\begingroup\$

Note: I answered prior to edit 5 so some of this might be obsolete.


Let's talk about some things first -

You didn't provide a sub that your procedure is in, but I will assume it's in a 'sub'. That being said, it doesn't seem that Option Explicit is turned on because you aren't declaring your variables.

Always turn on Option Explicit. You can have it automatically by going to Tools -> Options in the VBE and checking the Require Variable Declaration option. This way if you have any variables not defined, the compiler will let you know.

When you don't define your variable, VBA will declare it as a Variant, which are objects:

Performance. A variable you declare with the Object type is flexible enough to contain a reference to any object. However, when you invoke a method or property on such a variable, you always incur late binding (at run time). To force early binding (at compile time) and better performance, declare the variable with a specific class name, or cast it to the specific data type.

By not declaring variables, you could possibly be paying a penalty.


Speaking of variables - your naming scheme is confusing. Give your variables meaningful names. Something like ictr isn't very descriptive. It could just be counter. The same goes for s_rows and v_builder - I can see that they are supposed to be a string and a variant, but the name gives me no indication as to what they do. Why not concatenatedRows and deletionArray.

Standard VBA naming conventions have camelCase for local variables and PascalCase for other variables and names.

It would end up more explanatory like

Option Explicit
Sub LoadData()
 Dim counter As Long
 Dim dataArray As Variant
 Dim firstRow As Long
 Dim lastColumn As Long
 Dim splitter As String
 Dim rangeToDelete As Range
 Dim splitRows As String
 Dim builtArray As Variant
 Dim myDate As Date
 For counter = LBound(dataArray) To UBound(dataArray) - 1
 If counter > firstRow And dataArray(counter, 42) = dataArray(counter + 1, 42) _
 And dataArray(counter, 1) = dataArray(counter + 1, 1) _
 And dataArray(counter, 40) <> dataArray(counter + 1, 40) Then
 splitter = Split(dataArray(counter, lastColumn), "-")
 If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < myDate Then
 splitRows = splitRows & counter + firstRow & ":" & counter + firstRow & ","
 End If
 End If
 Next counter
 splitRows = Left(splitRows, Len(splitRows) - 1)
 builtArray = strChk(splitRows)
 Set rangeToDelete = Range(builtArray(LBound(builtArray)))
 For counter = LBound(builtArray) + 1 To UBound(builtArray)
 Set rangeToDelete = Union(rangeToDelete, Range(builtArray(counter)))
 Next counter
 rangeToDelete.Select
 Selection.Delete
End Sub

You'll also note I changed the code structure. It's good practice to indent all of your code that way Labels will stick out as obvious.

Now I see that you are selecting the range to delete. Why not just rangeToDelete.Delete.

Be sure to avoid things like .Select - it just slows the code down by needing to fiddle with the spreadsheet while doing everything else behind the scenes. There's a good question on StackOverflow addressing this - https://stackoverflow.com/questions/10714251/how-to-avoid-using-select-in-excel-vba-macros .


For your function, you are requiring an input but it's being passed byRef by default. I don't see that it needs to go back, so you can just pass it byVal for increased speed.

Function CheckStringValue(ByVal splitRows as String) As Variant

The function - does it need to be a variant or could it be an array of strings?


Another good way to speed up your macro is to use Application.Screenupdating = False and Application.Calculation = xlManual and Application.EnableEvents = False. Just be sure to return them to True and xlAutomatic and True before exiting the sub.


Within your function you put Dim l_point, l_ctr, l_hold, l_start As Long - this is only declaring l_start as long type. In VBA everything needs to be given a type -

Private Function strChk(splitRows) As String
 Dim l_point As Long
 Dim l_ctr As Long
 Dim l_hold As Long
 Dim l_start As Long
 Dim OutArray() As String

Your function doesn't appear to need a larger scope, so make it private. The other variables l_ - I already went over naming so I won't beat a dead horse.

You are also using a delimiter string, why not make it a constant?

Private Function CheckStringValue(ByVal splitRows As String) As String
 Dim delimitPoint As Long
 Dim arrayCounter As Long
 Dim hold As Long
 Dim startPoint As Long
 Dim OutArray() As String
 Const DELIMITER As String = ","
 arrayCounter = 0
 startPoint = 1
 delimitPoint = 1
 'ReDim Preserve OutArray(arrayCounter)
 Do While delimitPoint <> 0
 If delimitPoint - startPoint > 255 Then
 ReDim Preserve OutArray(arrayCounter)
 delimitPoint = hold
 OutArray(arrayCounter) = Mid(splitRows, startPoint, delimitPoint - startPoint)
 arrayCounter = arrayCounter + 1
 startPoint = delimitPoint + 1
 Else
 hold = delimitPoint
 End If
 delimitPoint = InStr(delimitPoint + 1, splitRows, DELIMITER)
 Loop
 CheckStringValue = OutArray
End Function

Now everything is much more legible and speed has been increased if you've changed the Application attributes.

It looks like in the main sub you're using another function splitter - what is that? I also don't see where da_Date is assigned?

answered Mar 16, 2016 at 14:47
\$\endgroup\$
4
  • \$\begingroup\$ Option explicit is on, just didn't show that part, the whole procedure is about 4 times as long as what I posted. \$\endgroup\$ Commented Mar 16, 2016 at 14:50
  • \$\begingroup\$ It could be an array of strings. The previous code was taking 8+ hours. I threw this together to address an emergency, so it's not my best. \$\endgroup\$ Commented Mar 16, 2016 at 14:59
  • \$\begingroup\$ No worries, I've definitely had worse. \$\endgroup\$ Commented Mar 16, 2016 at 15:10
  • \$\begingroup\$ @RichardU If It's only 4 times as long, you could just post the whole thing. \$\endgroup\$ Commented Mar 16, 2016 at 15:34
1
\$\begingroup\$

Note: I answered prior to edit 5 so some of this might be obsolete.


@Raystafarian has touched on the horribly obfuscated naming, so I won't repeat that advice.

This whole building a string array of range references is entirely unnecesary. Just build your rng_deletes union as you go:

'/ Note the conversion of "ictr" to "ix". Personally I like ix, iy, iz etc. for index iterators but that's just a personal preference.
'/ Also, "Da_Array" --> "sheetArray", "i_FirstRow" --> "firstRow"
For ix = LBound(sheetArray) To UBound(sheetArray) - 1
 If ix > firstRow And sheetArray(ix, 42) = sheetArray(ix + 1, 42) And sheetArray(ix, 1) = sheetArray(ix + 1, 1) _
 And sheetArray(ix, 40) <> sheetArray(ix + 1, 40) Then
 splitter = Split(sheetArray(ix, i_LastColumn), "-")
 If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < da_date Then
 If rng_Deletes Is Nothing Then Set rng_Deletes = Cells(firstRow + ix, 1) Else Set rng_Deletes = Union(rng_Deletes, Cells(firstRow + ix, 1))
 End If
 End If
Next ix
rng_Deletes.EntireRow.Delete

And immediately the other half of you code can be removed.


LBound(sheetArray)

This is implicitly doing Lbound(sheetArray, 1). Never let your code do things implicitly, be explicit:

For ix = LBound(sheetArray, 1) To UBound(sheetArray, 1) - 1

Now, why - 1? Off-by-one errors are some of the most ubiquitous, difficult programming problems. If you need to add +-1 modifiers, always leave a note explaining why:

For ix = LBound(sheetArray) To UBound(sheetArray) - 1 '/ -1 because of Look-Ahead conditions

It takes 2 seconds but will save you (or somebody else) from making costly mistakes and errors down the line.


Now, this:

 If ix > firstRow And sheetArray(ix, 42) = sheetArray(ix + 1, 42) And sheetArray(ix, 1) = sheetArray(ix + 1, 1) _
 And sheetArray(ix, 40) <> sheetArray(ix + 1, 40) Then
 splitter = Split(sheetArray(ix, i_LastColumn), "-")
 If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < da_date Then

is messy and difficult to read/understand. You should have your delete check as a separate piece of business logic. Because you need all of these conditions to be met for a row to be deleted, you can also short-circuit after each check if it's false for a performance increase:

 For ix = LBound(sheetarray) To UBound(sheetarray) - 1
 If DeleteRow(sheetarray, ix, firstRow, thresholdDate) Then
 If rng_Deletes Is Nothing Then Set rng_Deletes = Cells(firstRow + ix, 1) Else Set rng_Deletes = Union(rng_Deletes, Cells(firstRow + ix, 1))
 End If
 Next ix
 rng_Deletes.EntireRow.Delete

With:

Private Function DeleteRow(ByRef dataArray As Variant, ByVal ix As Long, ByVal firstRow As Long, ByVal thresholdDate As Date) As Boolean
 Dim result As Boolean
 Dim splitter As Variant
 result = False
 If ix > firstRow Then
 If sheetarray(ix, 42) = sheetarray(ix + 1, 42) Then
 If sheetarray(ix, 1) = sheetarray(ix + 1, 1) Then
 If sheetarray(ix, 40) <> sheetarray(ix + 1, 40) Then
 splitter = Split(sheetarray(ix, i_LastColumn), "-")
 If CDate(RetMonthNum(splitter(1)) & "/" & splitter(0) & "/" & splitter(2)) < thresholdDate Then
 result = True
 End If
 End If
 End If
 End If
 End If
 DeleteRow = result
End Function
answered Mar 16, 2016 at 16:13
\$\endgroup\$
1
  • \$\begingroup\$ Thank you for your help. Yes, it's messy, I'm in the process of cleaning it up. \$\endgroup\$ Commented Mar 16, 2016 at 16:31

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.