My code here loops through my folder and my files inside the folder and use .find
and copy the strings into a new Excel sheet and then with all the data in the new sheet, I will use .Find(What:=)
to find the strings inside the new worksheet and then either split or use Right, Left, Mid functions, sometimes using replace, however 68 files requires 1 minute 30 seconds run time using this code and I need help making it more efficient.
Option Explicit
Sub GenerateData()
Application.ScreenUpdating = False
Dim wks As Worksheet
Dim wkb As Workbook
Set wkb = ActiveWorkbook
Set wks = wkb.Worksheets.Add(After:=wkb.Worksheets(wkb.Worksheets.Count), Type:=xlWorksheet)
' Add headers data
With wks
.Range("A1:K1") = Array("Test", "Temp", "Type", "Start", "FileName", "No", "End", _
"Month", "Smart", "Errors", "ErrorCellAddress")
End With
' Early Binding - Add "Microsoft Scripting Runtime" Reference
Dim FSO As New Scripting.FileSystemObject
' Set FolderPath
Dim FolderPath As String
FolderPath = "c:\Users\Desktop\Tryout\"
' Set Folder FSO
Dim Folder As Scripting.Folder
Set Folder = FSO.GetFolder(FolderPath)
' Loop thru each file
Dim File As Scripting.File
Dim a As Range, b As Range, c As Range, d As Range, e As Range, f As Range, g As Range, h As Range, l As Range
For Each File In Folder.Files
Dim wkbData As Workbook
Set wkbData = Workbooks.Open(File.Path)
Dim wksData As Worksheet
ActiveSheet.Name = "Control"
Set wksData = wkbData.Worksheets("Control") ' -> Assume this file has only 1 worksheet
'Format of the data
Dim BlankRow As Long
BlankRow = wks.Range("A" & wks.Rows.Count).End(xlUp).Row + 1
' Write filename in col E,F,G
wks.Cells(BlankRow, 5).Value = File.Name
wks.Cells(BlankRow, 6).Value = File.Name
wks.Cells(BlankRow, 7).Value = File.Name
'Find Testtest
Set a = wksData.Columns("A:A").Find(" testtest : ", LookIn:=xlValues)
If Not a Is Nothing Then
wks.Cells(BlankRow, 1).Value = a.Value
End If
'Find Temp
Set b = wksData.Columns("A:A").Find(" testflyy : ", LookIn:=xlValues)
If Not b Is Nothing Then
wks.Cells(BlankRow, 2).Value = b.Value
End If
'Find Type
Set d = wksData.Columns("A:A").Find(" testflyy : ", LookIn:=xlValues)
If Not d Is Nothing Then
wks.Cells(BlankRow, 3).Value = d.Value
End If
'Find start
Set l = wksData.Columns("A:A").Find(" Started at: ", LookIn:=xlValues)
If Not l Is Nothing Then
wks.Cells(BlankRow, 4).Value = l.Value
End If
'Find Smart
Set c = wksData.Columns("A:A").Find("SmartABC ", LookIn:=xlValues)
If Not c Is Nothing Then
wks.Cells(BlankRow, 9).Value = c.Value
Else
Set f = wksData.Columns("A:A").Find("SmarABCD Version ", LookIn:=xlValues)
If Not f Is Nothing Then
wks.Cells(BlankRow, 9).Value = f.Value
Else
Set g = wksData.Columns("A:A").Find("smarabcd_efg revision", LookIn:=xlValues)
If Not g Is Nothing Then
wks.Cells(BlankRow, 9).Value = g.Value
End If
End If
End If
'Find Errors
Set e = wksData.Columns("A:A").Find("ERROR: ABC", LookIn:=xlValues)
If Not e Is Nothing Then
wks.Cells(BlankRow, 10).Value = e.Value
wks.Cells(BlankRow, 11).Value = e.Address
Else
Set h = wksData.Columns("A:A").Find("ERROR: EFG", LookIn:=xlValues)
If Not h Is Nothing Then
wks.Cells(BlankRow, 10).Value = h.Value
End If
End If
' Trim and tidy up Data
'Trim Testtest RowA(1)
wks.Cells(BlankRow, 1).Replace What:="testtest : ", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
'Trim StartTime RowD(4)
wks.Cells(BlankRow, 4).Replace What:=" Started at: ", Replacement:="", LookAt:=xlPart, _
SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _
ReplaceFormat:=False
'Trim Temp RowB(2)
Dim strSearchB As String, strSearchC As String
Dim bCell As Range, cCell As Range
Dim s2 As String, s3 As String
strSearchB = " testflow : B"
strSearchC = " testflow : M"
Set bCell = wks.Cells(BlankRow, 2).Find(What:=strSearchB, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not bCell Is Nothing Then
s2 = Split(bCell.Value, ":")(1)
s2 = Mid(s2, 1, 3)
bCell.Value = s2
Else
Set cCell = wks.Cells(BlankRow, 2).Find(What:=strSearchC, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not cCell Is Nothing Then
s3 = Split(cCell.Value, ":")(1)
s3 = Mid(s3, 10, 2)
cCell.Value = s3
End If
End If
'Trim Type RowC(3)
Dim strSearchD As String, strSearchE As String
Dim dCell As Range, eCell As Range
Dim s4 As String, s5 As String
strSearchD = " testflow : B"
strSearchE = " testflow : M1947"
Set dCell = wks.Cells(BlankRow, 3).Find(What:=strSearchD, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not dCell Is Nothing Then
s4 = Split(dCell.Value, "_", 3)(2)
s4 = Mid(s4, 1, 3)
dCell.Value = s4
Else
Set eCell = wks.Cells(BlankRow, 3).Find(What:=strSearchE, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not eCell Is Nothing Then
eCell.Value = "123"
End If
End If
'Trim No RowF(6)
Dim strSearchF As String
Dim fCell As Range
Dim s6 As String
strSearchF = "homebeestrash_archivetreser"
Set fCell = wks.Cells(BlankRow, 6).Find(What:=strSearchF, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not fCell Is Nothing Then
s6 = Split(fCell.Value, "treser")(1)
s6 = Mid(s6, 1, 2)
fCell.Value = s6
End If
'Trim EndDate RowG(7)
Dim strSearchG As String
Dim gCell As Range
Dim s7 As String
strSearchG = "homebeestrash_archivetreser"
Set gCell = wks.Cells(BlankRow, 7).Find(What:=strSearchG, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not gCell Is Nothing Then
s7 = Split(gCell.Value, "reports")(1)
s7 = Split(s7, "Report")(0)
s7 = Left(s7, 8) & " " & Right(s7, 6)
'Month Row H(8)
wks.Cells(BlankRow, 8).Value = WorksheetFunction.Transpose(Left(s7, 4) & "-" & Mid(s7, 5, 2) & "-" & Mid(s7, 7, 2))
gCell.Value = s7
End If
wks.Cells(BlankRow, 8).NumberFormat = "[$-en-US]mmmm d, yyyy;@" 'Set Date format
'Trim Smart
Dim strSearchST As String
Dim stCell As Range
Dim s8 As String
strSearchST = "This is "
Set stCell = wks.Cells(BlankRow, 9).Find(What:=strSearchST, LookIn:=xlFormulas, _
LookAt:=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, _
MatchCase:=False, SearchFormat:=False)
If Not stCell Is Nothing Then
s8 = Split(stCell.Value, "This is ")(1)
s8 = Mid(s8, 1, 29)
stCell.Value = s8
End If
wkbData.Close False
Next File
'Add AutoFilter
Dim StartDate As Long, EndDate As Long
With wks.Cells(BlankRow, 8)
StartDate = DateSerial(Year(.Value), Month(.Value), 1)
EndDate = DateSerial(Year(.Value), Month(.Value) + 1, 0)
End With
wks.Cells(BlankRow, 8).AutoFilter Field:=5, Criteria1:=">=" & StartDate, Operator:=xlAnd, Criteria2:="<=" & EndDate
If ActiveSheet.FilterMode Then ActiveSheet.ShowAllData
End Sub
```
1 Answer 1
Disclaimer: Rubberduck is a free, open-source VBIDE add-in project I manage and cheerlead, that's proudly & actively maintained by members of this community. I have zero personal interests in your clicks, but I do own the project's website, contributed and/or reviewed the content, and I've written the articles on the project's blog - covers all links you'll find in this post (well, except the docs.microsoft.com one).
I like starting at the top:
Sub GenerateData()
The procedure is implicitly Public; in most other languages including VB.NET, a member without an explicit access modifier is Private
- regardless of what the language defaults are, using explicit access modifiers everywhere is best practice.
Kudos for avoiding the Sub A()
or Sub Macro1()
trap and actually using a somewhat meaningful name for the procedure. "Generate data" is a bit vague, but naming a procedure that's well over 200 lines of code is never easy, because such procedures tend to do a lot of things: accurately naming smaller, more specialized procedures is much easier!
' Add headers data With wks .Range("A1:K1") = Array("Test", "Temp", "Type", "Start", "FileName", "No", "End", _ "Month", "Smart", "Errors", "ErrorCellAddress") End With
Whenever I encounter a comment like this ("below code does XYZ"), I see a missed opportunity for a procedure named DoXYZ
. In this case, something like this:
Private Sub AddColumnHeaders(ByVal sheet As Worksheet)
sheet.Range("A1:K1") = Array( _
"Test", "Temp", "Type", "Start", _
"FileName", "No", "End", "Month", _
"Smart", "Errors", "ErrorCellAddress")
End Sub
And just like that we've replaced 5 lines with one, and eliminated a comment that says what (always let the code itself say that, not comments), leaving room for a better comment that says why, if needed:
AddColumnHeaders wks
But we could go one step further, and take a Workbook
parameter instead, and return a new, worksheet with the column headers - and since the only code that needs a Workbook
is the code that's creating the worksheet, we eliminate the need for a local Workbook
variable:
Dim sheet As Worksheet
Set sheet = CreateOutputSheet(ActiveWorkbook)
And the CreateOutputSheet
function might look like this:
Private Function CreateOutputSheet(ByVal book As Workbook) As Worksheet
Dim sheet As Worksheet
Set sheet = book.Worksheets.Add(After:=book.Worksheets(book.Worksheets.Count))
AddColumnHeaders sheet
Set CreateOutputSheet = sheet
End Function
Note that the Type
argument is redundant when it's an xlWorksheet
that you're adding; it's the default.
Also note, the crystal-clear distinction between book
and sheet
; compare to the single-character difference between wks
and wkb
. Vowels exist, and should be used: don't arbitrarily strip them from identifiers, we're not in 1985 anymore, variables can, and should have meaningful, pronounceable names.
' Early Binding - Add "Microsoft Scripting Runtime" Reference Dim FSO As New Scripting.FileSystemObject ' Set FolderPath Dim FolderPath As String FolderPath = "c:\Users\Desktop\Tryout\" ' Set Folder FSO Dim Folder As Scripting.Folder Set Folder = FSO.GetFolder(FolderPath)
Note that adding a reference to the Scripting
library isn't what makes the FSO
early bound: while useful (informs of a dependency) that comment is somewhat misleading - if you declared the FSO
with As Object
, member calls against it would still be late-bound, even if the library is referenced. See late binding for more info.
Of particular note, implicit late binding, like you have here and in every single one of your Range.Find
calls:
Set a = wksData.Columns("A:A").Find(" testtest : ", LookIn:=xlValues)
Range.Columns
returns a Range
(early-bound), but doesn't take any parameters: the ("A:A")
argument list is going to Range.[_Default]
, which returns a Variant
- hence, any member call chained to it, can only be resolved at run-time. This means your Range.Find
member call isn't validated at compile-time, you wrote it blindfolded without parameter quick-info, and if you made a typo, Option Explicit
can't save you from it. That implicit late binding is pretty insiduous, and should be given much more importance and attention than this early-bound declaration.
Keep in mind that Dim
statements aren't executable, and As New
creates an auto-instantiated object that will not behave the way you normally expect objects to behave. This FSO
object is live and well for the entire duration of the procedure, and yet it's only used in one single instruction. Consider limiting its lifetime to a bare minimum. All variables should be as tightly scoped as possible, and no object needs to stick around if it's not needed.
Consider:
Dim basePath As String
basePath = Environ$("USERPROFILE") & "\Desktop\Tryout\"
Dim baseFolder As Scripting.Folder
With New Scripting.FileSystemObject
Set baseFolder = .GetFolder(basePath)
End With
The indentation, somewhat consistent up to that point (I find the offset assignment vs declaration quite off-putting and rather hard to apply consistently, but that could be just me), starts going south and feel pretty much random here. This is a definitive sign of indentation gone terribly wrong:
End If End If End If
These same-level End If
tokens show up in multiple places, and that is a problem: if I quickly glance at the code top-to-bottom, I might miss the For
loop buried in a bunch of declarations that really belong elsewhere, and I'll have to work very hard to locate the corresponding Next
token, like I just did. Proper and consistent indentation is the solution, and you can use Rubberduck's Smart Indenter port, or any other VBIDE add-in that features an indenter for that.
These variables have a rather strong smell:
Dim a As Range, b As Range, c As Range, d As Range, e As Range, f As Range, g As Range, h As Range, l As Range
Ignoring the fact that they are meaningless single-letter identifiers (and that using lowercase-L as a variable name is outright criminal), the sequence feels like it could just as well be rng1
, rng2
, rng3
, and so on - and that is what's wrong with them: these variables are saying "we're all doing the same thing", and indeed, looking at how they're used, they're literally all the same.
Every single one of these blocks:
'Find Testtest Set a = wksData.Columns("A:A").Find(" testtest : ", LookIn:=xlValues) If Not a Is Nothing Then wks.Cells(BlankRow, 1).Value = a.Value End If
Is identical, save what's being searched, and what column it's being written to: this block should be its own parameterized procedure. Whenever you find yourself selecting a block of code and hitting Ctrl+C, stop and think of how you can avoid duplicating code by introducing a small, specialized procedure.
This comment makes no sense:
'Format of the data Dim BlankRow As Long BlankRow = wks.Range("A" & wks.Rows.Count).End(xlUp).Row + 1
Working out the row to write in wouldn't be needed if your Range
happened to be a ListObject
(aka table). With the ListObject
API, you would be getting the table to dump the data into:
Dim table As ListObject
Set table = wks.ListObjects("TableName")
Dim newRow As ListRow
Set newRow = table.ListRows.Add
And done. Now all you need to do is populate the Range
of this newRow
.
Private Function PopulateIfFound(ByVal source As Range, ByVal value As String, ByVal row As ListRow, ByVal writeToColumn As Long, Optional ByVal writeAddressToNextColumn As Boolean = False) As Boolean
Dim result As Range
Set result = source.Find(value, LookIn:=xlValues)
If Not result Is Nothing Then
Dim cell As Range
Set cell = row.Range.Cells(ColumnIndex:=writeToColumn)
cell.Value = result.Value
If writeAddressToNextColumn Then
cell.Offset(ColumnOffset:=1).Value = result.Address
End If
PopulateIfFound = True
End If
End Function
And now the repetitive searches would look like this:
Dim source As Range
Set source = wksData.Range("A:A")
PopulateIfFound source, " testtest : ", newRow, 1
PopulateIfFound source, " testflyy : ", newRow, 2
PopulateIfFound source, " testflyy : ", newRow, 3
PopulateIfFound source, " Started at: ", newRow, 4
If Not PopulateIfFound(source, "SmartABC ", newRow, 9) Then
PopulateIfFound source, "smarabcd_efg revision", newRow, 9
End If
If Not PopulateIfFound(source, "ERROR: ABC", newRow, 10, True) Then
PopulateIfFound source, "ERROR: EFG", newRow, 10
End If
I note that your searches include a dangerous amount of seemingly-arbitrary whitespace; this makes the code extremely frail, because a single missing or additional space is all it takes to make any of these searches fail.
This is interesting:
' Write filename in col E,F,G wks.Cells(BlankRow, 5).Value = File.Name wks.Cells(BlankRow, 6).Value = File.Name wks.Cells(BlankRow, 7).Value = File.Name
I can see that it's writing the file name in 3 consecutive columns (what). What I'd like this comment to tell me, is why the file name needs to be in 3 places right next to each others. Strikes me as redundant/superfluous otherwise.
This shouldn't need to happen:
' Trim and tidy up Data 'Trim Testtest RowA(1) wks.Cells(BlankRow, 1).Replace What:="testtest : ", Replacement:="", LookAt:=xlPart, _ SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _ ReplaceFormat:=False 'Trim StartTime RowD(4) wks.Cells(BlankRow, 4).Replace What:=" Started at: ", Replacement:="", LookAt:=xlPart, _ SearchOrder:=xlByRows, MatchCase:=False, SearchFormat:=False, _ ReplaceFormat:=False
You're the one writing to wks.Cells(BlankRow, n)
: you shouldn't need to make another pass to clean up anything after the data was written - just clean it up as you write it. Everything that's happening from here on, up to wkbData.Close False
, is reprocessing data that was already written. If your destination were a ListObject
, you wouldn't need to worry about number formats, since the formatting would be identical for every row, and automatically carried onto any newRow
you add to it, along with the formula for any calculated column.
This is a potential issue:
Dim StartDate As Long, EndDate As Long With wks.Cells(BlankRow, 8) StartDate = DateSerial(Year(.value), Month(.value), 1) EndDate = DateSerial(Year(.value), Month(.value) + 1, 0) End With
Treat dates as Date
. They're not strings, and they're not Long
integers either. If you really want to use a numeric type for them, then use a Double
. But StartDate As AnythingButDate
is just wrong: since this is for filtering, a numeric value is more sane than a string, so kudos for that - I'd go with Dim fromDateValue As Double
and Dim toDateValue As Double
.
If ActiveSheet.FilterMode Then ActiveSheet.ShowAllData
Because ActiveSheet
is Object
, these member calls are implicitly late-bound. This is bad, first because up to that point we didn't care at all what the ActiveSheet
was (except for that part where we assume the opened workbook doesn't already have a "Control"
sheet as its one and only worksheet... this may be a bad assumption; such assumptions are typically made explicit with Debug.Assert
calls, e.g. Debug.Assert wkbData.Worksheets.Count = 1
, and execution stops if the assertion isn't true).
I'd have an explicit reference to a proper Worksheet
variable here: the code does not need to care what sheet is active, and shouldn't either.
Don't worry about performance. Worry about redundant and repetitive code. Once your code is all cleaned up, see how it runs. Identify what double-processing is being done, remove it.
As others have mentioned, the single largest bottleneck here, is I/O overhead: opening and closing workbooks is very expensive, and if that's what your code needs to do, then that's the overhead it needs to cope with. Consider implementing a progress indicator to make the long-running operation more bearable.
Maybe replacing the Range.Find
calls with some WorksheetFunction.Index
/WorksheetFunction.Match
combo might be faster, but that depends on what your data really looks like. Besides, the first thing to do is to constraint the search range to the actually meaningful cells in column A, rather than searching across the million-or-so cells in that column.
Toggling Application.Calculation
to xlCalculationManual
would stop Excel from recalculating dependents whenever you write a cell value; if you don't need to recalculate anything until you're completely done with a file, then that's a good idea - toggling calculation back to xlCalculationAutomatic
will trigger a recalc, so no need to explicitly calculate anything.
Toggling Application.EnableEvents
to False
would stop Excel from raising Workbook
and Worksheet
events every time you write to a cell; if you have event handlers for worksheet Change
events, this is a must. Otherwise, toggling it off can still make Excel work more on executing your VBA code and less on trying to keep the workbook in a consistent state.
Toggling Application.ScreenUpdating
to False
like you did helps a tiny little bit too, but since you're not constantly activating and selecting things, the effect is marginal.
Note that whenever you toggle this global Application
state, you absolutely want to handle any & all run-time errors that might happen while your procedure is running, such as to guarantee that whatever happens, whether it blows up or not, the global Application
state is exactly what it was before you ran the procedure.
-
\$\begingroup\$ Seriously! i am loss for words, thanks so much for the effort that you put in, it is my first time using private sub and functions, i have read up and researched about them, but still have no idea how to make use of them. Could you please enlighten me about it? If possible the combination of the codes provided by you so i can have a better idea how i can use/call functions and how to run private subs and regarding to why i repeatedly find and write the same strings in different columns is because sometimes just one string consists of information for column 2 or 3 thus the 3lines of
File.Name
\$\endgroup\$vbanoob– vbanoob2019年12月10日 01:59:56 +00:00Commented Dec 10, 2019 at 1:59 -
\$\begingroup\$ Code is a series of executable instructions that have such or such side effect on the document(s) it's working with. Right? Now what if code were the expression of ideas, descriptions of what's going on, and the deeper you dive into it, the finer the details get. When you write code you get to craft a language with verbs (procedures), nouns (objects, properties), even adverbs (arguments)! The key is abstraction. Higher abstraction at the top, gory details at the bottom; small, specialized procedures that do one thing, and that do it well, are more reliable and easier to maintain & debug. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2019年12月10日 02:08:44 +00:00Commented Dec 10, 2019 at 2:08
-
\$\begingroup\$ Start with pseudo-code: what are the steps? Name them; each one is its own named procedure. Only one needs to be
Public
: the one that's meant to be visible from the calling code's point of view. If that's in Module1 and I'm in another module, then when I typeModule1.
the autocompletion list only includes the public members. The private ones are implementation details, at a lower abstraction level than the caller needs to care for. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2019年12月10日 02:12:52 +00:00Commented Dec 10, 2019 at 2:12 -
\$\begingroup\$ sorry to bother you again but when i was trying out the code i kept getting an error like "Object required" or "Object Variable or With Block variable not set" from the line
Dim table As ListObject Set table = wks.ListObjects("TableName") Dim newRow As ListRow Set newRow = table.ListRows.Add
i have tried many other ways like creating a function for it and adding object to it but all of it can't work...could you mind helping me out abit? Thanks once again :) \$\endgroup\$vbanoob– vbanoob2019年12月11日 08:05:48 +00:00Commented Dec 11, 2019 at 8:05 -
\$\begingroup\$ Is
wks
defined? It needs to be aWorksheet
object. This error message can also indicate thatOption Explicit
is missing from the module, so the code happily compiles and runs with undefined variables. Ifwks
is undefined, then at run-time it's aVariant/Empty
and that's why making a member call against it raises an "object required" error. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2019年12月11日 13:38:17 +00:00Commented Dec 11, 2019 at 13:38
Workbook
object. As a test, strip out all of the.Find
sections of your code and just open and close all the workbooks. This minimal test will give you a good idea of the best possible time to execute your code. \$\endgroup\$'Find Temp
then'Find Type
, yet the code following those comments are both searching fortestflyy
and assigning the results tob
andd
. Is this a typo when copying the code here? If not, just useb
for both because your result will be the same. \$\endgroup\$