I have this Excel/VBA code and here is what it does:
- I have 3 sheets, where first sheet has two columns that will be used (A,C).
- 2nd Sheet is just the raw data file that will be imported from external source.
- 3rd sheet is the final output.
- Column A in the first sheet is "Raw Column Headings" where the column headers from raw data table are copied, transposed, and pasted. Third column in the first sheet is the final headers that I need the raw ones to be changed to. Then, I will run a macro which will copy the final headers into the first row of second sheet and then copy/paste the entire columns with the certain headers specified in the code to the final output.
The problem is that this process takes about 20~30 seconds per file and I have so many files to be processed. Can anyone take a look at the code and tell me how it can be done faster?
Option Explicit
Private Function GetHeaders() As Collection
Dim result As New Collection
With result
.Add "Account_ID"
.Add "Claim_ID"
.Add "Account_Name"
.Add "Claim_Type"
.Add "Coverage"
.Add "Claim_Level"
.Add "Claim_Count"
.Add "File_Date"
.Add "File_Year"
.Add "Resolution_Date"
.Add "Resolution_Year"
.Add "Claim_Status"
.Add "Indemnity_Paid"
.Add "Disease_Category"
.Add "State_Filed"
.Add "First_Exposure_Date"
.Add "Last_Exposure_Date"
.Add "Claimant_Employee"
.Add "Claimant_DOB"
.Add "Claimant_Deceased"
.Add "Claimant_Name"
.Add "Claimant_DOD"
.Add "Claimant_Diagnosis_Date"
.Add "Product_Type"
.Add "Product_Line"
.Add "Company/Entity/PC"
.Add "Plaintiff_Law_Firm"
.Add "Asbestos_Type"
.Add "Evaluation_Date"
.Add "Tier"
.Add "Data_Source"
.Add "Data_Source_Category"
.Add "Jurisdiction/County"
.Add "Settlement_Demand"
.Add "Jury_Verdict"
.Add "Exposure_Site"
.Add "National_Defendant_Firm"
.Add "Local_Defendant_Firm"
.Add "Expense_Amount"
.Add "NCC_Expense_Amount"
.Add "Non_NCC_Expense_Amount"
End With
Set GetHeaders = result
End Function
Private Function FindHeaderRange(ByVal ws As Worksheet, ByVal header As String) As Range
Set FindHeaderRange = ws.Cells.Find(header, , , xlWhole)
End Function
Private Function BuildMessage(ByVal currentMessage As String, ByVal ws As Worksheet, ByVal header As String) As String
BuildMessage = currentMessage & vbLf & header & Space(1) & ws.Name
End Function
Public Sub ProjectionTemplateFormat()
On Error GoTo ExitSub
Sheets(1).Range("C2", Cells(Rows.Count, "C").End(xlUp)).Copy
Sheets(2).Range("A1").PasteSpecial transpose:=True
Range("A1").ClearOutline
Dim headers As Collection
Set headers = GetHeaders
Dim msg As String
Dim wsImport As Worksheet, wsMain As Worksheet
Set wsImport = ThisWorkbook.Sheets(2)
Set wsMain = ThisWorkbook.Sheets(3)
Application.ScreenUpdating = False
Dim header As Variant
Dim source As Range
Dim dest As Range
For Each header In headers
Set source = FindHeaderRange(wsImport, header)
If source Is Nothing Then
msg = BuildMessage(msg, wsImport, header)
Else
Set dest = FindHeaderRange(wsMain, header)
If dest Is Nothing Then
msg = BuildMessage(msg, wsMain, header)
Else
wsImport.Range(source.Offset(1), wsImport.Cells(Rows.Count, source.Column).End(xlUp)).Copy _
wsMain.Cells(Rows.Count, dest.Column).End(xlUp)(2)
End If
End If
Next
With wsMain
.Columns("A:AO").AutoFit
.Cells.ClearFormats
.Rows(1).Font.Bold = True
.Cells.Font.Name = "Georgia"
.Cells.Font.Color = RGB(0, 0, 225)
.Cells.Resize(wsMain.Rows.Count - 1).Offset(1).Interior.Color = RGB(216, 228, 188)
End With
'Apply Style
Dim ws As Worksheet
For Each ws In Worksheets
ws.Select
ActiveWindow.Zoom = 85
Next ws
Dim rng As Range
Set rng = wsMain.Cells
With rng.Borders
.LineStyle = xlDot
.Weight = xlThin
End With
Dim cell As Range
For Each cell In Range("$A2ドル:" & Range("$A2ドル").SpecialCells(xlLastCell).Address)
If Len(cell) > 0 Then cell = UCase(cell)
Next cell
ExitSub:
Application.ScreenUpdating = True
End Sub
-
\$\begingroup\$ How big is the data set? \$\endgroup\$RubberDuck– RubberDuck2015年06月04日 17:07:05 +00:00Commented Jun 4, 2015 at 17:07
3 Answers 3
Previous answer is getting a bit long, so here is the rest of the code and the flow I'd suggest:
.
Option Explicit
Public Sub mainProcess()
ProjectionTemplateFormat
End Sub
Private Function GetHeaders() As Collection
'...
End Function
Private Sub ProjectionTemplateFormat()
GetHeaders
'...
applyFormat ThisWorkbook.Worksheets(1)
End Sub
Private Sub applyFormat(ByRef sh As Worksheet)
With sh.UsedRange
.Columns.AutoFit
.ClearFormats
With .Font
.Name = "Georgia"
.Color = RGB(0, 0, 225)
End With
With .Borders
.LineStyle = xlDot
.Weight = xlThin
End With
.Interior.Color = RGB(216, 228, 188)
With .Rows(1)
.Font.Bold = True
.ColorIndex = xlAutomatic
End With
End With
Dim ws As Worksheet
For Each ws In Worksheets
ws.Zoom = 85
Next
allUpper1 sh 'or allUpper2 sh
End Sub
Private Sub allUpper1(ByRef sh As Worksheet) 'FOR loop, with VBA UCase()
Dim arr As Variant, i As Long, j As Long
If WorksheetFunction.CountA(sh.UsedRange) > 0 Then
arr = sh.UsedRange 'one interaction with the sheet
For i = 2 To UBound(arr, 1) 'each "row"
For j = 1 To UBound(arr, 2) 'each "col"
arr(i, j) = UCase(arr(i, j))
Next
Next
sh.UsedRange = arr 'second interaction with the sheet
End If
End Sub
Private Sub allUpper2(ByRef sh As Worksheet) 'No loop, with Excel UPPER()
Const FIRST_ROW As Long = 2
Dim lRow As Long
Dim lCol As Long
Dim usedRng As Range
Dim tempRng As Range
If WorksheetFunction.CountA(sh.UsedRange) > 0 Then
Set usedRng = sh.UsedRange
With usedRng
lRow = .Rows.Count
lCol = .Columns.Count
End With
'remove header row from working range
Set usedRng = usedRng.Offset(1, 0).Resize(lRow - 1, lCol)
'offset cell: 2 collumns to the right of 1st cell in used range
Set tempRng = sh.Cells(FIRST_ROW, lCol + 2)
With tempRng
'apply formula to offset cell: UPPER(A2)
.Formula = "=Upper(" & sh.Cells(FIRST_ROW, 1).Address(0, 0) & ")"
'fill down the column
.AutoFill Destination:=sh.Range(tempRng, .Offset(lRow, 0)), Type:=xlFillDefault
'fill right all rows
sh.Range(tempRng, .Offset(lRow, 0)).AutoFill Destination:=sh.Range(tempRng, .Offset(lRow, lCol)), Type:=xlFillDefault
With sh.Range(tempRng, .Offset(lRow, lCol))
usedRng.Value2 = .Value2 'copy upper case values back
.EntireColumn.Delete 'remove temp range
End With
End With
End If
End Sub
.
As a performance reference - measurement between UCase(), and UPPER() methods:
FOR loop method: 6.61 seconds
Excel formula method: 15.29 seconds
With 100,000 rows, and 26 columns
Most text cells contain "Test Cell 1", "Test Cell 2", "Test Cell 3",..., and 11 cells with numbers, 5 in the first few rows and 6 in the last rows
-
1\$\begingroup\$ This really would have been better in the other answer... :-/ Too late now: this one got accepted, and the other one has the votes. It's a mess that could have been avoided. Next time, please use just one answer if they are really about the same line of logic. \$\endgroup\$janos– janos2015年09月23日 14:00:19 +00:00Commented Sep 23, 2015 at 14:00
-
\$\begingroup\$ @janos - Noted and thanks (this was my first post on CR, and I won't make this mistake again) \$\endgroup\$paul bica– paul bica2015年09月23日 15:01:24 +00:00Commented Sep 23, 2015 at 15:01
Your code has tremendously improved since the first time I saw it - good job!
This particular line is hard to parse:
wsImport.Range(source.Offset(1), wsImport.Cells(Rows.Count, source.Column).End(xlUp)).Copy _
wsMain.Cells(Rows.Count, dest.Column).End(xlUp)(2)
Literally: it's crashing the rubberduck parser!
You could introduce a local variable here:
Dim target As Range
Set target = wsMain.Cells(Rows.Count, dest.Column).End(xlUp)
wsImport.Range(source.Offset(1), wsImport.Cells(Rows.Count, source.Column).End(xlUp)) _
.Copy target(RowIndex:=2)
Notice how the line continuation is placed so that no instruction is split; rubberduck doesn't care about line continuations, but it's much easier for the human eye to see what function calls return the arguments for which procedure if you don't split an instruction between the name of a procedure and its arguments - vertically lining up .Range
and .Copy
also make it clearer that .Copy
operates on the result of .Range
.
Dim wsImport As Worksheet, wsMain As Worksheet
Is this really buying you anything? Multiple declarations on a single line make it harder to locate declarations at a glance. Compare to:
Dim wsImport As Worksheet
Dim wsMain As Worksheet
My eye sees Dim
, my brain sees "variable declaration here" - two Dim
s, two variables. And I read the variable name at pretty much the exact same millisecond as the one I notice the Dim
statement, because I don't need to mentally scroll horizontally and locate the comma. Two variables isn't too bad, but more than that would be problematic. Better avoid multiple declarations on a single line.
There's a redundant reference to wsMain
in this With
block:
With wsMain
.Columns("A:AO").AutoFit
.Cells.ClearFormats
.Rows(1).Font.Bold = True
.Cells.Font.Name = "Georgia"
.Cells.Font.Color = RGB(0, 0, 225)
.Cells.Resize(wsMain.Rows.Count - 1).Offset(1).Interior.Color = RGB(216, 228, 188)
End With
See it? Right here:
.Cells.Resize(wsMain.Rows.Count - 1).Offset(1).Interior.Color = RGB(216, 228, 188)
Could be
.Cells.Resize(.Rows.Count - 1).Offset(1).Interior.Color = RGB(216, 228, 188)
The extraneous empty lines before End With
should be removed, too.
Your indentation isn't consistent.
Dim ws As Worksheet
For Each ws In Worksheets
ws.Select
ActiveWindow.Zoom = 85
Next ws
The only thing that should be at the same indentation level as Public Sub
/End Sub
, is line labels (which the VBE forces to start at column 1 anyway).
@Mat's Mug made good suggestions (all), but in particular: redundant or implicit references:
Sheets(1).Range("C2", Cells(Rows.Count, "C")
Try to always be explicit as possible - the above line has one (semi) explicit reference Sheets(1)
and 3 implicit ones:
Rows.Count implies "ActiveSheet.Cells.Rows.Count"
Cells(Rows.Count, "C") = ActiveSheet.Cells(Rows.Count, "C")
Sheets(1)
should beThisWorkbook.Worksheets(1)
All explicit:
Sheets(1).Range("C2", Sheets(1).Cells(Sheets(1).Cells.Rows.Count, "C")
Or
With Sheets(1)
.Range("C2", .Cells(.Cells.Rows.Count, "C")
End With
As Mat pointed out: Sheets(1) is as also an implicit reference to ThisWorkbook.Worksheets(1)
So the complete reference is
With ThisWorkbook.Worksheets(1)
.Range("C2", .Cells(.Cells.Rows.Count, "C")
End With
A few suggestions that might improve performance:
In function
FindHeaderRange()
, replace theFind
method withMatch
.From
Set FindHeaderRange = ws.Cells.Find(header, , , xlWhole) ... Set source = FindHeaderRange(wsImport, header) If source Is Nothing Then...
To
FindHeaderRange = Application.Match(header, ws.UsedRange.Rows(1), 0)
FindHeaderRange()
's return type will change fromRange
toVariant
-Match
returns aDouble
if it finds something and error if not, so checking the result changes to:If Not IsError(FindHeaderRange) Then ...
(if ok,
FindHeaderRange
will be 3, 4, 99, etc because we're searching only Row 1)Match
is significantly faster thanFind
.In function
BuildMessage()
: replaceSpace(1)
with" "
.Space(1)
is a function call not worth calling for just one space.Collections (and dictionaries) are great in reducing the amount of code, and retrieving items, but are quite slow at loading data.
When using the "headers" collection you're not reducing code too much, compared to "headers" as an array; arrays are very fast in both loading and retrieving; more complex code can get convoluted and hard to follow with multidimensional arrays, but in your case a 1-dimensional array would not change the structure: For Each element in Collection = For Each Element in array.
Your code is very well organized, modularized, and easy to read - great work! However, there is a price to pay for over-modularizing.
Function are meant to isolate blocks of code for easier maintenance and to make logical distinction between different functionalities, but calls to functions can get expensive, especially when they're very frequent and have a small number of lines; in-line code is much faster to execute than the extra memory navigation to the function called.
Your 2 small (and neat) functions
FindHeaderRange()
andBuildMessage()
can be executed directly, without negatively impacting readability.
Another important performance improvement:
The most time consuming line in your For
loop:
wsImport.Range(source.Offset(1), wsImport.Cells(Rows.Count, source.Column).End(xlUp)).Copy _
wsMain.Cells(Rows.Count, dest.Column).End(xlUp)(2)
This is meant to copy and paste the used range part of the current column.
Finding current column used range can be optimized (and explicit):
With wsImport
Set fromCol = .Range( source.Offset(1), .Cells(.UsedRange.Rows.Count + 1, source.Column).End(xlUp))
End With
With wsMain
Set toCol = .Cells(.UsedRange.Rows.Count + 1, dest.Column).End(xlUp)(2)
End With
fromCol.Copy toCol
Do you need to dynamically find the used range in wsMain
in order to remove previous data?
If so, it would be faster to delete the contents of the entire column:
wsMain.UsedRange.Column(dest.Column).Value2 = vbNullString
Then, if all columns in wsImport
are the same size you can get the last used row before the For
loop (just once) and use that value inside the loop.
Edit: tested version of the code:
.
Option Explicit
Public Sub projectionTemplateFormat()
Dim t1 As Double, t2 As Double
xlSpeed True
t1 = Timer
mainProcess
t2 = Timer
xlSpeed False
MsgBox "Duration: " & t2 - t1 & " seconds"
End Sub
Private Sub mainProcess()
Const SPACE_DELIM As String = " "
Dim wsIndex As Worksheet
Dim wsImport As Worksheet 'Raw
Dim wsFinal As Worksheet
Dim importHeaderRng As Range
Dim importColRng As Range
Dim importHeaderFound As Variant
Dim importLastRow As Long
Dim finalHeaderRng As Range
Dim finalColRng As Range
Dim finalHeaderRow As Variant
Dim finalHeaderFound As Variant
Dim indexHeaderCol As Range
Dim header As Variant 'Each item in the FOR loop
Dim msg As String
Set wsIndex = aIndex 'This is the Code Name; top-left pane: aIndex (Index)
Set wsImport = bImport 'Direct reference to Code Name: bImport.Range("A1")
Set wsFinal = cFinal 'Reference using Sheets collection: ThisWorkbook.Worksheets("Final")
With wsImport.UsedRange
Set importHeaderRng = .Rows(1) 'Import - Headers
importLastRow = .Rows.Count 'Import - Total Rows
End With
With wsFinal.UsedRange
finalHeaderRow = .Rows(1) 'Final - Headers (as Array)
Set finalHeaderRng = .Rows(1) 'Final - Headers (as Range)
End With
With wsIndex.UsedRange 'Transpose col 3 from Index (without the header), as column names in Import
Set indexHeaderCol = .Columns(3).Offset(1, 0).Resize(.Rows.Count - 1, 1)
wsImport.Range(wsImport.Cells(1, 1), wsImport.Cells(1, .Rows.Count - 1)).Value2 = Application.Transpose(indexHeaderCol)
End With
If Len(bImport.Cells(2, 2).Value2) > 0 Then 'if Import sheet is not empty (excluding header row)
With Application
For Each header In finalHeaderRow 'Loop through all headers in Final
If Len(Trim(header)) > 0 Then 'If the Final heade is not empty
importHeaderFound = .Match(header, importHeaderRng, 0) 'Find header in Import sheet
If IsError(importHeaderFound) Then
msg = msg & vbLf & header & SPACE_DELIM & wsImport.Name 'Import doesn't have current header
Else
finalHeaderFound = .Match(header, finalHeaderRng, 0) 'Find header in Final sheet
With wsImport
Set importColRng = .UsedRange.Columns(importHeaderFound).Offset(1, 0).Resize(.UsedRange.Rows.Count - 1, 1)
End With
With wsFinal
Set finalColRng = .Range(.Cells(2, finalHeaderFound), .Cells(importLastRow, finalHeaderFound))
finalColRng.Value2 = vbNullString 'Delete previous values (entire column)
End With
finalColRng.Value2 = importColRng.Value2 'Copy Import data in Final columns
End If
End If
Next header
End With
allUpper wsFinal
'wsFinal.UsedRange.AutoFilter
applyFormat wsFinal.Range(wsFinal.Cells(1, 1), wsFinal.Cells(importLastRow, wsFinal.UsedRange.Columns.Count))
Dim ws As Worksheet
For Each ws In Worksheets
ws.Activate
ActiveWindow.Zoom = 85
ws.Cells(2, 2).Activate
ActiveWindow.FreezePanes = True
ws.Cells(1, 1).Activate
Next
Else
MsgBox "Missing raw data (Sheet 2 - 'Import')", vbInformation, " Missing Raw Data"
End If
End Sub
Private Sub applyFormat(ByRef rng As Range)
With rng
.ClearFormats
With .Font
.Name = "Georgia"
.Color = RGB(0, 0, 225)
End With
.Interior.Color = RGB(216, 228, 188)
With .Rows(1)
.Font.Bold = True
.Interior.ColorIndex = xlAutomatic
End With
With .Borders
.LineStyle = xlDot 'xlContinuous
.ColorIndex = xlAutomatic
.Weight = xlThin
End With
End With
refit rng
End Sub
Private Sub allUpper(ByRef sh As Worksheet)
Dim arr As Variant, i As Long, j As Long
If WorksheetFunction.CountA(sh.UsedRange) > 0 Then
arr = sh.UsedRange
For i = 2 To UBound(arr, 1) 'each "row"
For j = 1 To UBound(arr, 2) 'each "col"
arr(i, j) = UCase(RTrim(Replace(arr(i, j), Chr(10), vbNullString)))
Next
Next
sh.UsedRange = arr
End If
End Sub
Public Sub allImportTrim()
Dim arr As Variant, i As Long, j As Long, sh As Worksheet
Set sh = bImport
If WorksheetFunction.CountA(sh.UsedRange) > 0 Then
arr = sh.UsedRange
For i = 2 To UBound(arr, 1) 'each "row"
For j = 1 To UBound(arr, 2) 'each "col"
arr(i, j) = RTrim(Replace(arr(i, j), Chr(10), vbNullString))
Next
Next
sh.UsedRange = arr
End If
refit sh.UsedRange
End Sub
Private Sub refit(ByRef rng As Range)
With rng
.WrapText = False
.HorizontalAlignment = xlGeneral
.VerticalAlignment = xlCenter
.Columns.EntireColumn.AutoFit
.Rows.EntireRow.AutoFit
End With
End Sub
-
\$\begingroup\$ Nice answer. Welcome to Code Review! We'll have to disagree about the call to
Space
though. I think it offers readability over the literal. \$\endgroup\$RubberDuck– RubberDuck2015年06月06日 22:55:46 +00:00Commented Jun 6, 2015 at 22:55 -
\$\begingroup\$ Welcome to CR! That is a beautiful answer, I hope you stick around! \$\endgroup\$Mathieu Guindon– Mathieu Guindon2015年06月06日 23:03:08 +00:00Commented Jun 6, 2015 at 23:03
-
\$\begingroup\$ Ahh. I just noticed that you used A constant. Nice choice! \$\endgroup\$RubberDuck– RubberDuck2015年06月06日 23:29:45 +00:00Commented Jun 6, 2015 at 23:29
-
\$\begingroup\$ @RubberDuck and Mat - thank you both (or all 3 - don't know who else upvoted) - I've already noticed your work and it's very impressive ! \$\endgroup\$paul bica– paul bica2015年06月07日 00:12:04 +00:00Commented Jun 7, 2015 at 0:12
-
2\$\begingroup\$ I count
Sheet1(1)
as an implicit reference toThisWorkbook.Sheets
, which is probably intended to beThisWorkbook.Worksheets
. \$\endgroup\$Mathieu Guindon– Mathieu Guindon2015年06月07日 20:42:20 +00:00Commented Jun 7, 2015 at 20:42