This is a direct follow up to Parsing cells containing Line Feed Characters.
Link to sanitized xls on dropbox if test data is needed
Essentially the reports I work with aren't bad -
The issue is the way it exports to excel -
With the problem being that these cells are filled with LF characters breaking apart the data entries in the cells (usually a listing of employees in format empID / emp name
. There's really no rhyme or reason as to where it places the LFs - sometimes there are three in a row.
A lot of the time for analysis I need to use this data but first I need each person to have their own data (the reports get a lot bigger).
I implemented most changes in the answers from last time, but I've never worked with Enum
before or custom error handling. Also, this was initially developed in 2010, but this time I wrote it in 2016, if that matters.
Code
All one module. Two procedures - parse into columns and parse into rows.
Private Enum ParseError
InputRangeIsNothing = vbObjectError + 42
MultipleColumnsSelected = vbObjectError + 43
ProcessCancelled = vbObjectError + 44
NoOverwrite = vbObjectError + 45
NoData = vbObjectError + 46
End Enum
Public Sub ParseIntoColumns()
'Parse column to the right (text to columns)
On Error GoTo ErrHandler
Dim confirmOverwrite As String
confirmOverwrite = MsgBox("Do you want to overwrite all data to the right of your selection?", vbYesNo)
If confirmOverwrite = vbNo Then Err.Raise ParseError.NoOverwrite
Dim lastRow As Long
lastRow = 1
Dim workingRange As Range
Set workingRange = UserSelectRange(lastRow)
If workingRange Is Nothing Then Err.Raise ParseError.InputRangeIsNothing
Dim workingSheet As Worksheet
Set workingSheet = workingRange.Parent
Dim workingColumn As Long
workingColumn = workingRange.Column
Application.ScreenUpdating = False
Application.DisplayAlerts = False
workingRange.TextToColumns _
Destination:=workingRange, _
DataType:=xlDelimited, _
TextQualifier:=xlTextQualifierNone, _
ConsecutiveDelimiter:=True, _
Tab:=False, _
Semicolon:=False, _
Comma:=False, _
Space:=False, _
Other:=True, OtherChar:=vbLf
Application.DisplayAlerts = True
With workingSheet.UsedRange
.WrapText = False
.EntireColumn.AutoFit
End With
Application.ScreenUpdating = True
Exit Sub
ErrHandler:
Select Case Err.Number
Case ParseError.InputRangeIsNothing
MsgBox "Process cancelled: You have not selected a range.", vbExclamation
Case ParseError.MultipleColumnsSelected
MsgBox "Process cancelled: You may not select more than 1 column at a time", vbExclamation
Case ParseError.ProcessCancelled
MsgBox "Process cancelled", vbExclamation
Case ParseError.NoOverwrite
MsgBox "Process cancelled: Please alter your data structure to allow overwriting cells to the right of your selection.", vbExclamation
Case ParseError.NoData
MsgBox "Process cancelled: your selection does not have data to parse", vbExclamation
Case Else
MsgBox "An error has occured: " & Err.Number & "- " & Err.Description, vbCritical
End Select
End Sub
Public Sub ParseIntoRows()
'Parse column downward, inserting rows
On Error GoTo ErrHandler
Dim lastRow As Long
lastRow = 1
Dim workingRange As Range
Set workingRange = UserSelectRange(lastRow)
If workingRange Is Nothing Then Err.Raise ParseError.InputRangeIsNothing
Application.ScreenUpdating = False
Dim workingColumn As Long
workingColumn = workingRange.Column
Dim currentRow As Long
Dim cellToParse As Range
Dim stringParts() As String
Dim cellContent As String
Dim replacementCellContent As String
Dim numberOfParts As Long
For currentRow = lastRow To 2 Step -1
Set cellToParse = Cells(currentRow, workingColumn)
If Not IsEmpty(cellToParse) Then
cellContent = cellToParse.Value
replacementCellContent = Replace(cellContent, vbLf & vbLf, vbLf)
Do Until cellContent = replacementCellContent
cellContent = replacementCellContent
replacementCellContent = Replace(cellContent, vbLf & vbLf, vbLf)
Loop
stringParts = Split(cellContent, vbLf)
numberOfParts = UBound(stringParts) - LBound(stringParts) + 1
If numberOfParts > 1 Then CreateNewRows stringParts(), numberOfParts, cellToParse
End If
Next currentRow
Application.CutCopyMode = False
Application.ScreenUpdating = True
Exit Sub
ErrHandler:
Select Case Err.Number
Case ParseError.InputRangeIsNothing
MsgBox "Process cancelled: You have not selected a range.", vbExclamation
Case ParseError.MultipleColumnsSelected
MsgBox "Process cancelled: You may not select more than 1 column at a time", vbExclamation
Case ParseError.ProcessCancelled
MsgBox "Process cancelled", vbExclamation
Case ParseError.NoOverwrite
MsgBox "Process cancelled: Please alter your data structure to allow overwriting cells to the right of your selection.", vbExclamation
Case ParseError.NoData
MsgBox "Process cancelled: your selection does not have data to parse", vbExclamation
Case Else
MsgBox "An error has occured: " & Err.Number & "- " & Err.Description, vbCritical
End Select
End Sub
Supporting cast:
Private Sub CreateNewRows(ByRef partsOfString() As String, ByVal numberOfParts As Long, ByVal cellToParse As Range)
With cellToParse
.EntireRow.Copy
.Offset(1, 0).Resize(numberOfParts - 1, 1).EntireRow.Insert
.Resize(numberOfParts, 1).Value = Application.WorksheetFunction.Transpose(partsOfString)
End With
End Sub
Private Function UserSelectRange(ByRef lastRow As Long) As Range
Set UserSelectRange = Nothing
Dim columnToParse As Range
Set columnToParse = GetUserInputRange
If columnToParse Is Nothing Then Err.Raise ParseError.InputRangeIsNothing
If columnToParse.Columns.Count > 1 Then Err.Raise ParseError.MultipleColumnsSelected
lastRow = Cells(Rows.Count, columnToParse.Column).End(xlUp).Row
If lastRow < 2 Then Err.Raise ParseError.NoData
Dim columnLetter As String
columnLetter = ColumnNumberToLetter(columnToParse)
Dim result As String
result = MsgBox("The column you've selected to parse is column " & columnLetter, vbOKCancel)
If result = vbCancel Then Err.Raise ParseError.ProcessCancelled
Set UserSelectRange = Range(Cells(2, columnToParse.Column), Cells(lastRow, columnToParse.Column))
End Function
Private Function GetUserInputRange() As Range
'This is segregated because of how excel handles cancelling a range input
Dim userAnswer As Range
On Error GoTo InputError
Set userAnswer = Application.InputBox("Please click a cell in the column to parse", "Column Parser", Type:=8)
Set GetUserInputRange = userAnswer
Exit Function
InputError:
Set GetUserInputRange = Nothing
End Function
Private Function ColumnNumberToLetter(ByVal selectedRange As Range) As String
'Convert column number to column letter representation
Dim rowBeginningPosition As Long
rowBeginningPosition = InStr(2, selectedRange.Address, "$")
Dim columnLetter As String
columnLetter = Mid(selectedRange.Address, 2, rowBeginningPosition - 2)
'Handles if the user selected an entire column
If Right(columnLetter, 1) = ":" Then
ColumnNumberToLetter = Left(columnLetter, Len(columnLetter) - 1)
Else: ColumnNumberToLetter = columnLetter
End If
End Function
3 Answers 3
General
1 - The first line in UserSelectedRange
is setting the return value to its default. At this point in the function it is already Nothing
:
Private Function UserSelectRangeO(ByRef lastRow As Long) As Range
Set UserSelectRange = Nothing '<- Does nothing
Similarly, in GetUserInputRange()
you do this if Application.InputBox
throws:
'...
Exit Function
InputError:
Set GetUserInputRange = Nothing
End Function
But if it throws, GetUserInputRange
is never set. This function can be simplified to...
Private Function GetUserInputRange() As Range
'This is segregated because of how excel handles cancelling a range input
On Error Resume Next
Set GetUserInputRange = Application.InputBox("Please click a cell in the column to parse", _
"Column Parser", Type:=8)
End Function
...and at that point I'm not sure I see why you wouldn't just inline it because you are using the return value of Nothing
to throw a different error anyway:
If columnToParse Is Nothing Then Err.Raise ParseError.InputRangeIsNothing
2 - There isn't any need for UserSelectedRange
to return lastRow
by reference. You can simply get the last row from the selected Range
itself. Since you aren't even using lastRow
in ParseIntoColumns
, it allows you to get rid of this dead code in that procedure:
Dim lastRow As Long
lastRow = 1
3 - MsgBox
returns a VbMsgBoxResult
, which is an Integer
. When you make tests of the return value, you are implicitly cast it to a String
, then comparing it to an Integer
(vbCancel
), which implicitly casts it back to an Integer
:
Dim result As String
result = MsgBox("The column you've selected to parse is column " & columnLetter, vbOKCancel)
If result = vbCancel Then Err.Raise ParseError.ProcessCancelled
If you need to store the return value, declare it as the appropriate type:
Dim result As VbMsgBoxResult
If you don't (for example if you're only testing it once), you can simply omit the variable declaration and test the return value directly:
If MsgBox("The column you've selected to parse is column " & columnLetter, _
vbOKCancel) = vbCancel Then
Err.Raise ParseError.ProcessCancelled
End If
4 - I'd put your ParseError
enumeration in its own module and make it public instead of private. That way if you have other procedures that use custom error numbers you both easily can reuse them and avoid the possibility of collisions in error numbers.
5 - Named parameters after line continuations should be indented consistently. This is incredibly difficult to read:
workingRange.TextToColumns _
Destination:=workingRange, _
DataType:=xlDelimited, _
TextQualifier:=xlTextQualifierNone, _
ConsecutiveDelimiter:=True, _
Tab:=False, _
Semicolon:=False, _
Comma:=False, _
Space:=False, _
Other:=True, OtherChar:=vbLf
6 - Consider using a regular expression to remove duplicate line feeds in ParseIntoRows
. This can also avoid the possible bug if the data contains a vbCr
. Since you immediately split the result, I'd use a function like this...
'Needs a reference to Microsoft VBScript Regular Expressions x.x
Private Function SplitLinesNoEmpties(target As String) As String()
With New RegExp
.Pattern = "[\n]+"
.MultiLine = True
.Global = True
SplitLinesNoEmpties = Split(.Replace(Replace$(target, vbCr, vbLf), vbLf), vbLf)
End With
End Function
...instead of: Do Until cellContent = replacementCellContent
Then you can simply use stringParts = SplitLinesNoEmpties(cellContent)
to get your array.
7 - Guard clauses should be in the procedure that they guard - not in the calling procedure. I'd move this code...
stringParts = Split(cellContent, vbLf)
numberOfParts = UBound(stringParts) - LBound(stringParts) + 1
If numberOfParts > 1 Then CreateNewRows stringParts(), numberOfParts, cellToParse
...to Sub CreateNewRows
:
Private Sub CreateNewRows(ByRef partsOfString() As String, ByVal cellToParse As Range)
Dim bottom As Long
Dim top As Long
bottom = LBound(partsOfString)
top = UBound(partsOfString)
If top <= bottom Then Exit Sub
With cellToParse
.EntireRow.Copy
.Offset(1, 0).Resize(top - bottom, 1).EntireRow.Insert
.Resize(numberOfParts, 1).Value = Application.WorksheetFunction.Transpose(partsOfString)
End With
End Sub
Note that this does a couple things - it avoids the need to add one to the UBound
- LBound
calculation and then just subtract it again. If you're testing to see if an array has at least 2 elements, UBound
> LBound
is sufficient (and protects from cases where LBound
and\or UBound
is negative). It also explicitly protects against the case of UBound(Split(vbNullString))
, which returns -1. This leads me to...
8 - Your guard clauses have a very subtle bug. Before you process the cell, you use this test:
Set cellToParse = Cells(currentRow, workingColumn)
If Not IsEmpty(cellToParse) Then
cellContent = cellToParse.Value
'...
IsEmpty
isn't doing what you think it is here. It doesn't test whether a cell is empty - it tests whether the Variant
passed to it is equal to vbEmpty
.
Private Sub TleBug()
Cells(1, 1).Formula = "=" & Chr$(34) & Chr$(34) ' =""
Debug.Print IsEmpty(Cells(1, 1)) 'False
Debug.Print Cells(1, 1).Value = vbNullString 'True
End Sub
If you need to test whether a cell evaluates to vbNullString
, do it explicitly:
Set cellToParse = Cells(currentRow, workingColumn)
cellContent = cellToParse.Value
If cellToParse <> vbNullString Then
'...
9 - You have another (less) subtle bug. If you use Application.InputBox
to have the user select the range to work with, you can't use the global Range
or Cells
collections - they have to be qualified. The reason is that you yield control to the user, who is free to select a cell in a different Workbook than the one that was active when the macro started.
Private Sub TleBugTwo()
Dim target As Range
'User selects a cell in a different Workbook
Set target = Application.InputBox("Select cell", "Input", Type:=8)
Dim globalRange As Range
Set globalRange = Range("A1")
Debug.Print globalRange.Worksheet Is ActiveSheet 'True
Debug.Print target.Worksheet Is globalRange.Worksheet 'False
End Sub
User Interface
1. GetUserInputRange()
doesn't display appropriate errors
If the user simply hits "OK" when the Application.InputBox
is displayed, Excel shows this error dialog:
2. The range selection interface duplicates Excel functionality
Note that this is more a matter of personal preference than anything, but if Excel already provides an interface to select a cell or range of cells, why duplicate that? I'd simply use the existing Selection
object when the macro starts. You're already prompting the user to confirm that the Range
that they selected when prompted is the one they want to work on, so why not just skip that entire process and use the Selection
object instead?
Errors
@Zak already addressed the big issue with the error handling, so I'll nitpick a little instead.
1. Duplicated code
Your error handlers in ParseIntoColumns
and ParseIntoRows
are identical, and only display the error condition to the user. I'd recommend extracting that section to it's own Sub:
Private Sub DisplayErrorMessage(Err As Object)
Select Case Err.Number
Case ParseError.InputRangeIsNothing
MsgBox "Process cancelled: You have not selected a range.", vbExclamation
Case ParseError.MultipleColumnsSelected
MsgBox "Process cancelled: You may not select more than 1 column at a time", vbExclamation
Case ParseError.ProcessCancelled
MsgBox "Process cancelled", vbExclamation
Case ParseError.NoOverwrite
MsgBox "Process cancelled: Please alter your data structure to allow overwriting cells to the right of your selection.", vbExclamation
Case ParseError.NoData
MsgBox "Process cancelled: your selection does not have data to parse", vbExclamation
Case Else
MsgBox "An error has occured: " & Err.Number & "- " & Err.Description, vbCritical
End Select
End Sub
Then you can simply do this for your error handlers:
CleanUp:
'Do stuff
Exit Sub
ErrHandler:
DisplayErrorMessage Err
Resume CleanUp
2. User cancellation is not an error condition
I'd consider this section to be an abuse of the error handler:
Dim confirmOverwrite As String
confirmOverwrite = MsgBox("Do you want to overwrite all data to the right of your selection?", vbYesNo)
If confirmOverwrite = vbNo Then Err.Raise ParseError.NoOverwrite
I'm not even sure that you need to display any sort of confirmation that the process has been cancelled. My personal expectation would be that it would simply exit after I told it not to continue:
If confirmOverwrite = vbNo Then Exit Sub
-
\$\begingroup\$ @Raystafarian - Re #2 I was referring to
ParseIntoColumns()
- lastRow isn't used in that procedure. Since you are only using it inParseIntoRows()
, you can grab thelastRow
from the returned range there with something like:lastRow = workingRange.Cells(workingRange.Parent.Rows.Count, 1).End(xlUp).Row
\$\endgroup\$Comintern– Comintern2016年08月09日 17:43:06 +00:00Commented Aug 9, 2016 at 17:43 -
\$\begingroup\$ This is a lot to go over, than you. Re: 2 - it's passed back to
parseintorows
. 3 - I had no idea I could declare that type thanks! 7: I was avoiding the call of the procedure, but maybe that call costs less than your suggestion. \$\endgroup\$Raystafarian– Raystafarian2016年08月09日 17:48:38 +00:00Commented Aug 9, 2016 at 17:48 -
\$\begingroup\$ 8 - if a cell has
LF
in it,isempty(range)
istrue
whereas setting it to a variable andisempty(var)
isfalse
. 9 - I see what you mean, how is that corrected? \$\endgroup\$Raystafarian– Raystafarian2016年08月09日 17:49:31 +00:00Commented Aug 9, 2016 at 17:49 -
\$\begingroup\$ @Raystafarian - I'm not sure I understand
Cells(1, 1) = vbLf: Debug.Print IsEmpty(Cells(1, 1))
givesFalse
. My point was thatIsEmpty()
is testing whether a given variable, cast to aVariant
, has a VARTYPE of VT_EMPTY. It isn't a reliable way of testing to see if Range.Value evaluates tovbNullString
because aRange
isn't empty if it contains a formula. \$\endgroup\$Comintern– Comintern2016年08月09日 17:55:58 +00:00Commented Aug 9, 2016 at 17:55 -
\$\begingroup\$ So if a report, for instance, spits out a (invisible) vbLF into every cell and I want to avoid processing those, how would I manage that? I think I understand you, but this is new information to me \$\endgroup\$Raystafarian– Raystafarian2016年08月09日 18:02:39 +00:00Commented Aug 9, 2016 at 18:02
Your error-handling is going to cause more errors
The relevant principle here is Single Point of Exit.
In essence, there should only ever be one place where execution of any method stops.
Your problem is here:
Application.CutCopyMode = False
Application.ScreenUpdating = True
Exit Sub
ErrHandler:
Select Case Err.Number
Case ParseError.InputRangeIsNothing
MsgBox "Process cancelled: You have not selected a range.", vbExclamation
Case ParseError.MultipleColumnsSelected
MsgBox "Process cancelled: You may not select more than 1 column at a time", vbExclamation
Case ParseError.ProcessCancelled
MsgBox "Process cancelled", vbExclamation
Case ParseError.NoOverwrite
MsgBox "Process cancelled: Please alter your data structure to allow overwriting cells to the right of your selection.", vbExclamation
Case ParseError.NoData
MsgBox "Process cancelled: your selection does not have data to parse", vbExclamation
Case Else
MsgBox "An error has occured: " & Err.Number & "- " & Err.Description, vbCritical
End Select
End Sub
If your sub runs without errors, these operations will happen:
Application.CutCopyMode = False
Application.ScreenUpdating = True
But, if you goto ErrHandler
, then you will get a messagebox, and then the Sub will exit without running those lines.
Now, here it's only CutCopyMode
that will persist. Not the end of the world. But it could easily have been EnableEvents
or Calculation
.
Personally, I prefer the following Structure for these situations:
Sub DoThing()
On Error Goto CleanFail
[Code]
[Code]
[Code]
[Code]
CleanExit:
[Clean Up]
Exit Sub
CleanFail:
[Error Handling]
[Error Handling]
Resume CleanExit
End Sub
This way, we can always guarantee that the code will exit the procedure via CleanExit
and so any clean-up code we put there will always be called.
ColumnNumberToLetter
What happens if the user selects an entire row? Your function will currently return a row number. In cases where an entire row is selected, you can't even work around it by using selectedRange.EntireColumn.Address
as Excel actually returns 1ドル:1048576ドル
?!?!
Once you handle the special case of an entire row selection, you can avoid the need to special-case an entire column selection by using InStrRev
(note the arguments are passed in a different order).
address = selectedRange.EntireColumn.Address
colBeginPosition = InStrRev(address, "$", 2) + 1
columnLetter = Mid(address, colBeginPosition)
Persistence
Your code is explicitly disabling and re-enabling properties like DisplayAlerts
and ScreenUpdating
. If your code ever forms part of a larger process, you might be resetting the properties prematurely. You should try to persist the values at the outset, and restore to those values, rather than explicitly setting them back to defaults or any specific value. Take a look at the class I posted that helps with persisting and restoring these properties.
Block Syntax and statement separators
My preference is to never use single-line If
statements, but I understand the usage in limited cases. You also have some If..Else
blocks where the If
is not single-lined, but the Else
uses a statement separator. Your code can quickly become unreadable with this syntax, I'd suggest changing to:
If Right(columnLetter, 1) = ":" Then
ColumnNumberToLetter = Left(columnLetter, Len(columnLetter) - 1)
Else
ColumnNumberToLetter = columnLetter
End If
-
\$\begingroup\$ That EntireColumn thing baffled me, so I posted a SO question stackoverflow.com/questions/38777170/… \$\endgroup\$ThunderFrame– ThunderFrame2016年08月04日 20:51:11 +00:00Commented Aug 4, 2016 at 20:51
-
\$\begingroup\$ I'm confused about the row argument, it should break on the selection containing more than one column, so a row can't be passed, can it? Re: persistence - thanks, I will take a look at that. \$\endgroup\$Raystafarian– Raystafarian2016年08月09日 17:27:19 +00:00Commented Aug 9, 2016 at 17:27
-
\$\begingroup\$ Emphasis added: Once you handle the special case of an entire row selection. i.e. I haven't handled that case (or the case of all cells, or all columns), and you'll have to do something like
If Not IsNumeric(columnLetter) Then
\$\endgroup\$ThunderFrame– ThunderFrame2016年08月10日 06:58:01 +00:00Commented Aug 10, 2016 at 6:58