20
\$\begingroup\$

A while ago I implemented .net's string.Format() method in VB6; it works amazingly well, but I'm sure there has to be a way to make it more efficient.

I'll start by listing a simple class called EscapeSequence:

Private Type tEscapeSequence
 EscapeString As String
 ReplacementString As String
End Type
Private this As tEscapeSequence
Option Explicit
Public Property Get EscapeString() As String
 EscapeString = this.EscapeString
End Property
Friend Property Let EscapeString(value As String)
 this.EscapeString = value
End Property
Public Property Get ReplacementString() As String
 ReplacementString = this.ReplacementString
End Property
Friend Property Let ReplacementString(value As String)
 this.ReplacementString = value
End Property
'Lord I wish VB6 had constructors!
Public Function Create(escape As String, replacement As String) As EscapeSequence
 Dim result As New EscapeSequence
 result.EscapeString = escape
 result.ReplacementString = replacement
 Set Create = result
End Function

...and the actual StringFormat function - there's a global variable PADDING_CHAR involved, which I'd love to find a way to specify and de-globalize:

Public Function StringFormat(format_string As String, ParamArray values()) As String
'VB6 implementation of .net String.Format(), slightly customized.
 Dim return_value As String
 Dim values_count As Integer
 'some error-handling constants:
 Const ERR_FORMAT_EXCEPTION As Long = vbObjectError Or 9001
 Const ERR_ARGUMENT_NULL_EXCEPTION As Long = vbObjectError Or 9002
 Const ERR_ARGUMENT_EXCEPTION As Long = vbObjectError Or 9003
 Const ERR_SOURCE As String = "StringFormat"
 Const ERR_MSG_INVALID_FORMAT_STRING As String = "Invalid format string."
 Const ERR_MSG_FORMAT_EXCEPTION As String = "The number indicating an argument to format is less than zero, or greater than or equal to the length of the args array."
 Const ERR_MSG_NUMBER_ARGUMENT_EXCEPTION As String = "Invalid number argument."
 'use SPACE as default padding character
 If PADDING_CHAR = vbNullString Then PADDING_CHAR = Chr$(32)
 'figure out number of passed values:
 values_count = UBound(values) + 1
 Dim regex As RegExp
 Dim matches As MatchCollection
 Dim thisMatch As Match
 Dim thisString As String
 Dim thisFormat As String
 Dim useLiteral As Boolean 'when format_string starts with "@", escapes are not replaced (string is treated as a literal string with placeholders)
 Dim escapeHex As Boolean 'indicates whether HEX specifier "0x" is to be escaped or not
 'validate string_format:
 Set regex = New RegExp
 regex.pattern = "{({{)*(\w+)(,-?\d+)?(:[^}]+)?}(}})*"
 regex.IgnoreCase = True
 regex.Global = True
 Set matches = regex.Execute(format_string)
 'determine if values_count matches number of unique regex matches:
 Dim uniqueCount As Integer
 Dim tmpCSV As String
 For Each thisMatch In matches
 If Not StringContains(tmpCSV, thisMatch.SubMatches(1)) Then
 uniqueCount = uniqueCount + 1
 tmpCSV = tmpCSV & thisMatch.SubMatches(1) & ","
 End If
 Next
 'unique indices count must match values_count:
 If matches.Count > 0 And uniqueCount <> values_count Then
 Err.Raise ERR_FORMAT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_FORMAT_EXCEPTION
 End If
 useLiteral = StringStartsWith("@", format_string)
 If useLiteral Then format_string = Right(format_string, Len(format_string) - 1) 'remove the "@" literal specifier
 If Not useLiteral And StringContains(format_string, "\\") Then _
 format_string = Replace(format_string, "\\", Chr$(27))
 If matches.Count = 0 And format_string <> vbNullString And UBound(values) = -1 Then
 'only format_string was specified: skip to checking escape sequences:
 return_value = format_string
 GoTo checkEscapes
 ElseIf UBound(values) = -1 And matches.Count > 0 Then
 Err.Raise ERR_ARGUMENT_NULL_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_FORMAT_EXCEPTION
 End If
 return_value = format_string
 'dissect format_string:
 Dim i As Integer, v As String, p As String 'i: iterator; v: value; p: placeholder
 Dim alignmentGroup As String, alignmentSpecifier As String
 Dim formattedValue As String, alignmentPadding As Integer
 'iterate regex matches (each match is a placeholder):
 For i = 0 To matches.Count - 1
 'get the placeholder specified index:
 Set thisMatch = matches(i)
 p = thisMatch.SubMatches(1)
 'if specified index (0-based) > uniqueCount (1-based), something's wrong:
 If p > uniqueCount - 1 Then
 Err.Raise ERR_FORMAT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_FORMAT_EXCEPTION
 End If
 v = values(p)
 'get the alignment specifier if it is specified:
 alignmentGroup = thisMatch.SubMatches(2)
 If alignmentGroup <> vbNullString Then _
 alignmentSpecifier = Right$(alignmentGroup, LenB(alignmentGroup) / 2 - 1)
 'get the format specifier if it is specified:
 thisString = thisMatch.value
 If StringContains(thisString, ":") Then
 Dim formatGroup As String, precisionSpecifier As Integer
 Dim formatSpecifier As String, precisionString As String
 'get the string between ":" and "}":
 formatGroup = mId$(thisString, InStr(1, thisString, ":") + 1, (LenB(thisString) / 2) - 2)
 formatGroup = Left$(formatGroup, LenB(formatGroup) / 2 - 1)
 precisionString = Right$(formatGroup, LenB(formatGroup) / 2 - 1)
 formatSpecifier = mId$(thisString, InStr(1, thisString, ":") + 1, 1)
 'applicable formatting depends on the type of the value (yes, GOTO!!):
 If TypeName(values(p)) = "Date" Then GoTo DateTimeFormatSpecifiers
 If v = vbNullString Then GoTo ApplyStringFormat
NumberFormatSpecifiers:
 If precisionString <> vbNullString And Not IsNumeric(precisionString) Then
 Err.Raise ERR_FORMAT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_INVALID_FORMAT_STRING
 End If
 If Not IsNumeric(v) Then
 Err.Raise ERR_ARGUMENT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_NUMBER_ARGUMENT_EXCEPTION
 End If
 If precisionString = vbNullString Then precisionString = 0
 Select Case formatSpecifier
 Case "C", "c" 'CURRENCY format, formats string as currency.
 'Precision specifier determines number of decimal digits.
 'This implementation ignores regional settings
 '(hard-coded group separator, decimal separator and currency sign).
 precisionSpecifier = CInt(precisionString)
 thisFormat = "#,##0.$"
 If LenB(formatGroup) > 2 And precisionSpecifier > 0 Then
 'if a non-zero precision is specified...
 thisFormat = _
 Replace$(thisFormat, ".", "." & String$(precisionString, Chr$(48)))
 Else
 thisFormat = CURRENCY_FORMAT
 End If
 Case "D", "d" 'DECIMAL format, formats string as integer number.
 'Precision specifier determines number of digits in returned string.
 precisionSpecifier = CInt(precisionString)
 thisFormat = "0"
 thisFormat = Right$(String$(precisionSpecifier, "0") & thisFormat, _
 IIf(precisionSpecifier = 0, Len(thisFormat), precisionSpecifier))
 Case "E", "e" 'EXPONENTIAL NOTATION format (aka "Scientific Notation")
 'Precision specifier determines number of decimals in returned string.
 'This implementation ignores regional settings'
 '(hard-coded decimal separator).
 precisionSpecifier = CInt(precisionString)
 thisFormat = "0.00000#" & formatSpecifier & "-#" 'defaults to 6 decimals
 If LenB(formatGroup) > 2 And precisionSpecifier > 0 Then
 'if a non-zero precision is specified...
 thisFormat = "0." & String$(precisionSpecifier - 1, Chr$(48)) & "#" & formatSpecifier & "-#"
 ElseIf LenB(formatGroup) > 2 And precisionSpecifier = 0 Then
 Err.Raise ERR_FORMAT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_INVALID_FORMAT_STRING
 End If
 Case "F", "f" 'FIXED-POINT format
 'Precision specifier determines number of decimals in returned string.
 'This implementation ignores regional settings'
 '(hard-coded decimal separator).
 precisionSpecifier = CInt(precisionString)
 thisFormat = "0"
 If LenB(formatGroup) > 2 And precisionSpecifier > 0 Then
 'if a non-zero precision is specified...
 thisFormat = (thisFormat & ".") & String$(precisionSpecifier, Chr$(48))
 Else
 'no precision specified - default to 2 decimals:
 thisFormat = "0.00"
 End If
 Case "G", "g" 'GENERAL format (recursive)
 'returns the shortest of either FIXED-POINT or SCIENTIFIC formats in case of a Double.
 'returns DECIMAL format in case of a Integer or Long.
 Dim eNotation As String, ePower As Integer, specifier As String
 precisionSpecifier = IIf(CInt(precisionString) > 0, CInt(precisionString), _
 IIf(StringContains(v, "."), Len(v) - InStr(1, v, "."), 0))
 'track character case of formatSpecifier:
 specifier = IIf(formatSpecifier = "G", "D", "d")
 If TypeName(values(p)) = "Integer" Or TypeName(values(p)) = "Long" Then
 'Integer types: use {0:D} (recursive call):
 formattedValue = StringFormat("{0:" & specifier & "}", values(p))
 ElseIf TypeName(values(p)) = "Double" Then
 'Non-integer types: use {0:E}
 specifier = IIf(formatSpecifier = "G", "E", "e")
 'evaluate the exponential notation value (recursive call):
 eNotation = StringFormat("{0:" & specifier & "}", v)
 'get the power of eNotation:
 ePower = mId$(eNotation, InStr(1, UCase$(eNotation), "E-") + 1, Len(eNotation) - InStr(1, UCase$(eNotation), "E-"))
 If ePower > -5 And Abs(ePower) < precisionSpecifier Then
 'use {0:F} when ePower > -5 and abs(ePower) < precisionSpecifier:
 'evaluate the floating-point value (recursive call):
 specifier = IIf(formatSpecifier = "G", "F", "f")
 formattedValue = StringFormat("{0:" & formatSpecifier & _
 IIf(precisionSpecifier <> 0, precisionString, vbNullString) & "}", values(p))
 Else
 'fallback to {0:E} if previous rule didn't apply:
 formattedValue = eNotation
 End If
 End If
 GoTo AlignFormattedValue 'Skip the "ApplyStringFormat" step, it's applied already.
 Case "N", "n" 'NUMERIC format, formats string as an integer or decimal number.
 'Precision specifier determines number of decimal digits.
 'This implementation ignores regional settings'
 '(hard-coded group and decimal separators).
 precisionSpecifier = CInt(precisionString)
 If LenB(formatGroup) > 2 And precisionSpecifier > 0 Then
 'if a non-zero precision is specified...
 thisFormat = "#,##0"
 thisFormat = (thisFormat & ".") & String$(precisionSpecifier, Chr$(48))
 Else 'only the "D" is specified
 thisFormat = "#,##0"
 End If
 Case "P", "p" 'PERCENT format. Formats string as a percentage.
 'Value is multiplied by 100 and displayed with a percent symbol.
 'Precision specifier determines number of decimal digits.
 thisFormat = "#,##0%"
 precisionSpecifier = CInt(precisionString)
 If LenB(formatGroup) > 2 And precisionSpecifier > 0 Then
 'if a non-zero precision is specified...
 thisFormat = "#,##0"
 thisFormat = (thisFormat & ".") & String$(precisionSpecifier, Chr$(48))
 Else 'only the "P" is specified
 thisFormat = "#,##0"
 End If
 'Append the percentage sign to the format string:
 thisFormat = thisFormat & "%"
 Case "R", "r" 'ROUND-TRIP format (a string that can round-trip to an identical number)
 'example: ?StringFormat("{0:R}", 0.0000000001141596325677345362656)
 ' ...returns "0.000000000114159632567735"
 'convert value to a Double (chop off overflow digits):
 v = CDbl(v)
 Case "X", "x" 'HEX format. Formats a string as a Hexadecimal value.
 'Precision specifier determines number of total digits.
 'Returned string is prefixed with "&H" to specify Hex.
 v = Hex(v)
 precisionSpecifier = CInt(precisionString)
 If LenB(precisionString) > 0 Then 'precision here stands for left padding
 v = Right$(String$(precisionSpecifier, "0") & v, IIf(precisionSpecifier = 0, Len(v), precisionSpecifier))
 End If
 'add C# hex specifier, apply specified casing:
 '(VB6 hex specifier would cause Format() to reverse the formatting):
 v = "0x" & IIf(formatSpecifier = "X", UCase$(v), LCase$(v))
 escapeHex = True
 Case Else
 If IsNumeric(formatSpecifier) And val(formatGroup) = 0 Then
 formatSpecifier = formatGroup
 v = Format(v, formatGroup)
 Else
 Err.Raise ERR_FORMAT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_INVALID_FORMAT_STRING
 End If
 End Select
 GoTo ApplyStringFormat
DateTimeFormatSpecifiers:
 Select Case formatSpecifier
 Case "c", "C" 'CUSTOM date/time format
 'let VB Format() parse precision specifier as is:
 thisFormat = precisionString
 Case "d" 'SHORT DATE format
 thisFormat = "ddddd"
 Case "D" 'LONG DATE format
 thisFormat = "dddddd"
 Case "f" 'FULL DATE format (short)
 thisFormat = "dddddd h:mm AM/PM"
 Case "F" 'FULL DATE format (long)
 thisFormat = "dddddd ttttt"
 Case "g"
 thisFormat = "ddddd hh:mm AM/PM"
 Case "G"
 thisFormat = "ddddd ttttt"
 Case "s" 'SORTABLE DATETIME format
 thisFormat = "yyyy-mm-ddThh:mm:ss"
 Case "t" 'SHORT TIME format
 thisFormat = "hh:mm AM/PM"
 Case "T" 'LONG TIME format
 thisFormat = "ttttt"
 Case Else
 Err.Raise ERR_FORMAT_EXCEPTION, _
 ERR_SOURCE, ERR_MSG_INVALID_FORMAT_STRING
 End Select
 GoTo ApplyStringFormat
 End If
ApplyStringFormat:
 'apply computed format string:
 If thisFormat <> vbNullString Then
 formattedValue = Format(v, thisFormat)
 Else
 formattedValue = v
 End If
AlignFormattedValue:
 'apply specified alignment specifier:
 If alignmentSpecifier <> vbNullString Then
 alignmentPadding = Abs(CInt(alignmentSpecifier))
 If CInt(alignmentSpecifier) < 0 Then
 'negative: left-justified alignment
 If alignmentPadding - Len(formattedValue) > 0 Then _
 formattedValue = formattedValue & _
 String$(alignmentPadding - Len(formattedValue), PADDING_CHAR)
 Else
 'positive: right-justified alignment
 If alignmentPadding - Len(formattedValue) > 0 Then _
 formattedValue = String$(alignmentPadding - Len(formattedValue), PADDING_CHAR) & formattedValue
 End If
 End If
 'Replace C# hex specifier with VB6 hex specifier, only if hex specifier was introduced in this function:
 If (Not useLiteral And escapeHex) And StringContains(formattedValue, "0x") Then formattedValue = Replace$(formattedValue, "0x", "&H")
 'replace all occurrences of placeholder {i} with their formatted values:
 return_value = Replace(return_value, thisString, formattedValue, Count:=1)
 'reset before reiterating:
 thisFormat = vbNullString
 Next
checkEscapes:
 'if there's no more backslashes, don't bother checking for the rest:
 If useLiteral Or Not StringContains(return_value, "\") Then GoTo normalExit
 Dim escape As New EscapeSequence
 Dim escapes As New Collection
 escapes.Add escape.Create("\n", vbNewLine), "0"
 escapes.Add escape.Create("\q", Chr$(34)), "1"
 escapes.Add escape.Create("\t", vbTab), "2"
 escapes.Add escape.Create("\a", Chr$(7)), "3"
 escapes.Add escape.Create("\b", Chr$(8)), "4"
 escapes.Add escape.Create("\v", Chr$(13)), "5"
 escapes.Add escape.Create("\f", Chr$(14)), "6"
 escapes.Add escape.Create("\r", Chr$(15)), "7"
 For i = 0 To escapes.Count - 1
 Set escape = escapes(CStr(i))
 If StringContains(return_value, escape.EscapeString) Then _
 return_value = Replace(return_value, escape.EscapeString, escape.ReplacementString)
 If Not StringContains(return_value, "\") Then _
 GoTo normalExit
 Next
 'replace "ASCII (oct)" escape sequence
 Set regex = New RegExp
 regex.pattern = "\\(\d{3})"
 regex.IgnoreCase = True
 regex.Global = True
 Set matches = regex.Execute(format_string)
 Dim char As Long
 If matches.Count <> 0 Then
 For Each thisMatch In matches
 p = thisMatch.SubMatches(0)
 '"p" contains the octal number representing the ASCII code we're after:
 p = "&O" & p 'prepend octal prefix
 char = CLng(p)
 return_value = Replace(return_value, thisMatch.value, Chr$(char))
 Next
 End If
 'if there's no more backslashes, don't bother checking for the rest:
 If Not StringContains("\", return_value) Then GoTo normalExit
 'replace "ASCII (hex)" escape sequence
 Set regex = New RegExp
 regex.pattern = "\\x(\w{2})"
 regex.IgnoreCase = True
 regex.Global = True
 Set matches = regex.Execute(format_string)
 If matches.Count <> 0 Then
 For Each thisMatch In matches
 p = thisMatch.SubMatches(0)
 '"p" contains the hex value representing the ASCII code we're after:
 p = "&H" & p 'prepend hex prefix
 char = CLng(p)
 return_value = Replace(return_value, thisMatch.value, Chr$(char))
 Next
 End If
normalExit:
 Set escapes = Nothing
 Set escape = Nothing
 If Not useLiteral And StringContains(return_value, Chr$(27)) Then _
 return_value = Replace(return_value, Chr$(27), "\")
 StringFormat = return_value
End Function

I'm looking for a way to factor out the two (quite huge) Select...Case blocks, and to improve readability in general.

Note that this uses StringContains functions, and I should add a disclaimer about the fact that most of this code is already posted as an answer of mine at StackOverflow, although I do not consider it multi-posting, since I'm actually asking for a code review here.

asked Sep 5, 2013 at 12:43
\$\endgroup\$
1
  • \$\begingroup\$ I'm thinking of moving parts of this into a class, but I want to stick to a Public Function in a simple code module, to avoid breaking existing code (and still be able to call it as any other native vb function, without instanciating anything). \$\endgroup\$ Commented Sep 6, 2013 at 0:11

3 Answers 3

16
\$\begingroup\$

Key Points

  • Each Case block implements formatting functionality for a specific format specifier.
  • Goto statements indicate the function wants to be broken down into several smaller functions.
  • Local variables such as alignmentSpecifier, alignmentPadding, precisionString, precisionSpecifier, formatSpecifier and all others, could all be eliminated if there was a concept of a "FormatSpecifier" object that held all these values.
  • Bringing in escapeHex and the C# hex specifier is a hack easily made useless by correctly encapsulating each format specifier.
  • escapes collection gets rebuilt every time the function is called, which is inefficient; valid escape sequences don't change from one call to the next.
  • ASCII (hex & octal) escapes both desperately want to be part of that collection.
  • Replacing \\ with ASCII code for Esc works nicely to get backslashes escaped.

Warning: below code is absolute overkill - no one in their right minds (I did this just for fun!) would do all this just to format strings in their VB6 or VBA application. However it shows how the monolithic function can be refactored to remove all Select...Case blocks and Goto statements.


Rewrite

Here's the refactored module-level function - it uses a Private helper As New StringHelper, declared at module level ("declarations" section):

Public Function StringFormat(format_string As String, ParamArray values()) As String
 Dim valuesArray() As Variant
 valuesArray = values
 StringFormat = helper.StringFormat(format_string, valuesArray)
End Function

Escape Sequences

The EscapeSequence class was annoyingly leaving out ASCII escapes, so I tackled this first:

Private Type tEscapeSequence
 EscapeString As String
 ReplacementString As String
 IsAsciiCharacter As Boolean
 AsciiBase As AsciiEscapeBase
End Type
Public Enum AsciiEscapeBase
 Octal
 Hexadecimal
End Enum
Private this As tEscapeSequence
Option Explicit
Public Property Get EscapeString() As String
 EscapeString = this.EscapeString
End Property
Friend Property Let EscapeString(value As String)
 this.EscapeString = value
End Property
Public Property Get ReplacementString() As String
 ReplacementString = this.ReplacementString
End Property
Friend Property Let ReplacementString(value As String)
 this.ReplacementString = value
End Property
Public Property Get IsAsciiCharacter() As Boolean
 IsAsciiCharacter = this.IsAsciiCharacter
End Property
Friend Property Let IsAsciiCharacter(value As Boolean)
 this.IsAsciiCharacter = value
End Property
Public Property Get AsciiBase() As AsciiEscapeBase
 AsciiBase = this.AsciiBase
End Property
Friend Property Let AsciiBase(value As AsciiEscapeBase)
 this.AsciiBase = value
End Property

The factory Create function was added two optional parameters; one to specify whether the escape sequence indicates an ASCII replacement escape, the other to specify the base (an enum) of the digits representing the ASCII code:

Public Function Create(escape As String, replacement As String, _
 Optional ByVal isAsciiReplacement As Boolean = False, _
 Optional ByVal base As AsciiEscapeBase = Octal) As EscapeSequence
 Dim result As New EscapeSequence
 result.EscapeString = escape
 result.ReplacementString = replacement
 result.IsAsciiCharacter = isAsciiReplacement
 result.AsciiBase = base
 Set Create = result
End Function

Added an Execute method here - all escape sequences boil down to the same thing: *replace the EscapeString with the ReplacementString, so we might as well encapsulate it here. ASCII escapes are a little bit more complex so I put them in their own method:

Public Sub Execute(ByRef string_value As String)
 If this.IsAsciiCharacter Then
 ProcessAsciiEscape string_value, this.EscapeString
 ElseIf StringContains(string_value, this.EscapeString) Then
 string_value = Replace(string_value, this.EscapeString, this.ReplacementString)
 End If
End Sub
Private Sub ProcessAsciiEscape(ByRef format_string As String, _
 ByVal regexPattern As String)
 Dim regex As RegExp, matches As MatchCollection, thisMatch As Match
 Dim prefix As String, char As Long
 If Not StringContains(format_string, "\") Then Exit Sub
 Set regex = New RegExp
 regex.pattern = regexPattern
 regex.IgnoreCase = True
 regex.Global = True
 Select Case this.AsciiBase
 Case AsciiEscapeBase.Octal
 prefix = "&O"
 Case AsciiEscapeBase.Hexadecimal
 prefix = "&H"
 End Select
 Set matches = regex.Execute(format_string) 
 For Each thisMatch In matches
 char = CLng(prefix & thisMatch.SubMatches(0))
 format_string = Replace(format_string, thisMatch.value, Chr$(char))
 Next
 Set regex = Nothing
 Set matches = Nothing
End Sub

This puts escape sequences to bed, at least for now.

Format Specifiers

Each match in the main RegEx stands for a placeholder (something potentially looking like "{0,-10:C2}"); if we can call those "format specifiers", they can probably deserve their own StringFormatSpecifier class as well - the precision specifier is normally an Integer, but in the custom date format it's also taking a String so we'll make Precision a get-only property that's set when assigning CustomSpecifier:

Private Type tSpecifier
 Index As Integer
 identifier As String
 AlignmentSpecifier As Integer
 PrecisionSpecifier As Integer
 CustomSpecifier As String
End Type
Private this As tSpecifier
Option Explicit
Public Property Get Index() As Integer
 Index = this.Index
End Property
Public Property Let Index(value As Integer)
 this.Index = value
End Property 
Public Property Get identifier() As String
 identifier = this.identifier
End Property
Public Property Let identifier(value As String)
 this.identifier = value
End Property
Public Property Get Alignment() As Integer
 Alignment = this.AlignmentSpecifier
End Property
Public Property Let Alignment(value As Integer)
 this.AlignmentSpecifier = value
End Property
Public Property Get Precision() As Integer
 Precision = this.PrecisionSpecifier
End Property
Public Property Get CustomSpecifier() As String
 CustomSpecifier = this.CustomSpecifier
End Property
Public Property Let CustomSpecifier(value As String)
 this.CustomSpecifier = value
 If IsNumeric(value) And val(value) <> 0 Then this.PrecisionSpecifier = CInt(value)
End Property

All that's missing is a way to put all the pieces back together to perform the actual replacement - either we store the original string or we implement a ToString function:

Public Function ToString() As String
 ToString = "{" & this.Index & _
 IIf(this.AlignmentSpecifier <> 0, _
 "," & this.AlignmentSpecifier, vbNullString) & _
 IIf(this.identifier <> vbNullString, _
 ":" & this.identifier, vbNullString) & _
 IIf(this.CustomSpecifier <> vbNullString, _
 this.CustomSpecifier, vbNullString) & "}"
End Function

This puts another important piece to bed.

VB6 Interface?

If we encapsulated how each format specifier works into its own class, odds are we'd get over a dozen of very similar classes. If only we were in .net, we could create an interface for this, right? Very few people know that VB6 also supports interfaces. In fact, any class can be implemented by any other.

So the IStringFormatIdentifier interface/class looks like this:

Option Explicit
'returns a format string suitable for use with VB6's native Format() function.
Public Function GetFormatString(specifier As StringFormatSpecifier) As String
End Function
'returns the formatted value.
Public Function GetFormattedValue(value As Variant, _
 specifier As StringFormatSpecifier) As String
End Function
'compares specified format identifier with implementation-defined one, 
'returns true if format is applicable.
Public Function IsIdentifierMatch(specifier As StringFormatSpecifier) As Boolean
End Function

This interface needs an implementation of it for each and every single Case block of the original code - not going to list them all here, but this is GeneralNumericStringFormatIdentifier (the most complicated one); notice that doing this has also eliminated the recursive function calls:

Implements IStringFormatIdentifier
Option Explicit
Private Function IStringFormatIdentifier_GetFormatString(specifier As StringFormatSpecifier) As String
 IStringFormatIdentifier_GetFormatString = vbNullString
End Function
Private Function IStringFormatIdentifier_GetFormattedValue(value As Variant, specifier As StringFormatSpecifier) As String
 Dim result As String
 Dim exponentialNotation As String
 Dim power As Integer
 Dim exponentialFormat As New ExponentialStringFormatIdentifier
 Dim fixedPointFormat As New FixedPointStringFormatIdentifier
 Dim decimalFormat As New DecimalStringFormatIdentifier
 Dim formatSpecifier As New StringFormatSpecifier
 formatSpecifier.Alignment = specifier.Alignment
 formatSpecifier.CustomSpecifier = specifier.CustomSpecifier
 If StringMatchesAny(TypeName(value), "Integer", "Long") Then
 formatSpecifier.identifier = IIf(specifier.identifier = "G", "D", "d")
 result = decimalFormat.GetFormattedValue(value, formatSpecifier)
 ElseIf TypeName(value) = "Double" Then
 formatSpecifier.identifier = IIf(specifier.identifier = "G", "E", "e")
 exponentialNotation = exponentialFormat.GetFormattedValue(value, formatSpecifier)
 power = exponentialFormat.GetPower(exponentialNotation)
 If power > -5 And Abs(power) < specifier.Precision Then
 formatSpecifier.identifier = IIf(specifier.identifier = "G", "F", "f")
 result = fixedPointFormat.GetFormattedValue(value, formatSpecifier)
 Else
 result = exponentialNotation
 End If
 End If
 IStringFormatIdentifier_GetFormattedValue = result
 Set exponentialFormat = Nothing
 Set fixedPointFormat = Nothing
 Set decimalFormat = Nothing
 Set formatSpecifier = Nothing
End Function
Public Function GetFormattedValue(value As Variant, specifier As StringFormatSpecifier) As String
 GetFormattedValue = IStringFormatIdentifier_GetFormattedValue(value, specifier)
End Function
Private Function IStringFormatIdentifier_IsIdentifierMatch(specifier As StringFormatSpecifier) As Boolean
 IStringFormatIdentifier_IsIdentifierMatch = UCase$(specifier.identifier) = "G"
End Function

Once every format identifier ("C", "D", "N", etc.) has its implementation of the IStringFormatIdentifier interface, we're ready to initialize everything we need, once.

The StringHelper class

Diving into the StringHelper class, the "declarations" section contains the error-handling constants, the default padding character and a private type that defines the encapsulated properties (I just do that in every class I write):

Private Const ERR_FORMAT_EXCEPTION As Long = vbObjectError Or 9001
Private Const ERR_SOURCE As String = "StringHelper"
Private Const ERR_MSG_INVALID_FORMAT_STRING As String = "Invalid format string."
Private Const ERR_MSG_FORMAT_EXCEPTION As String = "The number indicating an argument to format is less than zero, or greater than or equal to the length of the args array."
Private Type tString
 PaddingCharacter As String * 1
 EscapeSequences As New Collection
 NumericSpecifiers As New Collection
 DateTimeSpecifiers As New Collection
End Type
Private Const PADDING_CHAR As String * 1 = " "
Private this As tString
Option Base 0
Option Explicit

Method Class_Initialize is where all the one-time stuff happens - this is where escape sequences, numeric and datetime specifiers are initialized:

Private Sub Class_Initialize()
 If this.PaddingCharacter = vbNullString Then this.PaddingCharacter = PADDING_CHAR
 InitEscapeSequences
 InitNumericSpecifiers
 InitDateTimeSpecifiers
End Sub
Private Sub InitEscapeSequences()
 Dim factory As New EscapeSequence
 Set this.EscapeSequences = New Collection
 this.EscapeSequences.Add factory.Create("\n", vbNewLine)
 this.EscapeSequences.Add factory.Create("\q", Chr$(34))
 this.EscapeSequences.Add factory.Create("\t", vbTab)
 this.EscapeSequences.Add factory.Create("\a", Chr$(7))
 this.EscapeSequences.Add factory.Create("\b", Chr$(8))
 this.EscapeSequences.Add factory.Create("\v", Chr$(13))
 this.EscapeSequences.Add factory.Create("\f", Chr$(14))
 this.EscapeSequences.Add factory.Create("\r", Chr$(15))
 this.EscapeSequences.Add factory.Create("\\x(\w{2})", 0, True, Hexadecimal)
 this.EscapeSequences.Add factory.Create("\\(\d{3})", 0, True, Octal)
 Set factory = Nothing
End Sub
Private Sub InitNumericSpecifiers()
 Set this.NumericSpecifiers = New Collection
 this.NumericSpecifiers.Add New CurrencyStringFormatIdentifier
 this.NumericSpecifiers.Add New DecimalStringFormatIdentifier
 this.NumericSpecifiers.Add New GeneralNumericStringFormatIdentifier
 this.NumericSpecifiers.Add New PercentStringFormatIdentifier
 this.NumericSpecifiers.Add New FixedPointStringFormatIdentifier
 this.NumericSpecifiers.Add New ExponentialStringFormatIdentifier
 this.NumericSpecifiers.Add New HexStringFormatIdentifier
 this.NumericSpecifiers.Add New RoundTripStringFormatIdentifier
 this.NumericSpecifiers.Add New NumericPaddingStringFormatIdentifier
End Sub
Private Sub InitDateTimeSpecifiers()
 Set this.DateTimeSpecifiers = New Collection
 this.DateTimeSpecifiers.Add New CustomDateFormatIdentifier
 this.DateTimeSpecifiers.Add New FullDateLongStringFormatSpecifier
 this.DateTimeSpecifiers.Add New FullDateShortStringFormatIdentifier
 this.DateTimeSpecifiers.Add New GeneralLongDateTimeStringFormatIdentifier
 this.DateTimeSpecifiers.Add New GeneralShortDateTimeStringFormatIdentifier
 this.DateTimeSpecifiers.Add New LongDateFormatIdentifier
 this.DateTimeSpecifiers.Add New LongTimeStringFormatIdentifier
 this.DateTimeSpecifiers.Add New ShortDateFormatIdentifier
 this.DateTimeSpecifiers.Add New SortableDateTimeStringFormatIdentifier
End Sub

To make the PaddingCharacter configurable, it only needs to be exposed as a property.

So let's recap here, we have:

  • A collection of escape sequences that know how to to process themselves
  • A collection of numeric specifiers that know how to process themselves
  • A collection of date/time specifiers that know how to process themselves

All we're missing is a function that will take a format_string, validate it and return a collection of StringFormatSpecifier. The regular expression we're using to do this can also be simplified a bit - unfortunately this doesn't make it run any faster (performance-wise, this function is really where the bottleneck is):

Private Function GetFormatSpecifiers(ByVal format_string As String, valuesCount As Integer) As Collection
'executes a regular expression against format_string to extract all placeholders into a MatchCollection
 Dim regex As New RegExp
 Dim matches As MatchCollection
 Dim thisMatch As Match
 Dim result As New Collection
 Dim specifier As StringFormatSpecifier
 Dim csvIndices As String
 Dim uniqueCount As Integer
 Dim largestIndex As Integer
 regex.pattern = "\{(\w+)(,円\-?\d+)?(\:[^}]+)?\}"
 ' literal {
 ' [1] numbered captured group, any number of repetitions (Index)
 ' alphanumeric, one or more repetitions
 ' [2] numbered captured group, zero or one repetitions (AlignmentSpecifier)
 ' literal ,
 ' literal -, zero or one repetitions
 ' any digit, one or more repetitions
 ' [3] numbered captured group, zero or one repetitions (FormatSpecifier)
 ' literal :
 ' any character except '}', one or more repetitions
 ' literal }
 regex.IgnoreCase = True
 regex.Global = True
 Set matches = regex.Execute(format_string)
 For Each thisMatch In matches
 Set specifier = New StringFormatSpecifier
 specifier.Index = CInt(thisMatch.SubMatches(0))
 If Not StringContains(csvIndices, specifier.Index & ",") Then
 uniqueCount = uniqueCount + 1
 csvIndices = csvIndices & specifier.Index & ","
 End If
 If specifier.Index > largestIndex Then largestIndex = specifier.Index
 If Not thisMatch.SubMatches(1) = vbEmpty Then specifier.Alignment = CInt(Replace(CStr(thisMatch.SubMatches(1)), ",", vbNullString))
 If Not thisMatch.SubMatches(2) = vbEmpty Then
 specifier.identifier = Left(Replace(CStr(thisMatch.SubMatches(2)), ":", vbNullString), 1)
 specifier.CustomSpecifier = Replace(CStr(thisMatch.SubMatches(2)), ":" & specifier.identifier, vbNullString)
 End If
 result.Add specifier
 Next
 If matches.Count > 0 And (uniqueCount <> valuesCount) Or (largestIndex >= uniqueCount) Or valuesCount = 0) Then Err.Raise ERR_FORMAT_EXCEPTION, ERR_SOURCE, ERR_MSG_FORMAT_EXCEPTION
 Set GetFormatSpecifiers = result
 Set regex = Nothing
 Set matches = Nothing
End Function

The actual StringFormat function takes an array of Variant sent from the module function's ParamArray values() parameter; taking a ParamArray here as well would make things more complicated than they already are.

So all the function really needs to do, is loop through all specifiers in format_string, and apply the appropriate format specifier's formatting. Then apply the alignment specifier and execute escape sequences (unless format_string starts with a "@") - with everything properly encapsulated in specialized objects, this should leave a pretty readable implementation:

Public Function StringFormat(format_string As String, values() As Variant) As String
 Dim result As String
 result = format_string
 Dim specifiers As Collection
 Dim specifier As StringFormatSpecifier
 Set specifiers = GetFormatSpecifiers(result, UBound(values) + 1)
 Dim useLiteral As Boolean 
 'when format_string starts with "@", escapes are not replaced 
 '(string is treated as a literal string with placeholders)
 useLiteral = StringStartsWith("@", result)
 'remove the "@" literal specifier from the result string
 If useLiteral Then result = Right(result, Len(result) - 1) 
 'replace escaped backslashes with 'ESC' character [Chr$(27)] 
 'to optimize escape sequences evaluation:
 If Not useLiteral And StringContains(result, "\\") Then _
 result = Replace(result, "\\", Chr$(27))
 Dim formattedValue As String
 Dim alignmentPadding As Integer
 Dim identifier As IStringFormatIdentifier
 Dim identifierFound As Boolean
 For Each specifier In specifiers
 formattedValue = values(specifier.Index)
 identifierFound = (specifier.identifier = vbNullString)
 If IsNumeric(values(specifier.Index)) Then
 For Each identifier In this.NumericSpecifiers
 If identifier.IsIdentifierMatch(specifier) Then
 identifierFound = True
 formattedValue = identifier.GetFormattedValue(values(specifier.Index), specifier)
 End If
 Next
 ElseIf TypeName(values(specifier.Index)) = "Date" Then
 For Each identifier In this.DateTimeSpecifiers
 If identifier.IsIdentifierMatch(specifier) Then
 identifierFound = True
 formattedValue = identifier.GetFormattedValue(values(specifier.Index), specifier)
 End If
 Next
 End If
 If Not identifierFound Then Err.Raise ERR_FORMAT_EXCEPTION, ERR_SOURCE, ERR_MSG_INVALID_FORMAT_STRING
 alignmentPadding = Abs(specifier.Alignment)
 If specifier.Alignment < 0 Then
 'negative: left-justified alignment
 If alignmentPadding - Len(formattedValue) > 0 Then _
 formattedValue = formattedValue & String$(alignmentPadding - Len(formattedValue), this.PaddingCharacter)
 ElseIf specifier.Alignment > 0 Then
 'positive: right-justified alignment
 If alignmentPadding - Len(formattedValue) > 0 Then _
 formattedValue = String$(alignmentPadding - Len(formattedValue), this.PaddingCharacter) & formattedValue
 End If
 'replace all occurrences of placeholder {i} with their formatted values:
 result = Replace(result, specifier.ToString, formattedValue)
 Next
 Dim escape As EscapeSequence
 If Not useLiteral And StringContains(result, "\") Then
 For Each escape In this.EscapeSequences
 escape.Execute result
 Next
 End If
 If Not useLiteral And StringContains(result, Chr$(27)) Then result = Replace(result, Chr$(27), "\")
 StringFormat = result
End Function

Feel free to comment below! :)

answered Sep 9, 2013 at 16:20
\$\endgroup\$
9
  • 1
    \$\begingroup\$ I should add that performance tests calling StringFormat("{0,20}", "test string") 10,000 times in a loop, with the monolithic function average at around 2730 (2703-2797) milliseconds, while the refactored and definitely more object-oriented version averages at around 2390 (2312-2594) milliseconds, which is a subtle but welcome improvement! \$\endgroup\$ Commented Sep 9, 2013 at 18:52
  • 5
    \$\begingroup\$ And I thought Winston Ewert was the only one here with the longest answers. :-P Kudos for such a lengthy (and assumably great) self-answer contribution. It's a shame I have no idea what any of this means. \$\endgroup\$ Commented Sep 10, 2013 at 0:31
  • 2
    \$\begingroup\$ As this IS an answer, no one can properly do a full review if needed. Any criticisms and such would be limited to comments. Moreover, don't be afraid to self-answer (as long as it's still intended to be an answer). I've done so before, but haven't received votes on any of them. I think it's kinda hard to self-answer on CR, especially if you yourself can't figure out one of the best reviewed versions of your code. \$\endgroup\$ Commented Sep 10, 2013 at 0:47
  • 4
    \$\begingroup\$ In that case, there would have to be another answer to review that answer. Answerception? \$\endgroup\$ Commented Sep 10, 2013 at 1:00
  • 2
    \$\begingroup\$ @Greedo I had it all on an old computer.. which died in 2015. This is all that's left of it :-/ the original function on the SO post is self-contained though, no? \$\endgroup\$ Commented Jun 5, 2018 at 18:57
12
\$\begingroup\$

Two small things I noticed at a glance, both involve this code and it looks like it made it into your refactored version as well.

 'some error-handling constants:
 Const ERR_FORMAT_EXCEPTION As Long = vbObjectError Or 9001
 Const ERR_ARGUMENT_NULL_EXCEPTION As Long = vbObjectError Or 9002
 Const ERR_ARGUMENT_EXCEPTION As Long = vbObjectError Or 9003

Why on Earth are you bitwise Oring these? Just add them like (削除) in the documentation (削除ここまで) every other sane developer.

 'some error-handling constants:
 Const ERR_FORMAT_EXCEPTION As Long = vbObjectError + 9001
 Const ERR_ARGUMENT_NULL_EXCEPTION As Long = vbObjectError + 9002
 Const ERR_ARGUMENT_EXCEPTION As Long = vbObjectError + 9003

While you're at it, why isn't this an Enum?

'some error-handling constants:
Public Enum FormatError
 ERR_FORMAT_EXCEPTION = vbObjectError + 9001
 ERR_ARGUMENT_NULL_EXCEPTION
 ERR_ARGUMENT_EXCEPTION
End Enum 
answered Aug 15, 2014 at 20:06
\$\endgroup\$
9
  • 4
    \$\begingroup\$ Dude, where have you been all this time! \$\endgroup\$ Commented Aug 15, 2014 at 20:08
  • 10
    \$\begingroup\$ Wasting time on Stack Overflow. \$\endgroup\$ Commented Aug 15, 2014 at 20:10
  • \$\begingroup\$ "Because bitwise Or is 'correct'." I agree with the Enum though. \$\endgroup\$ Commented Jun 6, 2016 at 13:23
  • 1
    \$\begingroup\$ How is the Or correct @MarkHurd? Sure, it works and is completely equivalent in this scenario, but your average VB dev is going to stare at the screen uttering "WTF". In my mind, the correct way is the way that's easy to understand. \$\endgroup\$ Commented Jun 6, 2016 at 13:32
  • 1
    \$\begingroup\$ @RubberDuck You've caused me to go back and quickly review my VB6 code: I used Or mostly, but sometimes I did something like Const ErrBase = vbObjectError Or 1000 : Err.Raise ErrBase + 19. Anyway, the other facility codes mostly aren't relevant to VB6 code, although we should probably now use &hA0000000 instead anyway. \$\endgroup\$ Commented Jun 6, 2016 at 14:40
3
\$\begingroup\$

As mentioned in the comments, note that this is "reviewing" the code in the self-answer:

The Replace calls effectively implementing the missing RemoveFrom method isn't needed here. You could add extra brackets to the RegEx, and just extract the captures you want, but here you know the position and length of what you're skipping, so just use Mid$. I.e. this:

 If Not thisMatch.SubMatches(1) = vbEmpty Then specifier.Alignment = CInt(Replace(CStr(thisMatch.SubMatches(1)), ",", vbNullString))
 If Not thisMatch.SubMatches(2) = vbEmpty Then
 specifier.identifier = Left(Replace(CStr(thisMatch.SubMatches(2)), ":", vbNullString), 1)
 specifier.CustomSpecifier = Replace(CStr(thisMatch.SubMatches(2)), ":" & specifier.identifier, vbNullString)
 End If

becomes this:

 If Not thisMatch.SubMatches(1) = vbEmpty Then specifier.Alignment = CInt(Mid$(CStr(thisMatch.SubMatches(1)), 2))
 If Not thisMatch.SubMatches(2) = vbEmpty Then
 specifier.identifier = Mid$(CStr(thisMatch.SubMatches(2)), 2, 1)
 specifier.CustomSpecifier = Mid$(CStr(thisMatch.SubMatches(2)), 3)
 End If

BUG

To avoid counting wrong for the pedantic case of "{2}{11}{1}...", initialise csvIndices to "," and search for "," & specifier.Index & ",".

answered Jun 6, 2016 at 13:42
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.