I have implemented a custom split method that replicates the original functionality of the collection's split method in Swift and keep the delimiters (element separator). After a lot of trial and error I think I was able to make it behave properly:
extension Collection {
func splitAndKeep(
maxSplits: Int = .max,
omittingEmptySubsequences: Bool = true,
whereSeparator isSeparator: (Element) throws -> Bool
) rethrows -> [SubSequence] {
precondition(maxSplits >= 0, "maxSplits can not be negative")
guard !isEmpty else { return [] }
var subsequences: [SubSequence] = []
var start = startIndex
while
let idx = try self[start...].indices
.first(where: { try isSeparator(self[0ドル]) }),
subsequences.count < maxSplits {
let subsequenceRange = start..<idx
if !omittingEmptySubsequences {
subsequences.append(self[subsequenceRange])
} else if !subsequenceRange.isEmpty {
subsequences.append(self[subsequenceRange])
}
if subsequences.count == maxSplits {
start = idx
break
}
start = index(after: idx)
let separatorRange = idx..<start
if !omittingEmptySubsequences {
subsequences.append(self[separatorRange])
} else if !separatorRange.isEmpty {
subsequences.append(self[separatorRange])
}
}
let tailRange = start..<endIndex
if !omittingEmptySubsequences {
subsequences.append(self[tailRange])
} else if !tailRange.isEmpty {
subsequences.append(self[tailRange])
}
return subsequences
}
}
let string = "*(22+13)/"
let delimiters = Set<Character>("{+-/*()%}.")
let ss = string.splitAndKeep(whereSeparator: delimiters.contains) // ["*", "(", "22", "+", "13", ")", "/"]
let ss1 = string.splitAndKeep(maxSplits: 0, whereSeparator: delimiters.contains) // ["*(22+13)/"]
let ss2 = string.splitAndKeep(maxSplits: 1, whereSeparator: delimiters.contains) // ["*", "(22+13)/"]
let ss3 = string.splitAndKeep(maxSplits: 2, whereSeparator: delimiters.contains) // ["*", "(", "22+13)/"]
let ss4 = string.splitAndKeep(omittingEmptySubsequences: false, whereSeparator: delimiters.contains) // ["", "*", "", "(", "22", "+", "13", ")", "", "/", ""]
let ss5 = string.splitAndKeep(maxSplits: 0, omittingEmptySubsequences: false, whereSeparator: delimiters.contains) // ["*(22+13)/"]
let ss6 = string.splitAndKeep(maxSplits: 1, omittingEmptySubsequences: false, whereSeparator: delimiters.contains) // ["", "*(22+13)/"]
let ss7 = string.splitAndKeep(maxSplits: 2, omittingEmptySubsequences: false, whereSeparator: delimiters.contains) // ["", "*", "(22+13)/"]
2 Answers 2
The code is written clearly and works correctly, as far as I can see. Here are some suggestions, some of which are a matter of personal preference.
Some of the following suggestions are also inspired by the implementation of Collection.split()
which can be inspected in Collection.swift.
I am not a huge fan of the "double negation" in
guard !isEmpty else { return [] }
and would write that as
if isEmpty {
return []
}
If a if
or while
statement is used with a multi-line condition then it can be difficult to recognize where the condition ends and the loop body starts:
while
let idx = try self[start...].indices
.first(where: { try isSeparator(self[0ドル]) }),
subsequences.count < maxSplits {
let subsequenceRange = start..<idx
// ...
}
One option is to move the left curly brace to the next line:
while
let idx = try self[start...].indices
.first(where: { try isSeparator(self[0ドル]) }),
subsequences.count < maxSplits
{
let subsequenceRange = start..<idx
// ...
}
A sequence like
let subsequenceRange = start..<idx
if !omittingEmptySubsequences {
subsequences.append(self[subsequenceRange])
} else if !subsequenceRange.isEmpty {
subsequences.append(self[subsequenceRange])
}
occurs three times in your code: For the current slice from the last separator to the newly found separator, for the separator itself, and for the remaining slice at the end.
Moving this logic to a (nested) function removes this duplication, and makes the overall logic easier to recognize.
Combining all these suggestions, the code would look like this:
extension Collection {
func splitAndKeep(
maxSplits: Int = .max,
omittingEmptySubsequences: Bool = true,
whereSeparator isSeparator: (Element) throws -> Bool
) rethrows -> [SubSequence] {
precondition(maxSplits >= 0, "maxSplits can not be negative")
if isEmpty {
return []
}
var subsequences: [SubSequence] = []
var start = startIndex
func appendSubsequence(upTo: Index) {
if !omittingEmptySubsequences || upTo != start {
subsequences.append(self[start..<upTo])
}
}
while
var idx = try self[start...].indices
.first(where: { try isSeparator(self[0ドル]) }),
subsequences.count < maxSplits
{
// Append the slice self[start..<idx]:
appendSubsequence(upTo: idx)
start = idx
if subsequences.count == maxSplits {
break
}
// Append the separator:
formIndex(after: &idx)
appendSubsequence(upTo: idx)
start = idx
}
// Append the remaining slice:
appendSubsequence(upTo: endIndex)
return subsequences
}
}
Finally you could also provide a method taking a single separator instead of a predicate, like the corresponding Collection method:
extension Collection where Element: Equatable {
func splitAndKeep(
separator: Element,
maxSplits: Int = .max,
omittingEmptySubsequences: Bool = true
) -> [SubSequence] {
splitAndKeep(
maxSplits: maxSplits,
omittingEmptySubsequences: omittingEmptySubsequences,
whereSeparator: { 0ドル == separator }
)
}
}
-
\$\begingroup\$ I definitely agree about the
appendSubsequence
and even thought about adding it before asking this question. Thank you so much @MartinR. \$\endgroup\$Leo Dabus– Leo Dabus2022年02月20日 16:39:19 +00:00Commented Feb 20, 2022 at 16:39 -
1\$\begingroup\$ @LeoDabus: You are welcome! \$\endgroup\$Martin R– Martin R2022年02月20日 16:44:27 +00:00Commented Feb 20, 2022 at 16:44
-
\$\begingroup\$ We can also move
start = idx
to the end of theappendSubsequence
methodstart = upTo
\$\endgroup\$Leo Dabus– Leo Dabus2022年02月22日 18:40:26 +00:00Commented Feb 22, 2022 at 18:40 -
\$\begingroup\$ @LeoDabus: Yes, I had thought about that, but then the function has "too much side effect" in my opinion, and makes the main loop less self-explaining. Surely a matter of personal taste. \$\endgroup\$Martin R– Martin R2022年02月22日 19:22:30 +00:00Commented Feb 22, 2022 at 19:22
-
2\$\begingroup\$ @Rob: Yes, definitely! – Perhaps you want to post that as an answer? \$\endgroup\$Martin R– Martin R2022年02月25日 12:59:45 +00:00Commented Feb 25, 2022 at 12:59
I might suggest a few refinements on Martin’s answer (+1):
If you’re going to mutate
subsequences
inappendSubsequence
, you might as well advance thestart
index there, too;A matter of personal opinion, but I find expressions like
!range.isEmpty
to be more functionally expressive thanupTo != start
.The expression:
let idx = try self[start...].indices .first(where: { try isSeparator(self[0ドル]) })
Can be simplified to:
var idx = try self[start...].firstIndex(where: isSeparator)
Thus:
extension Collection {
func splitAndKeep(
maxSplits: Int = .max,
omittingEmptySubsequences: Bool = true,
whereSeparator isSeparator: (Element) throws -> Bool
) rethrows -> [SubSequence] {
precondition(maxSplits >= 0, "maxSplits can not be negative")
if isEmpty {
return []
}
var subsequences: [SubSequence] = []
var lowerBound = startIndex
func appendAndAdvance(with upperBound: Index) {
let range = lowerBound ..< upperBound
if !omittingEmptySubsequences || !range.isEmpty {
subsequences.append(self[range])
lowerBound = upperBound
}
}
while
var upperBound = try self[lowerBound...].firstIndex(where: isSeparator),
subsequences.count < maxSplits
{
appendAndAdvance(with: upperBound) // portion before separator
if subsequences.count == maxSplits {
break
}
formIndex(after: &upperBound)
appendAndAdvance(with: upperBound) // separator
}
appendAndAdvance(with: endIndex) // trailing portion
return subsequences
}
}
-
\$\begingroup\$ Thank you so much @Rob for your suggestions as well !! \$\endgroup\$Leo Dabus– Leo Dabus2022年02月25日 17:17:32 +00:00Commented Feb 25, 2022 at 17:17