I have implemented a piece table in Swift, based on a Javascript implementation. I don't have much experience with Swift yet, so I'm especially interested if I'm doing things "the Swift way". Any feedback is highly appreciated.
import Foundation
public class PieceTable {
private let original: String
private var buffer: String
private var bufferLength: Int
private var pieces: [Piece]
public convenience init() {
self.init(from: "")
}
public init(from original: String) {
self.original = original
self.buffer = ""
self.bufferLength = 0
self.pieces = [Piece(buffer: false, offset: 0, length: original.characters.count)]
}
public func insert(string: String, offset: Int) {
if string.isEmpty {
return
}
let addBufferOffset = bufferLength
self.buffer += string
self.bufferLength += string.characters.count
let (piece, index, offsetInPiece) = self.pieceIndexAndOffset(fromSequenceOffset: offset)
// If we are inserting at the end of the piece and at the end of the buffer, we can just increase its length
if piece.buffer && offsetInPiece == piece.length && (piece.offset + piece.length) == addBufferOffset {
self.pieces[index].length += string.characters.count
return
}
self.pieces.replaceSubrange(index...index, with: [
Piece(buffer: piece.buffer, offset: piece.offset, length: offsetInPiece),
Piece(buffer: true, offset: addBufferOffset, length: string.characters.count),
Piece(buffer: piece.buffer, offset: piece.offset + offsetInPiece, length: piece.length - offsetInPiece)
].filter({0ドル.length > 0}))
}
public func delete(offset: Int, length: Int) {
if length == 0 {
return
}
if length < 0 {
self.delete(offset: offset + length, length: -length)
return
}
let (firstPiece, firstPieceIndex, offsetInFirstPiece) = self.pieceIndexAndOffset(fromSequenceOffset: offset)
let (lastPiece, lastPieceIndex, offsetInLastPiece) = self.pieceIndexAndOffset(fromSequenceOffset: offset + length)
// If the delete spans only one piece and is at the very start or end of the piece, we can just modify it
if firstPieceIndex == lastPieceIndex {
if offsetInFirstPiece == 0 {
self.pieces[firstPieceIndex].offset += length
self.pieces[firstPieceIndex].length -= length
return
} else if offsetInLastPiece == lastPiece.length {
self.pieces[firstPieceIndex].length -= length
return
}
}
self.pieces.replaceSubrange(firstPieceIndex...lastPieceIndex, with: [
Piece(buffer: firstPiece.buffer, offset: firstPiece.offset, length: offsetInFirstPiece),
Piece(buffer: lastPiece.buffer, offset: lastPiece.offset + offsetInLastPiece, length: lastPiece.length - offsetInLastPiece)
].filter({0ドル.length > 0}))
}
public func get() -> String {
var string = ""
for piece in self.pieces {
string += self.substring(for: piece)
}
return string
}
public func get(offset: Int, length: Int) -> String {
if length < 0 {
return self.get(offset: offset + length, length: -length)
}
let (firstPiece, firstPieceIndex, offsetInFirstPiece) = self.pieceIndexAndOffset(fromSequenceOffset: offset)
let (lastPiece, lastPieceIndex, offsetInLastPiece) = self.pieceIndexAndOffset(fromSequenceOffset: offset + length)
if offsetInFirstPiece + length < firstPiece.length {
return self.substring(for: firstPiece, offset: offsetInFirstPiece, length: length)
}
var string = self.substring(for: firstPiece, offset: offsetInFirstPiece, length: firstPiece.length - offsetInFirstPiece)
for index in (firstPieceIndex + 1)..<lastPieceIndex {
string += self.substring(for: self.pieces[index])
}
string += self.substring(for: lastPiece, offset: 0, length: offsetInLastPiece)
return string
}
private func pieceIndexAndOffset(fromSequenceOffset offset: Int) -> (piece: Piece, index: Int, offset: Int) {
precondition(offset >= 0, "Offset out of bounds")
var remainingOffset = offset
for (index, piece) in self.pieces.enumerated() {
if remainingOffset <= piece.length {
return (piece, index, remainingOffset)
}
remainingOffset -= piece.length
}
precondition(false, "Offset out of bounds")
}
private func substring(for piece: Piece) -> String {
let buffer = piece.buffer ? self.buffer : self.original
let startIndex = buffer.index(buffer.startIndex, offsetBy: piece.offset)
let endIndex = buffer.index(startIndex, offsetBy: piece.length)
return buffer[startIndex..<endIndex]
}
private func substring(for piece: Piece, offset: Int, length: Int) -> String {
let buffer = piece.buffer ? self.buffer : self.original
let startIndex = buffer.index(buffer.startIndex, offsetBy: piece.offset + offset)
let endIndex = buffer.index(startIndex, offsetBy: length)
return buffer[startIndex..<endIndex]
}
}
private struct Piece {
public var buffer: Bool
public var offset: Int
public var length: Int
}
Usage example:
var buffer = PieceTable(from: "Hello, world!")
buffer.insert(string: "crazy ", offset: 7)
buffer.get() // "Hello, crazy world!"
1 Answer 1
Generally your Swift code looks very good to me, and I could not detect something totally "unswifty". There are some things which can improved or simplified.
struct
vs class
You have defined PieceTable
as class
which is OK if you intend to
pass references to a "Piece Table instances" around. In that case you can
define instances as a constant (let
):
let buffer = PieceTable(from: "Hello, world!")
buffer.insert(string: "crazy ", offset: 7)
If you don't need the reference semantics then it is preferable to define
a struct
(and mark methods as mutating
where necessary).
Access modifiers
You added public/private access modifiers, which is good. But I don't
see why the properties of Piece
are declared private
if the
type itself is public
. It should be changed to
private struct Piece {
var buffer: Bool
var offset: Int
var length: Int
}
let
vs var
You declared properties a constants where possible, which is good.
The buffer
property of Piece
is also never modified after
initialization, so let's make this
private struct Piece {
let buffer: Bool
var offset: Int
var length: Int
}
Using self
You used self
for all accesses to type properties or when invoking methods, which is not necessary in Swift.
From the documentation:
In practice, you don’t need to write self in your code very often. If you don’t explicitly write self, Swift assumes that you are referring to a property or method of the current instance whenever you use a known property or method name within a method. ...
The main exception to this rule occurs when a parameter name for an instance method has the same name as a property of that instance. In this situation, the parameter name takes precedence, and it becomes necessary to refer to the property in a more qualified way. You use the self property to distinguish between the parameter name and the property name.
There are different opinions about this issue (see e.g. When should I access properties with self in swift? on Stack Overflow). The The Official raywenderlich.com Swift Style Guide recommends:
For conciseness, avoid using self since Swift does not require it to access an object's properties or invoke its methods.
Use self only when required by the compiler (in @escaping closures, or in initializers to disambiguate properties from arguments). In other words, if it compiles without self then omit it.
So if you used self
intentionally: that's fine (since you are
consequent about that). Otherwise note that you don't have to.
Method names
These method names do not provide any information about what they do:
public func get() -> String
public func get(offset: Int, length: Int) -> String
For the first one I would suggest
public var stringValue -> String
as e.g. in NSNumber.stringValue
,
and for the second one something like
public func substring(offset: Int, length: Int) -> String
// or
public func substring(range: Range<Int>) -> String
A simplification
The get()/stringValue
method can be simplified using reduce
:
public var stringValue: String {
return pieces.reduce("") { 0ドル + substring(for: 1ドル) }
}
Possible improvements
Both the get(offset: Int, length: Int)
and the delete(offset: Int, length: Int)
method call pieceIndexAndOffset()
twice: first with offset
and then with
offset + length
. A possible improvement would be to start the second search
at the piece and offset found in the first search, instead of starting at the
very beginning again.
Useful protocols to adopt
If you adopt the CustomStringConvertible
protocol
extension PieceTable: CustomStringConvertible {
public var description: String {
return stringValue
}
}
then a piece table can be printed or used in string interpolation without explicit conversion to a string:
let buffer = PieceTable(from: "Hello, world!")
buffer.insert(string: "crazy ", offset: 7)
print("The text is: \(buffer)")
And CustomDebugStringConvertible
is useful for debugging,
here is a possible implementation:
extension PieceTable: CustomDebugStringConvertible {
public var debugDescription: String {
var s = ""
print("Original:", original, to: &s)
print("Buffer:", buffer, to: &s)
for (i, piece) in pieces.enumerated() {
print("Piece#\(i):", piece.buffer ? "add " : "orig", piece.offset, piece.length, to: &s)
}
return s
}
}
In Swift 3 you'll have to declare the properties original
, buffer
,
and pieces
as fileprivate
in order to make them accessible in
an extension in the same file. This has been fixed in Swift 4.
Example:
let buffer = PieceTable(from: "Hello, world!")
buffer.insert(string: "crazy ", offset: 7)
debugPrint(buffer)
Output:
Original: Hello, world! Buffer: crazy Piece#0: orig 0 7 Piece#1: add 0 6 Piece#2: orig 7 6
-
\$\begingroup\$ Thanks! I'll keep the PieceTable as a class, because I still think I will be passing around a reference to it, and I'll stop using
self
so often (I was convinced for some reason it was necessary). Other than that, I'll be applying most if not all of your suggestions. Thank you so much for taking out some time to teach a stranger, I really appreciate it! \$\endgroup\$T .– T .2017年08月08日 15:17:22 +00:00Commented Aug 8, 2017 at 15:17