Building upon my previous question, I have come up with a method that searches through a buffer and returns a pointer to the end of the line. The primary purpose of the function is to extract rows from a CSV file using the logic specified in RFC 4180; the third parameter, isQuotedSequence,
can be used in cases where parsing is parallelized or started in the middle of a string that is known to be quoted.
COMMENT @
C Interface:
extern char* ReadLine(const char* bufferOffset, const char* bufferTail, long long isQuotedSequence);
Reference:
https://www.rfc-editor.org/rfc/rfc4180
@
;-----------------------------; (CONSTANTS)
; ASCII codes the scanner compares against, plus the value that represents
; the "inside a quoted field" state.
CARRIAGE_RETURN = 00000000Dh
DOUBLE_QUOTE = 000000022h
LINE_FEED = 00000000Ah
TRUE = 000000001h
;-----------------------------; (ARGUMENTS)
; Incoming argument registers in C parameter order; rcx, rdx and r8 are the
; first three integer argument registers of the Microsoft x64 convention.
arg0 textequ <rcx>
arg1 textequ <rdx>
arg2 textequ <r8>
;-----------------------------; (LOCALS)
; Register aliases for the working values. Three of them overlap an argument
; register (ecx with arg0, rdx with arg1, r8d with arg2), so each argument has
; to be copied out before the overlapping alias is written. bufferOffset lives
; in rax, so it is also the value the caller receives on return.
bufferOffset textequ <rax>
bufferTail textequ <r9>
currentCharacter textequ <ecx>
isQuotedSequence textequ <rdx>
nextCharacter textequ <r8d>
.code
;------------------------------------------------------------------------------
; char* ReadLine(const char* bufferOffset, const char* bufferTail,
;                long long isQuotedSequence)
;
; Scans [bufferOffset, bufferTail) for the end of the current CSV record and
; returns a pointer just past its line terminator.
;
; ABI:    Microsoft x64, leaf function (no calls, no stack usage)
; In:     arg0 = first byte to scan
;         arg1 = one past the last readable byte
;         arg2 = non-zero when the scan starts inside a quoted field
; Out:    bufferOffset =
;           - the address following the first LF, or CR not followed by LF,
;             that lies outside double quotes; or
;           - bufferTail when the scan runs out of bytes (this includes a
;             terminator that is the very last byte of the buffer); or
;           - the incoming bufferOffset when it is not below bufferTail
; Clobb:  arg0, arg1, arg2, bufferTail, flags (all volatile in this ABI)
;------------------------------------------------------------------------------
ReadLine proc
        mov     bufferOffset, arg0                      ; cursor = start of scan
        mov     bufferTail, arg1                        ; save the end pointer before its register is reused
        mov     isQuotedSequence, arg2                  ; seed the quote state from the caller
        test    isQuotedSequence, isQuotedSequence      ; C truthiness: any non-zero flag means "quoted"
        jz      ReadLine@StateReady
        mov     isQuotedSequence, TRUE                  ; normalize so the xor toggle and the compare below agree
ReadLine@StateReady:
        cmp     bufferOffset, bufferTail                ; anything to read at all?
        jae     ReadLine@Return                         ; addresses are unsigned, so jae rather than jge
        movzx   nextCharacter, byte ptr [bufferOffset]  ; prime the one-byte lookahead

ReadLine@NextChar:
        mov     currentCharacter, nextCharacter         ; consume the lookahead byte
        add     bufferOffset, 1h                        ; cursor now points just past currentCharacter
        cmp     bufferOffset, bufferTail
        jae     ReadLine@Return                         ; out of bytes: the last byte is not classified
        movzx   nextCharacter, byte ptr [bufferOffset]  ; safe: cursor is still below bufferTail
        cmp     currentCharacter, DOUBLE_QUOTE
        je      ReadLine@HitDoubleQuote
        cmp     currentCharacter, LINE_FEED
        je      ReadLine@HitLineFeed
        cmp     currentCharacter, CARRIAGE_RETURN
        jne     ReadLine@NextChar                       ; ordinary byte: keep scanning

        ; Carriage return: when an LF follows, let the LF decide so that the
        ; returned pointer lands after the whole CRLF pair.
        cmp     nextCharacter, LINE_FEED
        je      ReadLine@NextChar
ReadLine@HitLineFeed:
        cmp     isQuotedSequence, TRUE                  ; line breaks inside quotes belong to the field
        je      ReadLine@NextChar
ReadLine@Return:
        ret

ReadLine@HitDoubleQuote:
        xor     isQuotedSequence, TRUE                  ; every quote flips the state; an escaped "" flips it twice
        jmp     ReadLine@NextChar
ReadLine endp
end
Building upon my previous question, I have come up with a method that searches through a buffer and returns a pointer to the end of the line. The primary purpose of the function is to extract rows from a CSV file using the logic specified in RFC 4180; the third parameter, isQuotedSequence,
can be used in cases where parsing is parallelized or started in the middle of a string that is known to be quoted.
COMMENT @
C Interface:
extern char* ReadLine(const char* bufferOffset, const char* bufferTail, long long isQuotedSequence);
Reference:
https://tools.ietf.org/html/rfc4180
@
;-----------------------------; (CONSTANTS)
; ASCII codes the scanner compares against, plus the value that represents
; the "inside a quoted field" state.
CARRIAGE_RETURN = 00000000Dh
DOUBLE_QUOTE = 000000022h
LINE_FEED = 00000000Ah
TRUE = 000000001h
;-----------------------------; (ARGUMENTS)
; Incoming argument registers in C parameter order; rcx, rdx and r8 are the
; first three integer argument registers of the Microsoft x64 convention.
arg0 textequ <rcx>
arg1 textequ <rdx>
arg2 textequ <r8>
;-----------------------------; (LOCALS)
; Register aliases for the working values. Three of them overlap an argument
; register (ecx with arg0, rdx with arg1, r8d with arg2), so each argument has
; to be copied out before the overlapping alias is written. bufferOffset lives
; in rax, so it is also the value the caller receives on return.
bufferOffset textequ <rax>
bufferTail textequ <r9>
currentCharacter textequ <ecx>
isQuotedSequence textequ <rdx>
nextCharacter textequ <r8d>
.code
;------------------------------------------------------------------------------
; char* ReadLine(const char* bufferOffset, const char* bufferTail,
;                long long isQuotedSequence)
;
; Scans [bufferOffset, bufferTail) for the end of the current CSV record and
; returns a pointer just past its line terminator.
;
; ABI:    Microsoft x64, leaf function (no calls, no stack usage)
; In:     arg0 = first byte to scan
;         arg1 = one past the last readable byte
;         arg2 = non-zero when the scan starts inside a quoted field
; Out:    bufferOffset =
;           - the address following the first LF, or CR not followed by LF,
;             that lies outside double quotes; or
;           - bufferTail when the scan runs out of bytes (this includes a
;             terminator that is the very last byte of the buffer); or
;           - the incoming bufferOffset when it is not below bufferTail
; Clobb:  arg0, arg1, arg2, bufferTail, flags (all volatile in this ABI)
;------------------------------------------------------------------------------
ReadLine proc
        mov     bufferOffset, arg0                      ; cursor = start of scan
        mov     bufferTail, arg1                        ; save the end pointer before its register is reused
        mov     isQuotedSequence, arg2                  ; seed the quote state from the caller
        test    isQuotedSequence, isQuotedSequence      ; C truthiness: any non-zero flag means "quoted"
        jz      ReadLine@StateReady
        mov     isQuotedSequence, TRUE                  ; normalize so the xor toggle and the compare below agree
ReadLine@StateReady:
        cmp     bufferOffset, bufferTail                ; anything to read at all?
        jae     ReadLine@Return                         ; addresses are unsigned, so jae rather than jge
        movzx   nextCharacter, byte ptr [bufferOffset]  ; prime the one-byte lookahead

ReadLine@NextChar:
        mov     currentCharacter, nextCharacter         ; consume the lookahead byte
        add     bufferOffset, 1h                        ; cursor now points just past currentCharacter
        cmp     bufferOffset, bufferTail
        jae     ReadLine@Return                         ; out of bytes: the last byte is not classified
        movzx   nextCharacter, byte ptr [bufferOffset]  ; safe: cursor is still below bufferTail
        cmp     currentCharacter, DOUBLE_QUOTE
        je      ReadLine@HitDoubleQuote
        cmp     currentCharacter, LINE_FEED
        je      ReadLine@HitLineFeed
        cmp     currentCharacter, CARRIAGE_RETURN
        jne     ReadLine@NextChar                       ; ordinary byte: keep scanning

        ; Carriage return: when an LF follows, let the LF decide so that the
        ; returned pointer lands after the whole CRLF pair.
        cmp     nextCharacter, LINE_FEED
        je      ReadLine@NextChar
ReadLine@HitLineFeed:
        cmp     isQuotedSequence, TRUE                  ; line breaks inside quotes belong to the field
        je      ReadLine@NextChar
ReadLine@Return:
        ret

ReadLine@HitDoubleQuote:
        xor     isQuotedSequence, TRUE                  ; every quote flips the state; an escaped "" flips it twice
        jmp     ReadLine@NextChar
ReadLine endp
end
Building upon my previous question, I have come up with a method that searches through a buffer and returns a pointer to the end of the line. The primary purpose of the function is to extract rows from a CSV file using the logic specified in RFC 4180; the third parameter, isQuotedSequence,
can be used in cases where parsing is parallelized or started in the middle of a string that is known to be quoted.
COMMENT @
C Interface:
extern char* ReadLine(const char* bufferOffset, const char* bufferTail, long long isQuotedSequence);
Reference:
https://www.rfc-editor.org/rfc/rfc4180
@
;-----------------------------; (CONSTANTS)
; ASCII codes the scanner compares against, plus the value that represents
; the "inside a quoted field" state.
CARRIAGE_RETURN = 00000000Dh
DOUBLE_QUOTE = 000000022h
LINE_FEED = 00000000Ah
TRUE = 000000001h
;-----------------------------; (ARGUMENTS)
; Incoming argument registers in C parameter order; rcx, rdx and r8 are the
; first three integer argument registers of the Microsoft x64 convention.
arg0 textequ <rcx>
arg1 textequ <rdx>
arg2 textequ <r8>
;-----------------------------; (LOCALS)
; Register aliases for the working values. Three of them overlap an argument
; register (ecx with arg0, rdx with arg1, r8d with arg2), so each argument has
; to be copied out before the overlapping alias is written. bufferOffset lives
; in rax, so it is also the value the caller receives on return.
bufferOffset textequ <rax>
bufferTail textequ <r9>
currentCharacter textequ <ecx>
isQuotedSequence textequ <rdx>
nextCharacter textequ <r8d>
.code
;------------------------------------------------------------------------------
; char* ReadLine(const char* bufferOffset, const char* bufferTail,
;                long long isQuotedSequence)
;
; Scans [bufferOffset, bufferTail) for the end of the current CSV record and
; returns a pointer just past its line terminator.
;
; ABI:    Microsoft x64, leaf function (no calls, no stack usage)
; In:     arg0 = first byte to scan
;         arg1 = one past the last readable byte
;         arg2 = non-zero when the scan starts inside a quoted field
; Out:    bufferOffset =
;           - the address following the first LF, or CR not followed by LF,
;             that lies outside double quotes; or
;           - bufferTail when the scan runs out of bytes (this includes a
;             terminator that is the very last byte of the buffer); or
;           - the incoming bufferOffset when it is not below bufferTail
; Clobb:  arg0, arg1, arg2, bufferTail, flags (all volatile in this ABI)
;------------------------------------------------------------------------------
ReadLine proc
        mov     bufferOffset, arg0                      ; cursor = start of scan
        mov     bufferTail, arg1                        ; save the end pointer before its register is reused
        mov     isQuotedSequence, arg2                  ; seed the quote state from the caller
        test    isQuotedSequence, isQuotedSequence      ; C truthiness: any non-zero flag means "quoted"
        jz      ReadLine@StateReady
        mov     isQuotedSequence, TRUE                  ; normalize so the xor toggle and the compare below agree
ReadLine@StateReady:
        cmp     bufferOffset, bufferTail                ; anything to read at all?
        jae     ReadLine@Return                         ; addresses are unsigned, so jae rather than jge
        movzx   nextCharacter, byte ptr [bufferOffset]  ; prime the one-byte lookahead

ReadLine@NextChar:
        mov     currentCharacter, nextCharacter         ; consume the lookahead byte
        add     bufferOffset, 1h                        ; cursor now points just past currentCharacter
        cmp     bufferOffset, bufferTail
        jae     ReadLine@Return                         ; out of bytes: the last byte is not classified
        movzx   nextCharacter, byte ptr [bufferOffset]  ; safe: cursor is still below bufferTail
        cmp     currentCharacter, DOUBLE_QUOTE
        je      ReadLine@HitDoubleQuote
        cmp     currentCharacter, LINE_FEED
        je      ReadLine@HitLineFeed
        cmp     currentCharacter, CARRIAGE_RETURN
        jne     ReadLine@NextChar                       ; ordinary byte: keep scanning

        ; Carriage return: when an LF follows, let the LF decide so that the
        ; returned pointer lands after the whole CRLF pair.
        cmp     nextCharacter, LINE_FEED
        je      ReadLine@NextChar
ReadLine@HitLineFeed:
        cmp     isQuotedSequence, TRUE                  ; line breaks inside quotes belong to the field
        je      ReadLine@NextChar
ReadLine@Return:
        ret

ReadLine@HitDoubleQuote:
        xor     isQuotedSequence, TRUE                  ; every quote flips the state; an escaped "" flips it twice
        jmp     ReadLine@NextChar
ReadLine endp
end
COMMENT @
C Interface:
extern char* ReadLine(const char* bufferOffset, const char* bufferTail, long long isQuotedSequence);
Reference:
https://tools.ietf.org/html/rfc4180
@
;-----------------------------; (CONSTANTS)
; ASCII codes the scanner compares against, plus the value that represents
; the "inside a quoted field" state.
CARRIAGE_RETURN = 00000000Dh
DOUBLE_QUOTE = 000000022h
LINE_FEED = 00000000Ah
TRUE = 000000001h
;-----------------------------; (ARGUMENTS)
; Incoming argument registers in C parameter order; rcx, rdx and r8 are the
; first three integer argument registers of the Microsoft x64 convention.
arg0 textequ <rcx>
arg1 textequ <rdx>
arg2 textequ <r8>
;-----------------------------; (LOCALS)
; Register aliases for the working values. Three of them overlap an argument
; register (ecx with arg0, rdx with arg1, r8d with arg2), so each argument has
; to be copied out before the overlapping alias is written. bufferOffset lives
; in rax, so it is also the value the caller receives on return.
bufferOffset textequ <rax>
bufferTail textequ <r9>
currentCharacter textequ <ecx>
isQuotedSequence textequ <rdx>
nextCharacter textequ <r8d>
.code
;------------------------------------------------------------------------------
; char* ReadLine(const char* bufferOffset, const char* bufferTail,
;                long long isQuotedSequence)
;
; Scans [bufferOffset, bufferTail) for the end of the current CSV record and
; returns a pointer just past its line terminator.
;
; ABI:    Microsoft x64, leaf function (no calls, no stack usage)
; In:     arg0 = first byte to scan
;         arg1 = one past the last readable byte
;         arg2 = non-zero when the scan starts inside a quoted field
; Out:    bufferOffset =
;           - the address following the first LF, or CR not followed by LF,
;             that lies outside double quotes; or
;           - bufferTail when the scan runs out of bytes (this includes a
;             terminator that is the very last byte of the buffer); or
;           - the incoming bufferOffset when it is not below bufferTail
; Clobb:  arg0, arg1, arg2, bufferTail, flags (all volatile in this ABI)
;------------------------------------------------------------------------------
ReadLine proc
        mov     bufferOffset, arg0                      ; cursor = start of scan
        mov     bufferTail, arg1                        ; save the end pointer before its register is reused
        mov     isQuotedSequence, arg2                  ; seed the quote state from the caller
        test    isQuotedSequence, isQuotedSequence      ; C truthiness: any non-zero flag means "quoted"
        jz      ReadLine@StateReady
        mov     isQuotedSequence, TRUE                  ; normalize so the xor toggle and the compare below agree
ReadLine@StateReady:
        cmp     bufferOffset, bufferTail                ; anything to read at all?
        jae     ReadLine@Return                         ; addresses are unsigned, so jae rather than jge
        movzx   nextCharacter, byte ptr [bufferOffset]  ; prime the one-byte lookahead

ReadLine@NextChar:
        mov     currentCharacter, nextCharacter         ; consume the lookahead byte
        add     bufferOffset, 1h                        ; cursor now points just past currentCharacter
        cmp     bufferOffset, bufferTail
        jae     ReadLine@Return                         ; out of bytes: the last byte is not classified
        movzx   nextCharacter, byte ptr [bufferOffset]  ; safe: cursor is still below bufferTail
        cmp     currentCharacter, DOUBLE_QUOTE
        je      ReadLine@HitDoubleQuote
        cmp     currentCharacter, LINE_FEED
        je      ReadLine@HitLineFeed
        cmp     currentCharacter, CARRIAGE_RETURN
        jne     ReadLine@NextChar                       ; ordinary byte: keep scanning

        ; Carriage return: when an LF follows, let the LF decide so that the
        ; returned pointer lands after the whole CRLF pair.
        cmp     nextCharacter, LINE_FEED
        je      ReadLine@NextChar
ReadLine@HitLineFeed:
        cmp     isQuotedSequence, TRUE                  ; line breaks inside quotes belong to the field
        je      ReadLine@NextChar
ReadLine@Return:
        ret

ReadLine@HitDoubleQuote:
        xor     isQuotedSequence, TRUE                  ; every quote flips the state; an escaped "" flips it twice
        jmp     ReadLine@NextChar
ReadLine endp
end
COMMENT @
C Interface:
extern char* ReadLine(const char* bufferStart, const char* bufferEnd, long long isQuotedSequence);
Reference:
https://tools.ietf.org/html/rfc4180
@
;-----------------------------; (CONSTANTS)
; ASCII codes the scanner compares against, plus the value that represents
; the "inside a quoted field" state.
CARRIAGE_RETURN = 00000000Dh
DOUBLE_QUOTE = 000000022h
LINE_FEED = 00000000Ah
TRUE = 000000001h
;-----------------------------; (ARGUMENTS)
; Incoming argument registers in C parameter order; rcx, rdx and r8 are the
; first three integer argument registers of the Microsoft x64 convention.
arg0 textequ <rcx>
arg1 textequ <rdx>
arg2 textequ <r8>
;-----------------------------; (LOCALS)
; Register aliases for the working values. Three of them overlap an argument
; register (ecx with arg0, rdx with arg1, r8d with arg2), so each argument has
; to be copied out before the overlapping alias is written. bufferOffset lives
; in rax, so it is also the value the caller receives on return.
bufferOffset textequ <rax>
bufferTail textequ <r9>
currentCharacter textequ <ecx>
isQuotedSequence textequ <rdx>
nextCharacter textequ <r8d>
.code
;------------------------------------------------------------------------------
; char* ReadLine(const char* bufferOffset, const char* bufferTail,
;                long long isQuotedSequence)
;
; Scans [bufferOffset, bufferTail) for the end of the current CSV record and
; returns a pointer just past its line terminator.
;
; ABI:    Microsoft x64, leaf function (no calls, no stack usage)
; In:     arg0 = first byte to scan
;         arg1 = one past the last readable byte
;         arg2 = non-zero when the scan starts inside a quoted field
; Out:    bufferOffset =
;           - the address following the first LF, or CR not followed by LF,
;             that lies outside double quotes; or
;           - bufferTail when the scan runs out of bytes (this includes a
;             terminator that is the very last byte of the buffer); or
;           - the incoming bufferOffset when it is not below bufferTail
; Clobb:  arg0, arg1, arg2, bufferTail, flags (all volatile in this ABI)
;------------------------------------------------------------------------------
ReadLine proc
        mov     bufferOffset, arg0                      ; cursor = start of scan
        mov     bufferTail, arg1                        ; save the end pointer before its register is reused
        mov     isQuotedSequence, arg2                  ; seed the quote state from the caller
        test    isQuotedSequence, isQuotedSequence      ; C truthiness: any non-zero flag means "quoted"
        jz      ReadLine@StateReady
        mov     isQuotedSequence, TRUE                  ; normalize so the xor toggle and the compare below agree
ReadLine@StateReady:
        cmp     bufferOffset, bufferTail                ; anything to read at all?
        jae     ReadLine@Return                         ; addresses are unsigned, so jae rather than jge
        movzx   nextCharacter, byte ptr [bufferOffset]  ; prime the one-byte lookahead

ReadLine@NextChar:
        mov     currentCharacter, nextCharacter         ; consume the lookahead byte
        add     bufferOffset, 1h                        ; cursor now points just past currentCharacter
        cmp     bufferOffset, bufferTail
        jae     ReadLine@Return                         ; out of bytes: the last byte is not classified
        movzx   nextCharacter, byte ptr [bufferOffset]  ; safe: cursor is still below bufferTail
        cmp     currentCharacter, DOUBLE_QUOTE
        je      ReadLine@HitDoubleQuote
        cmp     currentCharacter, LINE_FEED
        je      ReadLine@HitLineFeed
        cmp     currentCharacter, CARRIAGE_RETURN
        jne     ReadLine@NextChar                       ; ordinary byte: keep scanning

        ; Carriage return: when an LF follows, let the LF decide so that the
        ; returned pointer lands after the whole CRLF pair.
        cmp     nextCharacter, LINE_FEED
        je      ReadLine@NextChar
ReadLine@HitLineFeed:
        cmp     isQuotedSequence, TRUE                  ; line breaks inside quotes belong to the field
        je      ReadLine@NextChar
ReadLine@Return:
        ret

ReadLine@HitDoubleQuote:
        xor     isQuotedSequence, TRUE                  ; every quote flips the state; an escaped "" flips it twice
        jmp     ReadLine@NextChar
ReadLine endp
end
COMMENT @
C Interface:
extern char* ReadLine(const char* bufferOffset, const char* bufferTail, long long isQuotedSequence);
Reference:
https://tools.ietf.org/html/rfc4180
@
;-----------------------------; (CONSTANTS)
; ASCII codes the scanner compares against, plus the value that represents
; the "inside a quoted field" state.
CARRIAGE_RETURN = 00000000Dh
DOUBLE_QUOTE = 000000022h
LINE_FEED = 00000000Ah
TRUE = 000000001h
;-----------------------------; (ARGUMENTS)
; Incoming argument registers in C parameter order; rcx, rdx and r8 are the
; first three integer argument registers of the Microsoft x64 convention.
arg0 textequ <rcx>
arg1 textequ <rdx>
arg2 textequ <r8>
;-----------------------------; (LOCALS)
; Register aliases for the working values. Three of them overlap an argument
; register (ecx with arg0, rdx with arg1, r8d with arg2), so each argument has
; to be copied out before the overlapping alias is written. bufferOffset lives
; in rax, so it is also the value the caller receives on return.
bufferOffset textequ <rax>
bufferTail textequ <r9>
currentCharacter textequ <ecx>
isQuotedSequence textequ <rdx>
nextCharacter textequ <r8d>
.code
;------------------------------------------------------------------------------
; char* ReadLine(const char* bufferOffset, const char* bufferTail,
;                long long isQuotedSequence)
;
; Scans [bufferOffset, bufferTail) for the end of the current CSV record and
; returns a pointer just past its line terminator.
;
; ABI:    Microsoft x64, leaf function (no calls, no stack usage)
; In:     arg0 = first byte to scan
;         arg1 = one past the last readable byte
;         arg2 = non-zero when the scan starts inside a quoted field
; Out:    bufferOffset =
;           - the address following the first LF, or CR not followed by LF,
;             that lies outside double quotes; or
;           - bufferTail when the scan runs out of bytes (this includes a
;             terminator that is the very last byte of the buffer); or
;           - the incoming bufferOffset when it is not below bufferTail
; Clobb:  arg0, arg1, arg2, bufferTail, flags (all volatile in this ABI)
;------------------------------------------------------------------------------
ReadLine proc
        mov     bufferOffset, arg0                      ; cursor = start of scan
        mov     bufferTail, arg1                        ; save the end pointer before its register is reused
        mov     isQuotedSequence, arg2                  ; seed the quote state from the caller
        test    isQuotedSequence, isQuotedSequence      ; C truthiness: any non-zero flag means "quoted"
        jz      ReadLine@StateReady
        mov     isQuotedSequence, TRUE                  ; normalize so the xor toggle and the compare below agree
ReadLine@StateReady:
        cmp     bufferOffset, bufferTail                ; anything to read at all?
        jae     ReadLine@Return                         ; addresses are unsigned, so jae rather than jge
        movzx   nextCharacter, byte ptr [bufferOffset]  ; prime the one-byte lookahead

ReadLine@NextChar:
        mov     currentCharacter, nextCharacter         ; consume the lookahead byte
        add     bufferOffset, 1h                        ; cursor now points just past currentCharacter
        cmp     bufferOffset, bufferTail
        jae     ReadLine@Return                         ; out of bytes: the last byte is not classified
        movzx   nextCharacter, byte ptr [bufferOffset]  ; safe: cursor is still below bufferTail
        cmp     currentCharacter, DOUBLE_QUOTE
        je      ReadLine@HitDoubleQuote
        cmp     currentCharacter, LINE_FEED
        je      ReadLine@HitLineFeed
        cmp     currentCharacter, CARRIAGE_RETURN
        jne     ReadLine@NextChar                       ; ordinary byte: keep scanning

        ; Carriage return: when an LF follows, let the LF decide so that the
        ; returned pointer lands after the whole CRLF pair.
        cmp     nextCharacter, LINE_FEED
        je      ReadLine@NextChar
ReadLine@HitLineFeed:
        cmp     isQuotedSequence, TRUE                  ; line breaks inside quotes belong to the field
        je      ReadLine@NextChar
ReadLine@Return:
        ret

ReadLine@HitDoubleQuote:
        xor     isQuotedSequence, TRUE                  ; every quote flips the state; an escaped "" flips it twice
        jmp     ReadLine@NextChar
ReadLine endp
end