Seed7 Algorithms: String

Seed7 FAQ Manual Demo Screenshots Examples Libraries Algorithms Benchmarks Index Download GitHub Build Seed7 Links
Algorithms Sorting Searching Date & Time String Float Mathematics Message digest Graphics File Puzzles Others

Algorithms String

previous up next

Replace tabs with the corresponding number of spaces

const proc: delTabs (inout string: stri, in integer: tabJump) is func
 local
 var integer: tabPos is 0;
 var integer: blankCount is 0;
 begin
 tabPos := pos(stri, "\t");
 while tabPos <> 0 do
 blankCount := tabJump - pred(tabPos) rem tabJump;
 stri := stri[ .. pred(tabPos)] &
 " " mult blankCount & stri[succ(tabPos) .. ];
 tabPos := pos(stri, "\t");
 end while;
 end func;

Replace leading spaces with the corresponding number of tabs

const proc: insertLeadingTabs (inout string: stri, in integer: tabJump) is func
 local
 var integer: blankCount is 1;
 var integer: tabCount is 0;
 begin
 while blankCount <= length(stri) and stri[blankCount] = ' ' do
 incr(blankCount);
 end while;
 decr(blankCount);
 if blankCount>= tabJump then
 tabCount := blankCount div tabJump;
 stri := "\t" mult tabCount & stri[succ(tabCount * tabJump) .. ];
 end if;
 end func;

Replace spaces with the corresponding number of tabs

const proc: insertTabs (inout string: stri, in integer: tabJump) is func
 local
 var integer: blankPos is 0;
 var integer: pos is 0;
 var integer: tabCount1 is 0;
 var integer: tabCount2 is 0;
 begin
 blankPos := rpos(stri, ' ');
 while blankPos <> 0 do
 pos := pred(blankPos);
 while pos>= 1 and stri[pos] = ' ' do
 decr(pos);
 end while;
 tabCount1 := pos div tabJump;
 tabCount2 := blankPos div tabJump;
 if blankPos - pos>= 2 and tabCount2> tabCount1 then
 stri := stri[.. pos] & "\t" mult (tabCount2 - tabCount1) &
 stri[succ(tabCount2 * tabJump) ..];
 end if;
 blankPos := rpos(stri, ' ', pos);
 end while;
 end func;

Convert an Unicode UTF-32 string to UTF-8

The "unicode.s7i" library defines toUtf8 , which converts an UTF-32 string to UTF-8. The function 'toUtf8_2' below shows how toUtf8 works:

const func string: toUtf8_2 (in string: stri) is func
 result
 var string: stri8 is "";
 local
 var char: ch is ' ';
 var integer: pos is 1;
 begin
 stri8 := "0円;" mult (6 * length(stri));
 for ch range stri do
 if ch <= '16円#7f;' then
 stri8 @:= [pos] ch;
 incr(pos);
 elsif ch <= '16円#7ff;' then
 stri8 @:= [pos ] char(16#C0 + ( ord(ch)>> 6));
 stri8 @:= [pos + 1] char(16#80 + ( ord(ch) mod 16#40));
 pos +:= 2;
 elsif ch <= '16円#ffff;' then
 stri8 @:= [pos ] char(16#E0 + ( ord(ch)>> 12));
 stri8 @:= [pos + 1] char(16#80 + ((ord(ch)>> 6) mod 16#40));
 stri8 @:= [pos + 2] char(16#80 + ( ord(ch) mod 16#40));
 pos +:= 3;
 elsif ch <= '16円#1fffff;' then
 stri8 @:= [pos ] char(16#F0 + ( ord(ch)>> 18));
 stri8 @:= [pos + 1] char(16#80 + ((ord(ch)>> 12) mod 16#40));
 stri8 @:= [pos + 2] char(16#80 + ((ord(ch)>> 6) mod 16#40));
 stri8 @:= [pos + 3] char(16#80 + ( ord(ch) mod 16#40));
 pos +:= 4;
 elsif ch <= '16円#3ffffff;' then
 stri8 @:= [pos ] char(16#F8 + ( ord(ch)>> 24));
 stri8 @:= [pos + 1] char(16#80 + ((ord(ch)>> 18) mod 16#40));
 stri8 @:= [pos + 2] char(16#80 + ((ord(ch)>> 12) mod 16#40));
 stri8 @:= [pos + 3] char(16#80 + ((ord(ch)>> 6) mod 16#40));
 stri8 @:= [pos + 4] char(16#80 + ( ord(ch) mod 16#40));
 pos +:= 5;
 else
 stri8 @:= [pos ] char(16#FC + ( ord(ch)>> 30));
 stri8 @:= [pos + 1] char(16#80 + ((ord(ch)>> 24) mod 16#40));
 stri8 @:= [pos + 2] char(16#80 + ((ord(ch)>> 18) mod 16#40));
 stri8 @:= [pos + 3] char(16#80 + ((ord(ch)>> 12) mod 16#40));
 stri8 @:= [pos + 4] char(16#80 + ((ord(ch)>> 6) mod 16#40));
 stri8 @:= [pos + 5] char(16#80 + ( ord(ch) mod 16#40));
 pos +:= 6;
 end if;
 end for;
 stri8 := stri8[.. pred(pos)];
 end func;

Convert an Unicode UTF-8 string to UTF-32

The "unicode.s7i" library defines fromUtf8 , which converts an UTF-8 string to UTF-32. The function 'fromUtf8_2' below shows how fromUtf8 works:

const func string: fromUtf8_2 (in string: stri8) is func
 result
 var string: stri is "";
 local
 var integer: length is 0;
 var integer: pos8 is 0;
 var integer: pos is 0;
 var boolean: okay is TRUE;
 begin
 length := length(stri8);
 stri := "0円;" mult length;
 pos8 := 1;
 pos := 0;
 while length> 0 do
 incr(pos);
 if ord(stri8[pos8]) <= 16#7F then
 stri @:= [pos] stri8[pos8];
 incr(pos8);
 decr(length);
 elsif ord(stri8[pos8 ])>> 5 = 16#06 and length>= 2 and
 ord(stri8[pos8 + 1])>> 6 = 16#02 then
 stri @:= [pos] char((ord(stri8[pos8 ]) mod 16#20 << 6) + (ord(stri8[pos8 + 1]) mod 16#40));
 pos8 +:= 2;
 length -:= 2;
 elsif ord(stri8[pos8 ])>> 4 = 16#0E and length>= 3 and
 ord(stri8[pos8 + 1])>> 6 = 16#02 and
 ord(stri8[pos8 + 2])>> 6 = 16#02 then
 stri @:= [pos] char((ord(stri8[pos8 ]) mod 16#10 << 12) + (ord(stri8[pos8 + 1]) mod 16#40 << 6) + (ord(stri8[pos8 + 2]) mod 16#40));
 pos8 +:= 3;
 length -:= 3;
 elsif ord(stri8[pos8 ])>> 3 = 16#1E and length>= 4 and
 ord(stri8[pos8 + 1])>> 6 = 16#02 and
 ord(stri8[pos8 + 2])>> 6 = 16#02 and
 ord(stri8[pos8 + 3])>> 6 = 16#02 then
 stri @:= [pos] char((ord(stri8[pos8 ]) mod 16#08 << 18) + (ord(stri8[pos8 + 1]) mod 16#40 << 12) + (ord(stri8[pos8 + 2]) mod 16#40 << 6) + (ord(stri8[pos8 + 3]) mod 16#40));
 pos8 +:= 4;
 length -:= 4;
 elsif ord(stri8[pos8 ])>> 2 = 16#3E and length>= 5 and
 ord(stri8[pos8 + 1])>> 6 = 16#02 and
 ord(stri8[pos8 + 2])>> 6 = 16#02 and
 ord(stri8[pos8 + 3])>> 6 = 16#02 and
 ord(stri8[pos8 + 4])>> 6 = 16#02 then
 stri @:= [pos] char((ord(stri8[pos8 ]) mod 16#04 << 24) + (ord(stri8[pos8 + 1]) mod 16#40 << 18) + (ord(stri8[pos8 + 2]) mod 16#40 << 12) + (ord(stri8[pos8 + 3]) mod 16#40 << 6) + (ord(stri8[pos8 + 4]) mod 16#40));
 pos8 +:= 5;
 length -:= 5;
 elsif ord(stri8[pos8 ])>> 2 = 16#3F and length>= 6 and
 ord(stri8[pos8 + 1])>> 6 = 16#02 and
 ord(stri8[pos8 + 2])>> 6 = 16#02 and
 ord(stri8[pos8 + 3])>> 6 = 16#02 and
 ord(stri8[pos8 + 4])>> 6 = 16#02 and
 ord(stri8[pos8 + 5])>> 6 = 16#02 then
 stri @:= [pos] char((ord(stri8[pos8 ]) mod 16#04 << 30) + (ord(stri8[pos8 + 1]) mod 16#40 << 24) + (ord(stri8[pos8 + 2]) mod 16#40 << 18) + (ord(stri8[pos8 + 3]) mod 16#40 << 12) + (ord(stri8[pos8 + 4]) mod 16#40 << 6) + (ord(stri8[pos8 + 5]) mod 16#40));
 pos8 +:= 6;
 length -:= 6;
 else
 okay := FALSE;
 length := 0;
 end if;
 end while;
 if okay then
 stri := stri[.. pos];
 else
 raise RANGE_ERROR;
 end if;
 end func;

Encode a string with the Base64 encoding

The function toBase64 is part of the "encoding.s7i" library. Base64 encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with letters, digits and the characters '+' and '/'.

const func string: toBase64 (in string: byteStri) is func
 result
 var string: base64 is "";
 local
 const string: coding is "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 var integer: index is 1;
 var integer: subIndex is 1;
 var char: ch is ' ';
 var integer: threeBytes is 0;
 var string: fourBytes is " ";
 var integer: posToAddNewline is 58;
 begin
 for index range 1 to length(byteStri) step 3 do
 threeBytes := 0;
 for subIndex range index to index + 2 do
 threeBytes <<:= 8; if subIndex <= length(byteStri) then
 ch := byteStri[subIndex];
 if ch>= '256円;' then
 raise RANGE_ERROR;
 end if;
 threeBytes +:= ord(ch);
 end if;
 end for;
 fourBytes @:= [1] coding[succ( threeBytes>> 18)];
 fourBytes @:= [2] coding[succ((threeBytes>> 12) mod 64)];
 fourBytes @:= [3] coding[succ((threeBytes>> 6) mod 64)];
 fourBytes @:= [4] coding[succ( threeBytes mod 64)];
 if index = posToAddNewline then
 base64 &:= "\n";
 posToAddNewline +:= 57;
 end if;
 base64 &:= fourBytes;
 end for;
 index := length(base64);
 if length(byteStri) rem 3 = 2 then
 base64 @:= [index] '=';
 elsif length(byteStri) rem 3 = 1 then
 base64 @:= [pred(index)] "==";
 end if;
 end func;

Decode a Base64 encoded string

The function fromBase64 is part of the "encoding.s7i" library.

const func string: fromBase64 (in string: base64) is func
 result
 var string: decoded is "";
 local
 const array integer: decode is [] ( # -1 is illegal
 62, -1, -1, -1, 63, # + /
 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, # 0 - 9
 -1, -1, -1, 0, -1, -1, -1, # =
 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, # A - M
 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, # N - Z
 -1, -1, -1, -1, -1, -1,
 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, # a - m
 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51); # n - z
 var integer: index is 1;
 var integer: subIndex is 1;
 var integer: number is 0;
 var integer: fourBytes is 0;
 var string: threeBytes is " ";
 begin
 while index <= length(base64) - 3 do
 if base64[index]>= '+' then
 fourBytes := 0;
 for subIndex range index to index + 3 do
 number := decode[ord(base64[subIndex]) - ord(pred('+'))];
 if number = -1 then
 raise RANGE_ERROR;
 end if;
 fourBytes := (fourBytes << 6) + number; end for;
 threeBytes @:= [1] char( fourBytes>> 16);
 threeBytes @:= [2] char((fourBytes>> 8) mod 256);
 threeBytes @:= [3] char( fourBytes mod 256);
 decoded &:= threeBytes;
 index +:= 4;
 elsif base64[index] = '\n' or base64[index] = '\r' then
 incr(index);
 else
 raise RANGE_ERROR;
 end if;
 end while;
 if index <> succ(length(base64)) or
 (length(base64)>= 2 and
 pos(base64[.. length(base64) - 2], '=') <> 0) then
 raise RANGE_ERROR;
 end if;
 if length(base64)>= 2 and base64[pred(length(base64)) fixLen 2] = "==" then
 decoded := decoded[.. length(decoded) - 2];
 elsif length(base64)>= 1 and base64[length(base64)] = '=' then
 decoded := decoded[.. pred(length(decoded))];
 end if;
 end func;

Encode a string with the Quoted-printable encoding

The function toQuotedPrintable is part of the "encoding.s7i" library. Quoted-printable encodes a byte string as ASCII string. This is done by encoding printable ASCII characters except '=' as themself. Other byte values are encoded with '=' followed by two hexadecimal digits representing the byte's numeric value.

const func string: toQuotedPrintable (in string: byteStri) is func
 result
 var string: quoted is "";
 local
 var integer: index is 0;
 var integer: startPos is 1;
 var integer: counter is 1;
 var char: ch is ' ';
 begin
 for index range 1 to length(byteStri) do
 ch := byteStri[index];
 if ch>= '256円;' then
 raise RANGE_ERROR;
 elsif ch = '\n' or (ch = '\r' and
 index < length(byteStri) and byteStri[succ(index)] = '\n') then
 if index> 1 then
 ch := byteStri[pred(index)];
 if ch = ' ' or ch = '\t' then
 quoted &:= byteStri[startPos .. index - 2];
 if counter>= 76 then
 quoted &:= "=\n";
 counter := 1;
 end if;
 quoted &:= "=" <& ord(byteStri[pred(index)]) RADIX 16 lpad0 2;
 counter +:= 3;
 startPos := index;
 end if;
 end if;
 counter := 1;
 elsif ch>= '127円;' or ch = '=' or (ch < ' ' and ch <> '9円;') then
 quoted &:= byteStri[startPos .. pred(index)];
 if counter>= 74 then
 quoted &:= "=\n";
 counter := 1;
 end if;
 quoted &:= "=" <& ord(ch) RADIX 16 lpad0 2;
 startPos := succ(index);
 counter +:= 3;
 elsif counter>= 76 then
 quoted &:= byteStri[startPos .. pred(index)] & "=\n";
 startPos := index;
 counter := 2;
 else
 incr(counter);
 end if;
 end for;
 quoted &:= byteStri[startPos ..];
 end func;

Decode a Quoted-printable encoded string

The function fromQuotedPrintable is part of the "encoding.s7i" library.

const func string: fromQuotedPrintable (in string: quoted) is func
 result
 var string: decoded is "";
 local
 var integer: startPos is 1;
 var integer: equalSignPos is 0;
 var string: twoChars is "";
 begin
 equalSignPos := pos(quoted, "=");
 while equalSignPos <> 0 do
 decoded &:= quoted[startPos .. pred(equalSignPos)];
 if equalSignPos < length(quoted) and
 quoted[succ(equalSignPos)] = '\n' then
 startPos := equalSignPos + 2;
 elsif equalSignPos <= length(quoted) - 2 then
 twoChars := quoted[succ(equalSignPos) fixLen 2];
 if twoChars[1] in hexdigit_char and
 twoChars[2] in hexdigit_char then
 decoded &:= char(integer(twoChars, 16));
 elsif twoChars <> "\r\n" then
 raise RANGE_ERROR;
 end if;
 startPos := equalSignPos + 3;
 else
 raise RANGE_ERROR;
 end if;
 equalSignPos := pos(quoted, "=", startPos);
 end while;
 decoded &:= quoted[startPos ..];
 end func;

Encode a string with uuencoding

The function toUuencoded is part of the "encoding.s7i" library. Uuencode encodes a byte string as ASCII string. This is done by taking packs of 6-bits and translating them into a radix-64 representation. The radix-64 digits are encoded with consecutive ASCII characters starting from ' ' (which represents 0). Every line starts with a radix-64 digit character indicating the number of data bytes encoded on that line.

const func string: toUuencoded (in string: byteStri) is func
 result
 var string: uuencoded is "";
 local
 var integer: index is 1;
 var integer: subIndex is 1;
 var char: ch is ' ';
 var integer: threeBytes is 0;
 var string: fourBytes is " ";
 var integer: posToAddNewline is 43;
 begin
 if length(byteStri) <> 0 then
 if length(byteStri) < 45 then
 uuencoded &:= char(32 + length(byteStri));
 else
 uuencoded &:= "M";
 end if;
 for index range 1 to length(byteStri) step 3 do
 threeBytes := 0;
 for subIndex range index to index + 2 do
 threeBytes <<:= 8; if subIndex <= length(byteStri) then
 ch := byteStri[subIndex];
 if ch>= '256円;' then
 raise RANGE_ERROR;
 end if;
 threeBytes +:= ord(ch);
 end if;
 end for;
 fourBytes @:= [1] char(32 + (threeBytes>> 18));
 fourBytes @:= [2] char(32 + (threeBytes>> 12) mod 64);
 fourBytes @:= [3] char(32 + (threeBytes>> 6) mod 64);
 fourBytes @:= [4] char(32 + threeBytes mod 64);
 uuencoded &:= fourBytes;
 if index = posToAddNewline and length(byteStri)> index + 2 then
 if length(byteStri) - index - 2 < 45 then
 uuencoded &:= "\n" <& char(32 + length(byteStri) - index - 2);
 else
 uuencoded &:= "\nM";
 end if;
 posToAddNewline +:= 45;
 end if;
 end for;
 uuencoded &:= "\n";
 end if;
 uuencoded &:= "`\n";
 end func;

Decode an uuencoded string

The function fromUuencoded is part of the "encoding.s7i" library.

const func string: fromUuencoded (in string: uuencoded) is func
 result
 var string: decoded is "";
 local
 var integer: lineLength is 1;
 var integer: index is 1;
 var integer: subIndex is 1;
 var integer: number is 0;
 var integer: fourBytes is 0;
 var string: threeBytes is " ";
 begin
 lineLength := ord(uuencoded[1]) - 32;
 while lineLength <> 0 and lineLength <> 64 do
 incr(index);
 while lineLength>= 1 do
 fourBytes := 0;
 for subIndex range index to index + 3 do
 number := ord(uuencoded[subIndex]) - 32;
 if number = 64 then
 number := 0;
 elsif number < 0 or number> 64 then
 raise RANGE_ERROR;
 end if;
 fourBytes := (fourBytes << 6) + number; end for;
 threeBytes @:= [1] char( fourBytes>> 16);
 threeBytes @:= [2] char((fourBytes>> 8) mod 256);
 threeBytes @:= [3] char( fourBytes mod 256);
 decoded &:= threeBytes[ .. lineLength];
 lineLength -:= 3;
 index +:= 4;
 end while;
 while index <= length(uuencoded) and uuencoded[index] <> '\n' do
 incr(index);
 end while;
 if index < length(uuencoded) then
 incr(index);
 lineLength := ord(uuencoded[index]) - 32;
 else
 lineLength := 0;
 end if;
 end while;
 end func;

Encode a string with percent encoding

The function toPercentEncoded is part of the "encoding.s7i" library. Percent-encoding encodes a byte string as ASCII string. This is done by encoding all characters, which are not in the set of unreserved characters (A-Z, a-z, 0-9 - _ . ~). The encoding uses a percent sign ('%') followed by two hexadecimal digits, which represent the ordinal value of the encoded character.

const func string: toPercentEncoded (in string: byteStri) is func
 result
 var string: percentEncoded is "";
 local
 const set of char: unreservedChars is alphanum_char | {'-', '_', '.', '~'};
 var integer: pos is 0;
 var integer: start is 1;
 var char: ch is ' ';
 begin
 for ch key pos range byteStri do
 if ch> '255円;' then
 raise RANGE_ERROR;
 elsif ch not in unreservedChars then
 percentEncoded &:= byteStri[start .. pred(pos)];
 percentEncoded &:= "%" <& ord(ch) RADIX 16 lpad0 2;
 start := succ(pos);
 end if;
 end for;
 percentEncoded &:= byteStri[start ..];
 end func;

Decode a percent encoded string

The function fromPercentEncoded is part of the "encoding.s7i" library. Percent-encoding encodes a byte string as ASCII string. It uses the percent sign ('%') followed by two hexadecimal digits to encode characters that otherwise would not be allowed in an URL. Allowed URL characters are encoded as themself.

const func string: fromPercentEncoded (in string: percentEncoded) is func
 result
 var string: decoded is "";
 local
 var integer: pos is 0;
 var integer: start is 1;
 begin
 pos := pos(percentEncoded, '%');
 while pos  0 do
 if pos <= length(percentEncoded) - 2 and
 percentEncoded[succ(pos)] in hexdigit_char and
 percentEncoded[pos + 2] in hexdigit_char then
 decoded &:= percentEncoded[start .. pred(pos)];
 decoded &:= char(integer(percentEncoded[succ(pos) fixLen 2], 16));
 pos +:= 2;
 start := succ(pos);
 end if;
 pos := pos(percentEncoded, '%', succ(pos));
 end while;
 decoded &:= percentEncoded[start ..];
 end func;

Encode a string with the URL encoding

The function toUrlEncoded is part of the "encoding.s7i" library. URL encoding encodes a byte string as ASCII string. This is done by encoding all characters, which are not in the set of unreserved characters (A-Z, a-z, 0-9 - _ . ~). The encoding uses a percent sign ('%') followed by two hexadecimal digits, which represent the ordinal value of the encoded character. A plus sign ('+') is used to encode a space (' ').
const func string: toUrlEncoded (in string: byteStri) is func result var string: urlEncoded is ""; local const set of char: unreservedChars is alphanum_char | {'-', '_', '.', '~'}; var integer: pos is 0; var integer: start is 1; var char: ch is ' '; begin for ch key pos range byteStri do if ch> '255円;' then raise RANGE_ERROR; elsif ch = ' ' then urlEncoded &:= byteStri[start .. pred(pos)]; urlEncoded &:= '+'; start := succ(pos); elsif ch not in unreservedChars then urlEncoded &:= byteStri[start .. pred(pos)]; urlEncoded &:= "%" <& ord(ch) RADIX 16 lpad0 2; start := succ(pos); end if; end for; urlEncoded &:= byteStri[start ..]; end func;

Decode an URL encoded string

The function fromUrlEncoded is part of the "encoding.s7i" library. URL encoding encodes a byte string as ASCII string. It uses the percent sign ('%') followed by two hexadecimal digits to encode characters that otherwise would not be allowed in an URL. A plus sign ('+') is used to encode a space (' '). Allowed URL characters are encoded as themself.
const func string: fromUrlEncoded (in string: urlEncoded) is func result var string: decoded is ""; local var integer: pos is 0; var integer: start is 1; var char: ch is ' '; begin for ch key pos range urlEncoded do if ch = '%' and pos <= length(urlEncoded) - 2 and urlEncoded[succ(pos)] in hexdigit_char and urlEncoded[pos + 2] in hexdigit_char then decoded &:= urlEncoded[start .. pred(pos)]; decoded &:= char(integer(urlEncoded[succ(pos) fixLen 2], 16)); start := pos + 3; elsif ch = '+' then decoded &:= urlEncoded[start .. pred(pos)]; decoded &:= ' '; start := succ(pos); end if; end for; decoded &:= urlEncoded[start ..]; end func;

Encode a string with the Ascii85 encoding

The function toAscii85 is part of the "encoding.s7i" library. Ascii85 encodes a byte string as ASCII string. This is done by encoding every four bytes with five printable ASCII characters. Five radix 85 digits provide enough possible values to encode the possible values of four bytes. The radix 85 digits are encoded with the characters '!' (encodes 0) through 'u' (encodes 84). If the last block of the byte string contains fewer than 4 bytes, the block is padded with up to three null bytes before encoding. After encoding, as many bytes as were added as padding are removed from the end of the output. In files the end of an Ascii85 encoding is marked with "~>" (this end marker is not added by toAscii85 ).
const func string: toAscii85 (in string: byteStri) is func result var string: ascii85 is ""; local var integer: index is 0; var integer: subIndex is 0; var integer: fourBytes is 0; var string: fiveBytes is " "; var integer: partialGroupSize is 0; var char: ch is ' '; begin for index range 1 to length(byteStri) - 3 step 4 do fourBytes := bytes2Int(byteStri[index fixLen 4], UNSIGNED, BE); if fourBytes = 0 then ascii85 &:= 'z'; else for subIndex range 5 downto 1 do fiveBytes @:= [subIndex] char(ord('!') + fourBytes rem 85); fourBytes := fourBytes div 85; end for; ascii85 &:= fiveBytes; end if; end for; partialGroupSize := length(byteStri) mod 4; if partialGroupSize 0 then index := succ(pred(length(byteStri)) mdiv 4 * 4); fourBytes := bytes2Int(byteStri[index ..] & "0円;" mult 4 - partialGroupSize, UNSIGNED, BE); for subIndex range 5 downto 1 do fiveBytes @:= [subIndex] char(ord('!') + fourBytes rem 85); fourBytes := fourBytes div 85; end for; ascii85 &:= fiveBytes[.. succ(partialGroupSize)]; end if; end func;

Decode a Ascii85 encoded string

The function fromAscii85 is part of the "encoding.s7i" library. Every block of five radix 85 characters is decoded to four bytes. Radix 85 characters are between '!' (encodes 0) and 'u' (encodes 84). The character 'z' is used to encode a block of four zero bytes. White space in the Ascii85 encoded string is ignored. The last block is padded to 5 bytes with the Ascii85 character 'u', and as many bytes as were added as padding are omitted from the end of the output.
const func string: fromAscii85 (in string: ascii85) is func result var string: decoded is ""; local const set of char: whiteSpace is {'0円;', '\t', '\n', '\f', '\r', ' '}; var char: ch is ' '; var integer: digitIndex is 0; var integer: base85Number is 0; var integer: idx is 0; begin for ch range ascii85 until ch = '~' do if ch>= '!' and ch <= 'u' then incr(digitIndex); base85Number := base85Number * 85 + (ord(ch) - ord('!')); if digitIndex = 5 then decoded &:= bytes(base85Number, UNSIGNED, BE, 4); digitIndex := 0; base85Number := 0; end if; elsif ch = 'z' and digitIndex = 0 then decoded &:= "0円;0円;0円;0円;"; elsif ch not in whiteSpace then raise RANGE_ERROR; end if; end for; if digitIndex 0 then for idx range 1 to 5 - digitIndex do base85Number := base85Number * 85 + 84; end for; decoded &:= bytes(base85Number, UNSIGNED, BE, 4)[.. pred(digitIndex)]; end if; end func;

Encode a string with the AsciiHex encoding

The function toAsciiHex is part of the "encoding.s7i" library. AsciiHex encodes a byte string as ASCII string. In AsciiHex each byte is encoded with two hexadecimal digits. White-space characters in an AsciiHex encoded string are ignored. The AsciiHex encoded string ends with the character '>'.
const func string: toAsciiHex (in string: byteStri) is func result var string: asciiHex is ""; local const char: endOfData is '>'; const integer: encodingsPerLine is 32; var integer: index is 1; begin while index <= length(byteStri) do asciiHex &:= hex(byteStri[index len encodingsPerLine]); asciiHex &:= "\n"; index +:= encodingsPerLine; end while; asciiHex &:= endOfData; end func;

Decode an AsciiHex encoded string

The function fromAsciiHex is part of the "encoding.s7i" library. * In AsciiHex each byte is encoded with two hexadecimal digits. * White-space characters in an AsciiHex encoded string are ignored. * The AsciiHex encoded string ends with the character '>'. * If a '>' follows the first hexadecimal digit of an encoded byte * the decoding works as if a '0' is at the place of the '>'. * The decoder works also correctly if the '>' is missing.
const func string: fromAsciiHex (in string: asciiHex) is func result var string: stri is ""; local const char: endOfData is '>'; const set of char: whiteSpace is {'0円;', '\t', '\n', '\f', '\r', ' '}; var integer: index is 1; begin while index < length(asciiHex) and asciiHex[index] endOfData do if asciiHex[index] not in whiteSpace then if asciiHex[succ(index)] = endOfData then stri &:= char(integer(asciiHex[index fixLen 1] & "0", 16)); incr(index); else stri &:= char(integer(asciiHex[index fixLen 2], 16)); index +:= 2; end if; else incr(index); end if; end while; if index = length(asciiHex) and asciiHex[index] endOfData and asciiHex[index] not in whiteSpace then stri &:= char(integer(asciiHex[index fixLen 1] & "0", 16)); end if; end func;

Compress a string using the Lempel Ziv Welch (LZW) compression

This algorithm compresses a byte string to a string of tokens which contains characters>= 256. To write this to a byte file it is necessary to add an algorithm which writes the tokens with 9 or more bits.
const func string: lzwCompress (in string: uncompressed) is func result var string: compressed is ""; local var char: ch is ' '; var hash [string] char: mydict is (hash [string] char).value; var string: buffer is ""; var string: xstr is ""; begin for ch range char(0) to char(255) do mydict @:= [str(ch)] ch; end for; for ch range uncompressed do xstr := buffer & str(ch); if xstr in mydict then buffer &:= str(ch) else compressed &:= str(mydict[buffer]); mydict @:= [xstr] char(length(mydict)); buffer := str(ch); end if; end for; if buffer <> "" then compressed &:= str(mydict[buffer]); end if; end func;

Decompress a Lempel Ziv Welch (LZW) compressed string

The compressed string consists of a sequence of tokens (which contain also characters>= 256). The decompress algorithm produces a byte string.
const func string: lzwDecompress (in string: compressed) is func result var string: uncompressed is ""; local var char: ch is ' '; var hash [char] string: mydict is (hash [char] string).value; var string: buffer is ""; var string: current is ""; var string: chain is ""; begin for ch range char(0) to char(255) do mydict @:= [ch] str(ch); end for; for ch range compressed do if buffer = "" then buffer := mydict[ch]; uncompressed &:= buffer; elsif ch <= char(255) then current := mydict[ch]; uncompressed &:= current; chain := buffer & current; mydict @:= [char(length(mydict))] chain; buffer := current; else if ch in mydict then chain := mydict[ch]; else chain := buffer & str(buffer[1]); end if; uncompressed &:= chain; mydict @:= [char(length(mydict))] buffer & str(chain[1]); buffer := chain; end if; end for; end func;

Compress a string using the run length encoding of bzip2

A sequence of 4 to 259 identical characters is replaced by four identical characters followed by a repeat length between 0 and 255. This run length encoding is used as first compression technique in the bzip2 compression.
const func string: bzip2RleCompress (in string: uncompressed) is func result var string: compressed is ""; local var integer: index is 1; var integer: oldpos is 1; var char: ch is ' '; begin while index <= length(uncompressed) - 3 do ch := uncompressed[index]; if uncompressed[succ(index)] = ch and uncompressed[index + 2] = ch and uncompressed[index + 3] = ch then index +:= 4; compressed &:= uncompressed[oldpos .. pred(index)]; oldpos := index; while index <= length(uncompressed) and uncompressed[index] = ch do incr(index); end while; compressed &:= char(index - oldpos); oldpos := index; else incr(index); end if; end while; compressed &:= uncompressed[oldpos ..]; end func;

Decompress a string using the run length encoding of bzip2

A sequence of 4 identical characters followed by a repeat length between 0 and 255 is replaced by 4 to 259 identical characters. This run length decoding is used as last decompression technique in the bzip2 decompression.
const func string: bzip2RleDecompress (in string: compressed) is func result var string: uncompressed is ""; local var integer: index is 1; var integer: oldpos is 1; var char: ch is ' '; begin while index <= length(compressed) - 3 do ch := compressed[index]; if compressed[succ(index)] = ch and compressed[index + 2] = ch and compressed[index + 3] = ch then index +:= 4; uncompressed &:= compressed[oldpos .. pred(index)] & str(ch) mult ord(compressed[index]); incr(index); oldpos := index; else incr(index); end if; end while; uncompressed &:= compressed[oldpos ..]; end func;

Compress a string using the run length encoding of PackBits

const func string: packBits (in string: uncompressed) is func result var string: compressed is ""; local var integer: index is 1; var integer: oldpos is 1; var char: ch is ' '; begin while index <= length(uncompressed) - 2 do ch := uncompressed[index]; if uncompressed[succ(index)] = ch and uncompressed[index + 2] = ch then while index - oldpos>= 128 do compressed &:= "127円;" & uncompressed[oldpos fixLen 128]; oldpos +:= 128; end while; if index> oldpos then compressed &:= str(char(pred(index - oldpos))) & uncompressed[oldpos .. pred(index)]; oldpos := index; end if; index +:= 3; while index <= length(uncompressed) and uncompressed[index] = ch do incr(index); end while; while index - oldpos>= 128 do compressed &:= "129円;" & str(ch); oldpos +:= 128; end while; if pred(index)> oldpos then compressed &:= str(char(257 - (index - oldpos))) & str(ch); oldpos := index; end if; else incr(index); end if; end while; index := succ(length(uncompressed)); while index - oldpos>= 128 do compressed &:= "127円;" & uncompressed[oldpos fixLen 128]; oldpos +:= 128; end while; if index> oldpos then compressed &:= str(char(pred(index - oldpos))) & uncompressed[oldpos ..]; end if; end func;

Decompress a string using the run length encoding of PackBits

const func string: unpackBits (in string: compressed) is func result var string: uncompressed is ""; local var integer: index is 1; var char: ch is ' '; begin while index <= length(compressed) do ch := compressed[index]; if ch <= char(127) then uncompressed &:= compressed[succ(index) fixLen succ(ord(ch))]; index +:= ord(ch) + 2; else uncompressed &:= str(compressed[succ(index)]) mult (257 - ord(ch)); index +:= 2; end if; end while; end func;

Burrows-Wheeler transform (basic concept)

This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.
const func string: burrowsWheelerTransformConcept (in string: stri) is func result var string: transformed is ""; local var integer: length is 0; var integer: index is 0; var array string: rotations is 0 times ""; begin length := succ(length(stri)); rotations := length times ""; for index range 1 to length do rotations[index] := stri[index ..] & "256円;" & stri[.. pred(index)]; end for; rotations := sort(rotations); for index range 1 to length do transformed &:= rotations[index][length]; end for; end func;

Inverse Burrows-Wheeler transform (basic concept)

This algorithm demonstrates the basic concept of the Burrows-Wheeler transform. It is not intended to be used for big data blocks.
const func string: inverseBurrowsWheelerTransformConcept (in string: transformed) is func result var string: stri is ""; local var integer: length is 0; var integer: count is 0; var integer: index is 0; var array string: rotations is 0 times ""; begin length := length(transformed); rotations := length times ""; for count range 1 to length do for index range 1 to length do rotations[index] := str(transformed[index]) & rotations[index]; end for; rotations := sort(rotations); end for; stri := rotations[1]; index := pos(stri, "256円;"); stri := stri[succ(index) ..] & stri[.. pred(index)]; end func;

Convert a character string to morse code

const func string: charToMorse (in char: ch) is func result var string: morseCode is ""; begin case ch of when {'a', 'A'}: morseCode := ".-"; when {'b', 'B'}: morseCode := "-..."; when {'c', 'C'}: morseCode := "-.-."; when {'d', 'D'}: morseCode := "-.."; when {'e', 'E'}: morseCode := "."; when {'f', 'F'}: morseCode := "..-."; when {'g', 'G'}: morseCode := "--."; when {'h', 'H'}: morseCode := "...."; when {'i', 'I'}: morseCode := ".."; when {'j', 'J'}: morseCode := ".---"; when {'k', 'K'}: morseCode := "-.-"; when {'l', 'L'}: morseCode := ".-.."; when {'m', 'M'}: morseCode := "--"; when {'n', 'N'}: morseCode := "-."; when {'o', 'O'}: morseCode := "---"; when {'p', 'P'}: morseCode := ".--."; when {'q', 'Q'}: morseCode := "--.-"; when {'r', 'R'}: morseCode := ".-."; when {'s', 'S'}: morseCode := "..."; when {'t', 'T'}: morseCode := "-"; when {'u', 'U'}: morseCode := "..-"; when {'v', 'V'}: morseCode := "...-"; when {'w', 'W'}: morseCode := ".--"; when {'x', 'X'}: morseCode := "-..-"; when {'y', 'Y'}: morseCode := "-.--"; when {'z', 'Z'}: morseCode := "--.."; when {'Ä', 'Æ'}: morseCode := ".-.-"; when {'À', 'Å'}: morseCode := ".--.-"; when {'Ç', 'Ĉ'}: morseCode := "-.-.."; when {'Ð'}: morseCode := "..--."; when {'È'}: morseCode := ".-..-"; when {'É'}: morseCode := "..-.."; when {'Ĝ'}: morseCode := "--.-."; when {'Ĵ'}: morseCode := ".---."; when {'Ñ'}: morseCode := "--.--"; when {'Ö', 'Ø'}: morseCode := "---."; when {'Ŝ'}: morseCode := "...-."; when {'Ü', 'Ŭ'}: morseCode := "..--"; when {'Þ'}: morseCode := ".--.."; when {'0'}: morseCode := "-----"; when {'1'}: morseCode := ".----"; when {'2'}: morseCode := "..---"; when {'3'}: morseCode := "...--"; when {'4'}: morseCode := "....-"; when {'5'}: morseCode := "....."; when {'6'}: morseCode := "-...."; when {'7'}: morseCode := "--..."; when {'8'}: morseCode := "---.."; when {'9'}: morseCode := "----."; when {'!'}: morseCode := "-.-.--"; when {'"'}: morseCode := ".-..-."; when {'$'}: morseCode := "...-..-"; when {'''}: morseCode := ".----."; when {'('}: morseCode := "-.--."; when {')'}: morseCode := "-.--.-"; when {'+'}: morseCode := ".-.-."; when {','}: morseCode := "--..--"; when {'-'}: morseCode := "-....-"; when {'.'}: morseCode := ".-.-.-"; when {'/'}: morseCode := "-..-."; when {':'}: morseCode := "---..."; when {';'}: morseCode := "-.-.-."; when {'='}: morseCode := "-...-"; when {'?'}: morseCode := "..--.."; when {'@'}: morseCode := ".--.-."; when {' '}: morseCode := " "; end case; end func; const func string: stringToMorse (in string: stri) is func result var string: morseCode is ""; local var char: ch is ' '; begin for ch range stri do morseCode &:= charToMorse(ch) & " "; end for; end func;

Convert morse code to a character string

const func char: morseToChar (in string: morseLetter) is func result var char: ch is ' '; begin if morseLetter = "" then ch := ' '; elsif morseLetter = "." then ch := 'E'; elsif morseLetter = "-" then ch := 'T'; elsif morseLetter = ".." then ch := 'I'; elsif morseLetter = ".-" then ch := 'A'; elsif morseLetter = "-." then ch := 'N'; elsif morseLetter = "--" then ch := 'M'; elsif morseLetter = "..." then ch := 'S'; elsif morseLetter = "..-" then ch := 'U'; elsif morseLetter = ".-." then ch := 'R'; elsif morseLetter = ".--" then ch := 'W'; elsif morseLetter = "-.." then ch := 'D'; elsif morseLetter = "-.-" then ch := 'K'; elsif morseLetter = "--." then ch := 'G'; elsif morseLetter = "---" then ch := 'O'; elsif morseLetter = "...." then ch := 'H'; elsif morseLetter = "...-" then ch := 'V'; elsif morseLetter = "..-." then ch := 'F'; elsif morseLetter = "..--" then ch := 'Ü'; # also 'Ŭ' elsif morseLetter = ".-.." then ch := 'L'; elsif morseLetter = ".-.-" then ch := 'Ä'; # also 'Æ' elsif morseLetter = ".--." then ch := 'P'; elsif morseLetter = ".---" then ch := 'J'; elsif morseLetter = "-..." then ch := 'B'; elsif morseLetter = "-..-" then ch := 'X'; elsif morseLetter = "-.-." then ch := 'C'; elsif morseLetter = "-.--" then ch := 'Y'; elsif morseLetter = "--.." then ch := 'Z'; elsif morseLetter = "--.-" then ch := 'Q'; elsif morseLetter = "---." then ch := 'Ö'; # also 'Ø' elsif morseLetter = "----" then ch := ' '; # 'ch' elsif morseLetter = "....." then ch := '5'; elsif morseLetter = "....-" then ch := '4'; elsif morseLetter = "...-." then ch := 'Ŝ'; elsif morseLetter = "...--" then ch := '3'; elsif morseLetter = "..-.." then ch := 'É'; elsif morseLetter = "..-.-" then ch := ' '; # unused elsif morseLetter = "..--." then ch := 'Ð'; elsif morseLetter = "..---" then ch := '2'; elsif morseLetter = ".-..." then ch := ' '; # unused elsif morseLetter = ".-..-" then ch := 'È'; elsif morseLetter = ".-.-." then ch := '+'; elsif morseLetter = ".-.--" then ch := ' '; # unused elsif morseLetter = ".--.." then ch := 'Þ'; elsif morseLetter = ".--.-" then ch := 'À'; # also 'Å' elsif morseLetter = ".---." then ch := 'Ĵ'; elsif morseLetter = ".----" then ch := '1'; elsif morseLetter = "-...." then ch := '6'; elsif morseLetter = "-...-" then ch := '='; elsif morseLetter = "-..-." then ch := '/'; elsif morseLetter = "-..--" then ch := ' '; # unused elsif morseLetter = "-.-.." then ch := 'Ç'; # also 'Ĉ' elsif morseLetter = "-.-.-" then ch := ' '; # Start of message elsif morseLetter = "-.--." then ch := '('; # also 'Ĥ' elsif morseLetter = "-.---" then ch := ' '; # unused elsif morseLetter = "--..." then ch := '7'; elsif morseLetter = "--..-" then ch := ' '; # unused elsif morseLetter = "--.-." then ch := 'Ĝ'; elsif morseLetter = "--.--" then ch := 'Ñ'; elsif morseLetter = "---.." then ch := '8'; elsif morseLetter = "---.-" then ch := ' '; # unused elsif morseLetter = "----." then ch := '9'; elsif morseLetter = "-----" then ch := '0'; elsif morseLetter = "..--.." then ch := '?'; elsif morseLetter = ".-..-." then ch := '"'; elsif morseLetter = ".-.-.-" then ch := '.'; elsif morseLetter = ".--.-." then ch := '@'; elsif morseLetter = ".----." then ch := '''; elsif morseLetter = "-....-" then ch := '-'; elsif morseLetter = "-.-.--" then ch := '!'; elsif morseLetter = "-.-.-." then ch := ';'; elsif morseLetter = "-.--.-" then ch := ')'; elsif morseLetter = "--..--" then ch := ','; elsif morseLetter = "---..." then ch := ':'; elsif morseLetter = "...-..-" then ch := '$'; else ch := ' '; end if; end func; const func string: morseToString (in string: morseCode) is func result var string: stri is ""; local var array string: letters is 0 times ""; var string: letter is ""; begin letters := split(replace(morseCode, " ", " "), ' '); for letter range letters do stri &:= str(morseToChar(letter)); end for; end func;

Wildcard match used in command shells

const func boolean: wildcard_match (in string: main_stri, in string: pattern) is func result var boolean: doesMatch is FALSE; local var integer: main_length is 0; var integer: main_index is 1; var string: pattern_tail is ""; begin if pattern = "" then doesMatch := main_stri = ""; else case pattern[1] of when {'*'}: if pattern = "*" then doesMatch := TRUE; else main_length := length(main_stri); pattern_tail := pattern[2 .. ]; while main_index <= main_length and not doesMatch do doesMatch := wildcard_match(main_stri[main_index .. ], pattern_tail); incr(main_index); end while; end if; when {'?'}: if main_stri <> "" then doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]); end if; otherwise: if main_stri <> "" and main_stri[1] = pattern[1] then doesMatch := wildcard_match(main_stri[2 .. ], pattern[2 .. ]); end if; end case; end if; end func;

String compare function where digit sequences are compared numerically

include "scanstri.s7i"; const func integer: cmpNumeric (in var string: stri1, in var string: stri2) is func result var integer: signumValue is 0; local var string: part1 is ""; var string: part2 is ""; begin while signumValue = 0 and (stri1 <> "" or stri2 <> "") do part1 := getDigits(stri1); part2 := getDigits(stri2); if part1 <> "" and part2 <> "" then signumValue := compare(part1 lpad0 length(part2), part2 lpad0 length(part1)); if signumValue = 0 then signumValue := compare(length(part1), length(part2)); end if; elsif part1 <> "" then signumValue := compare(part1, stri2); elsif part2 <> "" then signumValue := compare(stri1, part2); end if; if signumValue = 0 then part1 := getNonDigits(stri1); part2 := getNonDigits(stri2); if part1 <> "" and part2 <> "" then signumValue := compare(part1, part2); elsif part1 <> "" then signumValue := compare(part1, stri2); elsif part2 <> "" then signumValue := compare(stri1, part2); end if; end if; end while; end func;

previous up next