There didn't seem to be a predefined source of "combining characters" in the Java standard library. According to the Unicode Character Database 10.0.0, some of the characters in the "Mn" and "Mc" category fall into the Not_Reordered class, while others fall into one of the real classes. Therefore, I extracted the raw list of code points from that database.
Based on that, I wrote the following code, including some simple test cases. The code works as intended for all test cases I tried. Probably there are some edge cases in other scripts and languages I do not know. I'd like to learn about these, as well as any coding style issues.
package de.roland_illig.strrev
import com.ibm.icu.lang.UCharacter
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
/**
 * Returns this string with its clusters in reverse order.
 *
 * A cluster is a run of code points that has to stay together: one code point
 * followed by any number of code points whose canonical combining class is
 * non-zero (such as a German umlaut diaeresis or Arabic tashkīl). In addition,
 * an Arabic lam (U+0644) that starts a cluster keeps the first alef (U+0627)
 * that follows it, even when combining marks sit between the two.
 *
 * Code points inside a cluster keep their original order. Combining code points
 * at the very beginning of the string form a cluster of their own. The string is
 * walked by code point, so surrogate pairs are never split.
 *
 * @return the reversed string; an empty receiver yields an empty string
 */
fun String.reverse(): String {
    val lam = 0x0644
    val alef = 0x0627

    // An alef joins the current cluster only when that cluster starts with lam and
    // has not taken an alef yet. Without the second condition every further alef
    // ("lam alef alef") would be swallowed by the same cluster and never reversed.
    fun isLamAlef(cluster: List<Int>, ch: Int) =
        ch == alef && cluster.firstOrNull() == lam && alef !in cluster

    val clusters = mutableListOf<List<Int>>()
    val cluster = mutableListOf<Int>()
    this.codePoints().forEachOrdered { ch ->
        // Combining class 0 means "not a combining mark", i.e. a potential cluster start.
        val startsCluster = UCharacter.getCombiningClass(ch) == 0 && !isLamAlef(cluster, ch)
        if (startsCluster && cluster.isNotEmpty()) {
            // Snapshot the finished cluster before the working list is reused.
            clusters += cluster.toList()
            cluster.clear()
        }
        cluster += ch
    }
    // Flush the cluster that was still being collected when the input ended.
    if (cluster.isNotEmpty()) {
        clusters += cluster.toList()
    }
    return fromCodePoints(*clusters.reversed().flatten().toIntArray())
}
/**
 * Unit tests for [reverse], covering plain ASCII, a supplementary code point,
 * combining marks and Arabic text.
 */
class StringReverseTest {

    @Test
    fun ascii() {
        val reversed = "hello".reverse()
        assertThat(reversed).isEqualTo("olleh")
    }

    @Test
    fun surrogates() {
        // U+1F645 is above U+FFFF, so it occupies a surrogate pair in the string.
        val single = fromCodePoints(0x1F645)
        val reversed = single.reverse()
        assertThat(reversed).isEqualTo(single)
    }

    @Test
    fun combining() {
        // "A" followed by U+0308 must come back unchanged.
        val letterThenMark = fromCodePoints(0x0041, 0x0308)
        assertThat(letterThenMark.reverse()).isEqualTo(letterThenMark)
    }

    @Test
    fun arabic() {
        assertThat("أَهْلًا وَ سَهْلًا".reverse()).isEqualTo("لًاهْسَ وَ لًاهْأَ")
    }

    @Test
    fun combiningAtBeginning() {
        // A leading U+0308 has nothing before it to attach to; the expected result puts it after "A".
        val markThenLetter = fromCodePoints(0x0308, 0x0041)
        val expected = fromCodePoints(0x0041, 0x0308)
        assertThat(markThenLetter.reverse()).isEqualTo(expected)
    }
}
/**
 * Builds a string from the given Unicode code points, in the given order.
 *
 * @param codePoints the code points to encode; may be empty
 * @return the resulting string, empty when no code points are given
 */
private fun fromCodePoints(vararg codePoints: Int): String {
    val builder = StringBuilder(codePoints.size)
    for (codePoint in codePoints) {
        builder.appendCodePoint(codePoint)
    }
    return builder.toString()
}
For completeness, here are the Gradle dependencies for build.gradle:
package de.roland_illig.strrev
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
/**
* Returns the reversed string, keeping clusters of combining code points
* (such as German umlauts or Arabic tashkīl) together.
*/
fun String.reverse(): Stringdependencies {
fun isLamAlef(cluster: List<Int>, ch: Int) =
cluster.isNotEmpty() && cluster.first() == 0x0644 && ch == 0x0627
val clusters = mutableListOf<List<Int>>()
val cluster = mutableListOf<Int>()
compile this"org.codePoints()jetbrains.forEachOrdered { ch kotlin:kotlin->
if (!isCombining(ch) && !isLamAlef(cluster, ch)) {
if (cluster.isNotEmpty()) {
clusters += cluster.toList()
cluster.clear()
}
}
cluster += ch
}
if (cluster.isNotEmpty()) {
clusters += cluster.toList()
cluster.clear()
}
return fromCodePoints(*clusters.reversed().flatten().toIntArray())
}
class StringReverseTest {
@Test
fun ascii() {
assertThat("hello".reverse()).isEqualTo("olleh")
}
@Test
fun surrogates() {
val emoji = fromCodePoints(0x1F645)stdlib-jdk8:$kotlin_version"
compile group: assertThat(emoji'com.reverse())ibm.isEqualTo(emoji)
}
@Test
fun combining() {
val combinedUmlaut = fromCodePoints(0x0041icu', 0x0308)
assertThat(combinedUmlaut.reverse()).isEqualTo(combinedUmlaut)
}
@Test
fun arabic() {
assertThat("أَهْلًا وَ سَهْلًا".reverse()).isEqualTo("لًاهْسَ وَ لًاهْأَ")
}
@Test
fun combiningAtBeginning() {
val combinedUmlaut =name: fromCodePoints(0x0308'icu4j', 0x0041)
version: assertThat(combinedUmlaut.reverse())'61.isEqualTo(fromCodePoints(0x0041, 0x0308))
}
}1'
/**
* Generated from Unicode 10.0.0 by:
*
* perl -wane \
* '
* if (@F > 2 && $F[1] eq ";" && $F[2] != 0) {
* if ($F[0] =~ /^([0-9a-f]+)\.\.([0-9a-f]+)$/i) {
* printf("|| codePoint >= 0x%s && codePoint <= 0x%s\n", $1, $2);
* } else {
* printf("|| codePoint == 0x%s\n", $F[0]);
* }
* }
* ' DerivedCombiningClass.txt
*/
private fun isCombining(codePoint: Int): Boolean {
return false
|| codePoint >= 0x0334 && codePoint <= 0x0338
|| codePoint == 0x1CD4
|| codePoint >= 0x1CE2 && codePoint <= 0x1CE8
|| codePoint >= 0x20D2 && codePoint <= 0x20D3
|| codePoint >= 0x20D8 && codePoint <= 0x20DA
|| codePoint >= 0x20E5 && codePoint <= 0x20E6
|| codePoint >= 0x20EA && codePoint <= 0x20EB
|| codePoint == 0x10A39
|| codePoint >= 0x16AF0 && codePoint <= 0x16AF4
|| codePoint == 0x1BC9E
|| codePoint >= 0x1D167 && codePoint <= 0x1D169
|| codePoint == 0x093C
|| codePoint == 0x09BC
|| codePoint == 0x0A3C
|| codePoint == 0x0ABC
|| codePoint == 0x0B3C
|| codePoint == 0x0CBC
|| codePoint == 0x1037
|| codePoint == 0x1B34
|| codePoint == 0x1BE6
|| codePoint == 0x1C37
|| codePoint == 0xA9B3
|| codePoint == 0x110BA
|| codePoint == 0x11173
|| codePoint == 0x111CA
|| codePoint == 0x11236
|| codePoint == 0x112E9
|| codePoint == 0x1133C
|| codePoint == 0x11446
|| codePoint == 0x114C3
|| codePoint == 0x115C0
|| codePoint == 0x116B7
|| codePoint == 0x11D42
|| codePoint == 0x1E94A
|| codePoint >= 0x3099 && codePoint <= 0x309A
|| codePoint == 0x094D
|| codePoint == 0x09CD
|| codePoint == 0x0A4D
|| codePoint == 0x0ACD
|| codePoint == 0x0B4D
|| codePoint == 0x0BCD
|| codePoint == 0x0C4D
|| codePoint == 0x0CCD
|| codePoint >= 0x0D3B && codePoint <= 0x0D3C
|| codePoint == 0x0D4D
|| codePoint == 0x0DCA
|| codePoint == 0x0E3A
|| codePoint == 0x0F84
|| codePoint >= 0x1039 && codePoint <= 0x103A
|| codePoint == 0x1714
|| codePoint == 0x1734
|| codePoint == 0x17D2
|| codePoint == 0x1A60
|| codePoint == 0x1B44
|| codePoint == 0x1BAA
|| codePoint == 0x1BAB
|| codePoint >= 0x1BF2 && codePoint <= 0x1BF3
|| codePoint == 0x2D7F
|| codePoint == 0xA806
|| codePoint == 0xA8C4
|| codePoint == 0xA953
|| codePoint == 0xA9C0
|| codePoint == 0xAAF6
|| codePoint == 0xABED
|| codePoint == 0x10A3F
|| codePoint == 0x11046
|| codePoint == 0x1107F
|| codePoint == 0x110B9
|| codePoint >= 0x11133 && codePoint <= 0x11134
|| codePoint == 0x111C0
|| codePoint == 0x11235
|| codePoint == 0x112EA
|| codePoint == 0x1134D
|| codePoint == 0x11442
|| codePoint == 0x114C2
|| codePoint == 0x115BF
|| codePoint == 0x1163F
|| codePoint == 0x116B6
|| codePoint == 0x1172B
|| codePoint == 0x11A34
|| codePoint == 0x11A47
|| codePoint == 0x11A99
|| codePoint == 0x11C3F
|| codePoint >= 0x11D44 && codePoint <= 0x11D45
|| codePoint == 0x05B0
|| codePoint == 0x05B1
|| codePoint == 0x05B2
|| codePoint == 0x05B3
|| codePoint == 0x05B4
|| codePoint == 0x05B5
|| codePoint == 0x05B6
|| codePoint == 0x05B7
|| codePoint == 0x05B8
|| codePoint == 0x05C7
|| codePoint >= 0x05B9 && codePoint <= 0x05BA
|| codePoint == 0x05BB
|| codePoint == 0x05BC
|| codePoint == 0x05BD
|| codePoint == 0x05BF
|| codePoint == 0x05C1
|| codePoint == 0x05C2
|| codePoint == 0xFB1E
|| codePoint == 0x064B
|| codePoint == 0x08F0
|| codePoint == 0x064C
|| codePoint == 0x08F1
|| codePoint == 0x064D
|| codePoint == 0x08F2
|| codePoint == 0x0618
|| codePoint == 0x064E
|| codePoint == 0x0619
|| codePoint == 0x064F
|| codePoint == 0x061A
|| codePoint == 0x0650
|| codePoint == 0x0651
|| codePoint == 0x0652
|| codePoint == 0x0670
|| codePoint == 0x0711
|| codePoint == 0x0C55
|| codePoint == 0x0C56
|| codePoint >= 0x0E38 && codePoint <= 0x0E39
|| codePoint >= 0x0E48 && codePoint <= 0x0E4B
|| codePoint >= 0x0EB8 && codePoint <= 0x0EB9
|| codePoint >= 0x0EC8 && codePoint <= 0x0ECB
|| codePoint == 0x0F71
|| codePoint == 0x0F72
|| codePoint >= 0x0F7A && codePoint <= 0x0F7D
|| codePoint == 0x0F80
|| codePoint == 0x0F74
|| codePoint >= 0x0321 && codePoint <= 0x0322
|| codePoint >= 0x0327 && codePoint <= 0x0328
|| codePoint == 0x1DD0
|| codePoint == 0x1DCE
|| codePoint == 0x031B
|| codePoint == 0x0F39
|| codePoint >= 0x1D165 && codePoint <= 0x1D166
|| codePoint >= 0x1D16E && codePoint <= 0x1D172
|| codePoint == 0x302A
|| codePoint >= 0x0316 && codePoint <= 0x0319
|| codePoint >= 0x031C && codePoint <= 0x0320
|| codePoint >= 0x0323 && codePoint <= 0x0326
|| codePoint >= 0x0329 && codePoint <= 0x0333
|| codePoint >= 0x0339 && codePoint <= 0x033C
|| codePoint >= 0x0347 && codePoint <= 0x0349
|| codePoint >= 0x034D && codePoint <= 0x034E
|| codePoint >= 0x0353 && codePoint <= 0x0356
|| codePoint >= 0x0359 && codePoint <= 0x035A
|| codePoint == 0x0591
|| codePoint == 0x0596
|| codePoint == 0x059B
|| codePoint >= 0x05A2 && codePoint <= 0x05A7
|| codePoint == 0x05AA
|| codePoint == 0x05C5
|| codePoint >= 0x0655 && codePoint <= 0x0656
|| codePoint == 0x065C
|| codePoint == 0x065F
|| codePoint == 0x06E3
|| codePoint == 0x06EA
|| codePoint == 0x06ED
|| codePoint == 0x0731
|| codePoint == 0x0734
|| codePoint >= 0x0737 && codePoint <= 0x0739
|| codePoint >= 0x073B && codePoint <= 0x073C
|| codePoint == 0x073E
|| codePoint == 0x0742
|| codePoint == 0x0744
|| codePoint == 0x0746
|| codePoint == 0x0748
|| codePoint == 0x07F2
|| codePoint >= 0x0859 && codePoint <= 0x085B
|| codePoint == 0x08E3
|| codePoint == 0x08E6
|| codePoint == 0x08E9
|| codePoint >= 0x08ED && codePoint <= 0x08EF
|| codePoint == 0x08F6
|| codePoint >= 0x08F9 && codePoint <= 0x08FA
|| codePoint == 0x0952
|| codePoint >= 0x0F18 && codePoint <= 0x0F19
|| codePoint == 0x0F35
|| codePoint == 0x0F37
|| codePoint == 0x0FC6
|| codePoint == 0x108D
|| codePoint == 0x193B
|| codePoint == 0x1A18
|| codePoint == 0x1A7F
|| codePoint >= 0x1AB5 && codePoint <= 0x1ABA
|| codePoint == 0x1ABD
|| codePoint == 0x1B6C
|| codePoint >= 0x1CD5 && codePoint <= 0x1CD9
|| codePoint >= 0x1CDC && codePoint <= 0x1CDF
|| codePoint == 0x1CED
|| codePoint == 0x1DC2
|| codePoint == 0x1DCA
|| codePoint == 0x1DCF
|| codePoint == 0x1DF9
|| codePoint == 0x1DFD
|| codePoint == 0x1DFF
|| codePoint == 0x20E8
|| codePoint >= 0x20EC && codePoint <= 0x20EF
|| codePoint >= 0xA92B && codePoint <= 0xA92D
|| codePoint == 0xAAB4
|| codePoint >= 0xFE27 && codePoint <= 0xFE2D
|| codePoint == 0x101FD
|| codePoint == 0x102E0
|| codePoint == 0x10A0D
|| codePoint == 0x10A3A
|| codePoint == 0x10AE6
|| codePoint >= 0x1D17B && codePoint <= 0x1D182
|| codePoint >= 0x1D18A && codePoint <= 0x1D18B
|| codePoint >= 0x1E8D0 && codePoint <= 0x1E8D6
|| codePoint == 0x059A
|| codePoint == 0x05AD
|| codePoint == 0x1939
|| codePoint == 0x302D
|| codePoint >= 0x302E && codePoint <= 0x302F
|| codePoint == 0x1D16D
|| codePoint == 0x05AE
|| codePoint == 0x18A9
|| codePoint >= 0x1DF7 && codePoint <= 0x1DF8
|| codePoint == 0x302B
|| codePoint >= 0x0300 && codePoint <= 0x0314
|| codePoint >= 0x033D && codePoint <= 0x0344
|| codePoint == 0x0346
|| codePoint >= 0x034A && codePoint <= 0x034C
|| codePoint >= 0x0350 && codePoint <= 0x0352
|| codePoint == 0x0357
|| codePoint == 0x035B
|| codePoint >= 0x0363 && codePoint <= 0x036F
|| codePoint >= 0x0483 && codePoint <= 0x0487
|| codePoint >= 0x0592 && codePoint <= 0x0595
|| codePoint >= 0x0597 && codePoint <= 0x0599
|| codePoint >= 0x059C && codePoint <= 0x05A1
|| codePoint >= 0x05A8 && codePoint <= 0x05A9
|| codePoint >= 0x05AB && codePoint <= 0x05AC
|| codePoint == 0x05AF
|| codePoint == 0x05C4
|| codePoint >= 0x0610 && codePoint <= 0x0617
|| codePoint >= 0x0653 && codePoint <= 0x0654
|| codePoint >= 0x0657 && codePoint <= 0x065B
|| codePoint >= 0x065D && codePoint <= 0x065E
|| codePoint >= 0x06D6 && codePoint <= 0x06DC
|| codePoint >= 0x06DF && codePoint <= 0x06E2
|| codePoint == 0x06E4
|| codePoint >= 0x06E7 && codePoint <= 0x06E8
|| codePoint >= 0x06EB && codePoint <= 0x06EC
|| codePoint == 0x0730
|| codePoint >= 0x0732 && codePoint <= 0x0733
|| codePoint >= 0x0735 && codePoint <= 0x0736
|| codePoint == 0x073A
|| codePoint == 0x073D
|| codePoint >= 0x073F && codePoint <= 0x0741
|| codePoint == 0x0743
|| codePoint == 0x0745
|| codePoint == 0x0747
|| codePoint >= 0x0749 && codePoint <= 0x074A
|| codePoint >= 0x07EB && codePoint <= 0x07F1
|| codePoint == 0x07F3
|| codePoint >= 0x0816 && codePoint <= 0x0819
|| codePoint >= 0x081B && codePoint <= 0x0823
|| codePoint >= 0x0825 && codePoint <= 0x0827
|| codePoint >= 0x0829 && codePoint <= 0x082D
|| codePoint >= 0x08D4 && codePoint <= 0x08E1
|| codePoint >= 0x08E4 && codePoint <= 0x08E5
|| codePoint >= 0x08E7 && codePoint <= 0x08E8
|| codePoint >= 0x08EA && codePoint <= 0x08EC
|| codePoint >= 0x08F3 && codePoint <= 0x08F5
|| codePoint >= 0x08F7 && codePoint <= 0x08F8
|| codePoint >= 0x08FB && codePoint <= 0x08FF
|| codePoint == 0x0951
|| codePoint >= 0x0953 && codePoint <= 0x0954
|| codePoint >= 0x0F82 && codePoint <= 0x0F83
|| codePoint >= 0x0F86 && codePoint <= 0x0F87
|| codePoint >= 0x135D && codePoint <= 0x135F
|| codePoint == 0x17DD
|| codePoint == 0x193A
|| codePoint == 0x1A17
|| codePoint >= 0x1A75 && codePoint <= 0x1A7C
|| codePoint >= 0x1AB0 && codePoint <= 0x1AB4
|| codePoint >= 0x1ABB && codePoint <= 0x1ABC
|| codePoint == 0x1B6B
|| codePoint >= 0x1B6D && codePoint <= 0x1B73
|| codePoint >= 0x1CD0 && codePoint <= 0x1CD2
|| codePoint >= 0x1CDA && codePoint <= 0x1CDB
|| codePoint == 0x1CE0
|| codePoint == 0x1CF4
|| codePoint >= 0x1CF8 && codePoint <= 0x1CF9
|| codePoint >= 0x1DC0 && codePoint <= 0x1DC1
|| codePoint >= 0x1DC3 && codePoint <= 0x1DC9
|| codePoint >= 0x1DCB && codePoint <= 0x1DCC
|| codePoint >= 0x1DD1 && codePoint <= 0x1DF5
|| codePoint == 0x1DFB
|| codePoint == 0x1DFE
|| codePoint >= 0x20D0 && codePoint <= 0x20D1
|| codePoint >= 0x20D4 && codePoint <= 0x20D7
|| codePoint >= 0x20DB && codePoint <= 0x20DC
|| codePoint == 0x20E1
|| codePoint == 0x20E7
|| codePoint == 0x20E9
|| codePoint == 0x20F0
|| codePoint >= 0x2CEF && codePoint <= 0x2CF1
|| codePoint >= 0x2DE0 && codePoint <= 0x2DFF
|| codePoint == 0xA66F
|| codePoint >= 0xA674 && codePoint <= 0xA67D
|| codePoint >= 0xA69E && codePoint <= 0xA69F
|| codePoint >= 0xA6F0 && codePoint <= 0xA6F1
|| codePoint >= 0xA8E0 && codePoint <= 0xA8F1
|| codePoint == 0xAAB0
|| codePoint >= 0xAAB2 && codePoint <= 0xAAB3
|| codePoint >= 0xAAB7 && codePoint <= 0xAAB8
|| codePoint >= 0xAABE && codePoint <= 0xAABF
|| codePoint == 0xAAC1
|| codePoint >= 0xFE20 && codePoint <= 0xFE26
|| codePoint >= 0xFE2E && codePoint <= 0xFE2F
|| codePoint >= 0x10376 && codePoint <= 0x1037A
|| codePoint == 0x10A0F
|| codePoint == 0x10A38
|| codePoint == 0x10AE5
|| codePoint >= 0x11100 && codePoint <= 0x11102
|| codePoint >= 0x11366 && codePoint <= 0x1136C
|| codePoint >= 0x11370 && codePoint <= 0x11374
|| codePoint >= 0x16B30 && codePoint <= 0x16B36
|| codePoint >= 0x1D185 && codePoint <= 0x1D189
|| codePoint >= 0x1D1AA && codePoint <= 0x1D1AD
|| codePoint >= 0x1D242 && codePoint <= 0x1D244
|| codePoint >= 0x1E000 && codePoint <= 0x1E006
|| codePoint >= 0x1E008 && codePoint <= 0x1E018
|| codePoint >= 0x1E01B && codePoint <= 0x1E021
|| codePoint >= 0x1E023 && codePoint <= 0x1E024
|| codePoint >= 0x1E026 && codePoint <= 0x1E02A
|| codePoint >= 0x1E944 && codePoint <= 0x1E949
|| codePoint == 0x0315
|| codePoint == 0x031A
|| codePoint == 0x0358
|| codePoint == 0x1DF6
|| codePoint == 0x302C
|| codePoint == 0x035C
|| codePoint == 0x035F
|| codePoint == 0x0362
|| codePoint == 0x1DFC
|| codePoint >= 0x035D && codePoint <= 0x035E
|| codePoint >= 0x0360 && codePoint <= 0x0361
|| codePoint == 0x1DCD
|| codePoint == 0x0345
testCompile "org.jetbrains.kotlin:kotlin-test:$kotlin_version"
testCompile "org.junit.jupiter:junit-jupiter-api:5.0.2"
testCompile "org.assertj:assertj-core:3.9.0"
}
/**
 * Builds a string from the given Unicode code points, in the given order.
 *
 * @param codePoints the code points to encode; may be empty
 * @return the resulting string, empty when no code points are given
 */
private fun fromCodePoints(vararg codePoints: Int): String {
    val builder = StringBuilder(codePoints.size)
    for (codePoint in codePoints) {
        builder.appendCodePoint(codePoint)
    }
    return builder.toString()
}
There didn't seem to be a predefined source of "combining characters" in the Java standard library. According to the Unicode Character Database 10.0.0, some of the characters in the "Mn" and "Mc" category fall into the Not_Reordered class, while others fall into one of the real classes. Therefore, I extracted the raw list of code points from that database.
Based on that, I wrote the following code, including some simple test cases. The code works as intended for all test cases I tried. Probably there are some edge cases in other scripts and languages I do not know. I'd like to learn about these, as well as any coding style issues.
package de.roland_illig.strrev
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
/**
 * Returns this string with its clusters in reverse order.
 *
 * A cluster is a run of code points that has to stay together: one code point
 * followed by any number of combining code points, as reported by `isCombining`
 * (such as a German umlaut diaeresis or Arabic tashkīl). In addition, an Arabic
 * lam (U+0644) that starts a cluster keeps the first alef (U+0627) that follows
 * it, even when combining marks sit between the two.
 *
 * Code points inside a cluster keep their original order. Combining code points
 * at the very beginning of the string form a cluster of their own. The string is
 * walked by code point, so surrogate pairs are never split.
 *
 * @return the reversed string; an empty receiver yields an empty string
 */
fun String.reverse(): String {
    val lam = 0x0644
    val alef = 0x0627

    // An alef joins the current cluster only when that cluster starts with lam and
    // has not taken an alef yet. Without the second condition every further alef
    // ("lam alef alef") would be swallowed by the same cluster and never reversed.
    fun isLamAlef(cluster: List<Int>, ch: Int) =
        ch == alef && cluster.firstOrNull() == lam && alef !in cluster

    val clusters = mutableListOf<List<Int>>()
    val cluster = mutableListOf<Int>()
    this.codePoints().forEachOrdered { ch ->
        val startsCluster = !isCombining(ch) && !isLamAlef(cluster, ch)
        if (startsCluster && cluster.isNotEmpty()) {
            // Snapshot the finished cluster before the working list is reused.
            clusters += cluster.toList()
            cluster.clear()
        }
        cluster += ch
    }
    // Flush the cluster that was still being collected when the input ended.
    if (cluster.isNotEmpty()) {
        clusters += cluster.toList()
    }
    return fromCodePoints(*clusters.reversed().flatten().toIntArray())
}
/**
 * Unit tests for [reverse], covering plain ASCII, a supplementary code point,
 * combining marks and Arabic text.
 */
class StringReverseTest {

    @Test
    fun ascii() {
        val reversed = "hello".reverse()
        assertThat(reversed).isEqualTo("olleh")
    }

    @Test
    fun surrogates() {
        // U+1F645 is above U+FFFF, so it occupies a surrogate pair in the string.
        val single = fromCodePoints(0x1F645)
        val reversed = single.reverse()
        assertThat(reversed).isEqualTo(single)
    }

    @Test
    fun combining() {
        // "A" followed by U+0308 must come back unchanged.
        val letterThenMark = fromCodePoints(0x0041, 0x0308)
        assertThat(letterThenMark.reverse()).isEqualTo(letterThenMark)
    }

    @Test
    fun arabic() {
        assertThat("أَهْلًا وَ سَهْلًا".reverse()).isEqualTo("لًاهْسَ وَ لًاهْأَ")
    }

    @Test
    fun combiningAtBeginning() {
        // A leading U+0308 has nothing before it to attach to; the expected result puts it after "A".
        val markThenLetter = fromCodePoints(0x0308, 0x0041)
        val expected = fromCodePoints(0x0041, 0x0308)
        assertThat(markThenLetter.reverse()).isEqualTo(expected)
    }
}
/**
 * Tells whether [codePoint] is on the hard-coded list of code points that
 * `DerivedCombiningClass.txt` lists with a non-zero combining class.
 *
 * The list was generated from Unicode 10.0.0 by:
 *
 *     perl -wane \
 *     '
 *       if (@F > 2 && $F[1] eq ";" && $F[2] != 0) {
 *         if ($F[0] =~ /^([0-9a-f]+)\.\.([0-9a-f]+)$/i) {
 *           printf("|| codePoint >= 0x%s && codePoint <= 0x%s\n", $1, $2);
 *         } else {
 *           printf("|| codePoint == 0x%s\n", $F[0]);
 *         }
 *       }
 *     ' DerivedCombiningClass.txt
 *
 * The entries below keep the order in which that script emitted them.
 *
 * @param codePoint the Unicode code point to classify
 * @return `true` if the code point is on the list, `false` otherwise
 */
private fun isCombining(codePoint: Int): Boolean = when (codePoint) {
    in 0x0334..0x0338, 0x1CD4, in 0x1CE2..0x1CE8, in 0x20D2..0x20D3,
    in 0x20D8..0x20DA, in 0x20E5..0x20E6, in 0x20EA..0x20EB, 0x10A39,
    in 0x16AF0..0x16AF4, 0x1BC9E, in 0x1D167..0x1D169,
    0x093C, 0x09BC, 0x0A3C, 0x0ABC, 0x0B3C, 0x0CBC, 0x1037, 0x1B34,
    0x1BE6, 0x1C37, 0xA9B3, 0x110BA, 0x11173, 0x111CA, 0x11236, 0x112E9,
    0x1133C, 0x11446, 0x114C3, 0x115C0, 0x116B7, 0x11D42, 0x1E94A,
    in 0x3099..0x309A,
    0x094D, 0x09CD, 0x0A4D, 0x0ACD, 0x0B4D, 0x0BCD, 0x0C4D, 0x0CCD,
    in 0x0D3B..0x0D3C, 0x0D4D, 0x0DCA, 0x0E3A, 0x0F84, in 0x1039..0x103A,
    0x1714, 0x1734, 0x17D2, 0x1A60, 0x1B44, 0x1BAA, 0x1BAB,
    in 0x1BF2..0x1BF3, 0x2D7F, 0xA806, 0xA8C4, 0xA953, 0xA9C0, 0xAAF6,
    0xABED, 0x10A3F, 0x11046, 0x1107F, 0x110B9, in 0x11133..0x11134,
    0x111C0, 0x11235, 0x112EA, 0x1134D, 0x11442, 0x114C2, 0x115BF,
    0x1163F, 0x116B6, 0x1172B, 0x11A34, 0x11A47, 0x11A99, 0x11C3F,
    in 0x11D44..0x11D45,
    0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
    0x05B8, 0x05C7, in 0x05B9..0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BF,
    0x05C1, 0x05C2, 0xFB1E,
    0x064B, 0x08F0, 0x064C, 0x08F1, 0x064D, 0x08F2, 0x0618, 0x064E,
    0x0619, 0x064F, 0x061A, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711,
    0x0C55, 0x0C56, in 0x0E38..0x0E39, in 0x0E48..0x0E4B,
    in 0x0EB8..0x0EB9, in 0x0EC8..0x0ECB, 0x0F71, 0x0F72,
    in 0x0F7A..0x0F7D, 0x0F80, 0x0F74,
    in 0x0321..0x0322, in 0x0327..0x0328, 0x1DD0, 0x1DCE, 0x031B, 0x0F39,
    in 0x1D165..0x1D166, in 0x1D16E..0x1D172, 0x302A,
    in 0x0316..0x0319, in 0x031C..0x0320, in 0x0323..0x0326,
    in 0x0329..0x0333, in 0x0339..0x033C, in 0x0347..0x0349,
    in 0x034D..0x034E, in 0x0353..0x0356, in 0x0359..0x035A,
    0x0591, 0x0596, 0x059B, in 0x05A2..0x05A7, 0x05AA, 0x05C5,
    in 0x0655..0x0656, 0x065C, 0x065F, 0x06E3, 0x06EA, 0x06ED,
    0x0731, 0x0734, in 0x0737..0x0739, in 0x073B..0x073C, 0x073E,
    0x0742, 0x0744, 0x0746, 0x0748, 0x07F2, in 0x0859..0x085B,
    0x08E3, 0x08E6, 0x08E9, in 0x08ED..0x08EF, 0x08F6, in 0x08F9..0x08FA,
    0x0952, in 0x0F18..0x0F19, 0x0F35, 0x0F37, 0x0FC6, 0x108D, 0x193B,
    0x1A18, 0x1A7F, in 0x1AB5..0x1ABA, 0x1ABD, 0x1B6C,
    in 0x1CD5..0x1CD9, in 0x1CDC..0x1CDF, 0x1CED, 0x1DC2, 0x1DCA, 0x1DCF,
    0x1DF9, 0x1DFD, 0x1DFF, 0x20E8, in 0x20EC..0x20EF, in 0xA92B..0xA92D,
    0xAAB4, in 0xFE27..0xFE2D, 0x101FD, 0x102E0, 0x10A0D, 0x10A3A, 0x10AE6,
    in 0x1D17B..0x1D182, in 0x1D18A..0x1D18B, in 0x1E8D0..0x1E8D6,
    0x059A, 0x05AD, 0x1939, 0x302D, in 0x302E..0x302F, 0x1D16D,
    0x05AE, 0x18A9, in 0x1DF7..0x1DF8, 0x302B,
    in 0x0300..0x0314, in 0x033D..0x0344, 0x0346, in 0x034A..0x034C,
    in 0x0350..0x0352, 0x0357, 0x035B, in 0x0363..0x036F,
    in 0x0483..0x0487, in 0x0592..0x0595, in 0x0597..0x0599,
    in 0x059C..0x05A1, in 0x05A8..0x05A9, in 0x05AB..0x05AC, 0x05AF, 0x05C4,
    in 0x0610..0x0617, in 0x0653..0x0654, in 0x0657..0x065B,
    in 0x065D..0x065E, in 0x06D6..0x06DC, in 0x06DF..0x06E2, 0x06E4,
    in 0x06E7..0x06E8, in 0x06EB..0x06EC, 0x0730, in 0x0732..0x0733,
    in 0x0735..0x0736, 0x073A, 0x073D, in 0x073F..0x0741, 0x0743, 0x0745,
    0x0747, in 0x0749..0x074A, in 0x07EB..0x07F1, 0x07F3,
    in 0x0816..0x0819, in 0x081B..0x0823, in 0x0825..0x0827,
    in 0x0829..0x082D, in 0x08D4..0x08E1, in 0x08E4..0x08E5,
    in 0x08E7..0x08E8, in 0x08EA..0x08EC, in 0x08F3..0x08F5,
    in 0x08F7..0x08F8, in 0x08FB..0x08FF, 0x0951, in 0x0953..0x0954,
    in 0x0F82..0x0F83, in 0x0F86..0x0F87, in 0x135D..0x135F, 0x17DD,
    0x193A, 0x1A17, in 0x1A75..0x1A7C, in 0x1AB0..0x1AB4,
    in 0x1ABB..0x1ABC, 0x1B6B, in 0x1B6D..0x1B73, in 0x1CD0..0x1CD2,
    in 0x1CDA..0x1CDB, 0x1CE0, 0x1CF4, in 0x1CF8..0x1CF9,
    in 0x1DC0..0x1DC1, in 0x1DC3..0x1DC9, in 0x1DCB..0x1DCC,
    in 0x1DD1..0x1DF5, 0x1DFB, 0x1DFE, in 0x20D0..0x20D1,
    in 0x20D4..0x20D7, in 0x20DB..0x20DC, 0x20E1, 0x20E7, 0x20E9, 0x20F0,
    in 0x2CEF..0x2CF1, in 0x2DE0..0x2DFF, 0xA66F, in 0xA674..0xA67D,
    in 0xA69E..0xA69F, in 0xA6F0..0xA6F1, in 0xA8E0..0xA8F1, 0xAAB0,
    in 0xAAB2..0xAAB3, in 0xAAB7..0xAAB8, in 0xAABE..0xAABF, 0xAAC1,
    in 0xFE20..0xFE26, in 0xFE2E..0xFE2F, in 0x10376..0x1037A,
    0x10A0F, 0x10A38, 0x10AE5, in 0x11100..0x11102, in 0x11366..0x1136C,
    in 0x11370..0x11374, in 0x16B30..0x16B36, in 0x1D185..0x1D189,
    in 0x1D1AA..0x1D1AD, in 0x1D242..0x1D244, in 0x1E000..0x1E006,
    in 0x1E008..0x1E018, in 0x1E01B..0x1E021, in 0x1E023..0x1E024,
    in 0x1E026..0x1E02A, in 0x1E944..0x1E949,
    0x0315, 0x031A, 0x0358, 0x1DF6, 0x302C,
    0x035C, 0x035F, 0x0362, 0x1DFC,
    in 0x035D..0x035E, in 0x0360..0x0361, 0x1DCD,
    0x0345 -> true
    else -> false
}
/**
 * Builds a string from the given Unicode code points, in the given order.
 *
 * @param codePoints the code points to encode; may be empty
 * @return the resulting string, empty when no code points are given
 */
private fun fromCodePoints(vararg codePoints: Int): String {
    val builder = StringBuilder(codePoints.size)
    for (codePoint in codePoints) {
        builder.appendCodePoint(codePoint)
    }
    return builder.toString()
}
The below code works as intended for all test cases I tried. Probably there are some edge cases in other scripts and languages I do not know. I'd like to learn about these, as well as any coding style issues.
package de.roland_illig.strrev
import com.ibm.icu.lang.UCharacter
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
/**
 * Returns this string with its clusters in reverse order.
 *
 * A cluster is a run of code points that has to stay together: one code point
 * followed by any number of code points whose canonical combining class is
 * non-zero (such as a German umlaut diaeresis or Arabic tashkīl). In addition,
 * an Arabic lam (U+0644) that starts a cluster keeps the first alef (U+0627)
 * that follows it, even when combining marks sit between the two.
 *
 * Code points inside a cluster keep their original order. Combining code points
 * at the very beginning of the string form a cluster of their own. The string is
 * walked by code point, so surrogate pairs are never split.
 *
 * @return the reversed string; an empty receiver yields an empty string
 */
fun String.reverse(): String {
    val lam = 0x0644
    val alef = 0x0627

    // An alef joins the current cluster only when that cluster starts with lam and
    // has not taken an alef yet. Without the second condition every further alef
    // ("lam alef alef") would be swallowed by the same cluster and never reversed.
    fun isLamAlef(cluster: List<Int>, ch: Int) =
        ch == alef && cluster.firstOrNull() == lam && alef !in cluster

    val clusters = mutableListOf<List<Int>>()
    val cluster = mutableListOf<Int>()
    this.codePoints().forEachOrdered { ch ->
        // Combining class 0 means "not a combining mark", i.e. a potential cluster start.
        val startsCluster = UCharacter.getCombiningClass(ch) == 0 && !isLamAlef(cluster, ch)
        if (startsCluster && cluster.isNotEmpty()) {
            // Snapshot the finished cluster before the working list is reused.
            clusters += cluster.toList()
            cluster.clear()
        }
        cluster += ch
    }
    // Flush the cluster that was still being collected when the input ended.
    if (cluster.isNotEmpty()) {
        clusters += cluster.toList()
    }
    return fromCodePoints(*clusters.reversed().flatten().toIntArray())
}
/**
 * Unit tests for [reverse], covering plain ASCII, a supplementary code point,
 * combining marks and Arabic text.
 */
class StringReverseTest {

    @Test
    fun ascii() {
        val reversed = "hello".reverse()
        assertThat(reversed).isEqualTo("olleh")
    }

    @Test
    fun surrogates() {
        // U+1F645 is above U+FFFF, so it occupies a surrogate pair in the string.
        val single = fromCodePoints(0x1F645)
        val reversed = single.reverse()
        assertThat(reversed).isEqualTo(single)
    }

    @Test
    fun combining() {
        // "A" followed by U+0308 must come back unchanged.
        val letterThenMark = fromCodePoints(0x0041, 0x0308)
        assertThat(letterThenMark.reverse()).isEqualTo(letterThenMark)
    }

    @Test
    fun arabic() {
        assertThat("أَهْلًا وَ سَهْلًا".reverse()).isEqualTo("لًاهْسَ وَ لًاهْأَ")
    }

    @Test
    fun combiningAtBeginning() {
        // A leading U+0308 has nothing before it to attach to; the expected result puts it after "A".
        val markThenLetter = fromCodePoints(0x0308, 0x0041)
        val expected = fromCodePoints(0x0041, 0x0308)
        assertThat(markThenLetter.reverse()).isEqualTo(expected)
    }
}
/**
 * Builds a string from the given Unicode code points, in the given order.
 *
 * @param codePoints the code points to encode; may be empty
 * @return the resulting string, empty when no code points are given
 */
private fun fromCodePoints(vararg codePoints: Int): String {
    val builder = StringBuilder(codePoints.size)
    for (codePoint in codePoints) {
        builder.appendCodePoint(codePoint)
    }
    return builder.toString()
}
For completeness, here are the Gradle dependencies for build.gradle:
// Libraries used by the string-reversal code and its tests.
// NOTE(review): kotlin_version is not defined in this snippet — presumably set elsewhere in the build script.
dependencies {
// Kotlin standard library.
compile "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version"
// ICU4J; the Kotlin source imports com.ibm.icu.lang.UCharacter from it.
compile group: 'com.ibm.icu', name: 'icu4j', version: '61.1'
// Test-only libraries: kotlin-test, the JUnit 5 API and AssertJ.
testCompile "org.jetbrains.kotlin:kotlin-test:$kotlin_version"
testCompile "org.junit.jupiter:junit-jupiter-api:5.0.2"
testCompile "org.assertj:assertj-core:3.9.0"
}
Reverse string in Kotlin
In the question of how to reverse a string in Java, a comment mentioned that combining Unicode code points need to be taken into account.
There didn't seem to be a predefined source of "combining characters" in the Java standard library. According to the Unicode Character Database 10.0.0, some of the characters in the "Mn" and "Mc" category fall into the Not_Reordered class, while others fall into one of the real classes. Therefore, I extracted the raw list of code points from that database.
Based on that, I wrote the following code, including some simple test cases. The code works as intended for all test cases I tried. Probably there are some edge cases in other scripts and languages I do not know. I'd like to learn about these, as well as any coding style issues.
package de.roland_illig.strrev
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
/**
 * Reverses this string cluster by cluster instead of char by char.
 *
 * A cluster is a code point followed by any code points for which
 * [isCombining] is true (for example the diaeresis of a decomposed German
 * umlaut, or Arabic tashkīl). In addition, an Arabic alef (U+0627) stays in
 * the cluster that was opened by a lam (U+0644). The clusters are emitted in
 * reverse order while the code points inside each cluster keep their order.
 *
 * @return the reversed string
 */
fun String.reverse(): String {
    val lam = 0x0644
    val alef = 0x0627

    val finished = mutableListOf<List<Int>>()
    var current = mutableListOf<Int>()

    for (codePoint in codePoints().toArray()) {
        // An alef continues the current cluster only if that cluster begins with a lam.
        val continuesLamAlef = codePoint == alef && current.firstOrNull() == lam
        val extendsCurrent = isCombining(codePoint) || continuesLamAlef

        if (!extendsCurrent && current.isNotEmpty()) {
            finished.add(current)
            current = mutableListOf()
        }
        current.add(codePoint)
    }

    // The loop never closes the last cluster, so do it here.
    if (current.isNotEmpty()) {
        finished.add(current)
    }

    val reversedCodePoints = finished.asReversed().flatten().toIntArray()
    return fromCodePoints(*reversedCodePoints)
}
/**
 * Test cases for [reverse]: ASCII input, a code point encoded as a surrogate
 * pair, combining marks after and before a letter, and an Arabic phrase.
 */
class StringReverseTest {

    /** "hello" reversed is "olleh". */
    @Test
    fun ascii() {
        val input = "hello"
        val expected = "olleh"

        assertThat(input.reverse()).isEqualTo(expected)
    }

    /** Reversing a lone code point above U+FFFF yields the same string. */
    @Test
    fun surrogates() {
        val input = fromCodePoints(0x1F645)

        assertThat(input.reverse()).isEqualTo(input)
    }

    /** Reversing a letter plus a following combining mark yields the same string. */
    @Test
    fun combining() {
        val input = fromCodePoints(0x0041, 0x0308)

        assertThat(input.reverse()).isEqualTo(input)
    }

    /** Each Arabic letter keeps the marks that follow it when the phrase is reversed. */
    @Test
    fun arabic() {
        val input = "أَهْلًا وَ سَهْلًا"
        val expected = "لًاهْسَ وَ لًاهْأَ"

        assertThat(input.reverse()).isEqualTo(expected)
    }

    /** A leading combining mark followed by a letter is expected as letter, then mark. */
    @Test
    fun combiningAtBeginning() {
        val input = fromCodePoints(0x0308, 0x0041)
        val expected = fromCodePoints(0x0041, 0x0308)

        assertThat(input.reverse()).isEqualTo(expected)
    }
}
/**
 * Tells whether [codePoint] is one of the code points listed with a non-zero
 * combining class in DerivedCombiningClass.txt of Unicode 10.0.0.
 *
 * The table is static; the entries below keep the order in which the
 * generating script emitted them. The script printed one `||` comparison per
 * entry, which has been folded into the labels of a single `when` here:
 *
 *     perl -wane \
 *     '
 *       if (@F > 2 && $F[1] eq ";" && $F[2] != 0) {
 *         if ($F[0] =~ /^([0-9a-f]+)\.\.([0-9a-f]+)$/i) {
 *           printf("|| codePoint >= 0x%s && codePoint <= 0x%s\n", $1, $2);
 *         } else {
 *           printf("|| codePoint == 0x%s\n", $F[0]);
 *         }
 *       }
 *     ' DerivedCombiningClass.txt
 *
 * @param codePoint the code point to look up
 * @return `true` if the code point is in the table, `false` otherwise
 */
private fun isCombining(codePoint: Int): Boolean = when (codePoint) {
    in 0x0334..0x0338, 0x1CD4, in 0x1CE2..0x1CE8, in 0x20D2..0x20D3,
    in 0x20D8..0x20DA, in 0x20E5..0x20E6, in 0x20EA..0x20EB, 0x10A39,
    in 0x16AF0..0x16AF4, 0x1BC9E, in 0x1D167..0x1D169,
    0x093C, 0x09BC, 0x0A3C, 0x0ABC, 0x0B3C, 0x0CBC, 0x1037, 0x1B34,
    0x1BE6, 0x1C37, 0xA9B3, 0x110BA, 0x11173, 0x111CA, 0x11236, 0x112E9,
    0x1133C, 0x11446, 0x114C3, 0x115C0, 0x116B7, 0x11D42, 0x1E94A,
    in 0x3099..0x309A,
    0x094D, 0x09CD, 0x0A4D, 0x0ACD, 0x0B4D, 0x0BCD, 0x0C4D, 0x0CCD,
    in 0x0D3B..0x0D3C, 0x0D4D, 0x0DCA, 0x0E3A, 0x0F84, in 0x1039..0x103A,
    0x1714, 0x1734, 0x17D2, 0x1A60, 0x1B44, 0x1BAA, 0x1BAB,
    in 0x1BF2..0x1BF3, 0x2D7F, 0xA806, 0xA8C4, 0xA953, 0xA9C0, 0xAAF6,
    0xABED, 0x10A3F, 0x11046, 0x1107F, 0x110B9, in 0x11133..0x11134,
    0x111C0, 0x11235, 0x112EA, 0x1134D, 0x11442, 0x114C2, 0x115BF,
    0x1163F, 0x116B6, 0x1172B, 0x11A34, 0x11A47, 0x11A99, 0x11C3F,
    in 0x11D44..0x11D45,
    0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
    0x05B8, 0x05C7, in 0x05B9..0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BF,
    0x05C1, 0x05C2, 0xFB1E,
    0x064B, 0x08F0, 0x064C, 0x08F1, 0x064D, 0x08F2, 0x0618, 0x064E,
    0x0619, 0x064F, 0x061A, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711,
    0x0C55, 0x0C56, in 0x0E38..0x0E39, in 0x0E48..0x0E4B,
    in 0x0EB8..0x0EB9, in 0x0EC8..0x0ECB, 0x0F71, 0x0F72,
    in 0x0F7A..0x0F7D, 0x0F80, 0x0F74,
    in 0x0321..0x0322, in 0x0327..0x0328, 0x1DD0, 0x1DCE, 0x031B, 0x0F39,
    in 0x1D165..0x1D166, in 0x1D16E..0x1D172, 0x302A,
    in 0x0316..0x0319, in 0x031C..0x0320, in 0x0323..0x0326,
    in 0x0329..0x0333, in 0x0339..0x033C, in 0x0347..0x0349,
    in 0x034D..0x034E, in 0x0353..0x0356, in 0x0359..0x035A,
    0x0591, 0x0596, 0x059B, in 0x05A2..0x05A7, 0x05AA, 0x05C5,
    in 0x0655..0x0656, 0x065C, 0x065F, 0x06E3, 0x06EA, 0x06ED,
    0x0731, 0x0734, in 0x0737..0x0739, in 0x073B..0x073C, 0x073E,
    0x0742, 0x0744, 0x0746, 0x0748, 0x07F2, in 0x0859..0x085B,
    0x08E3, 0x08E6, 0x08E9, in 0x08ED..0x08EF, 0x08F6,
    in 0x08F9..0x08FA, 0x0952, in 0x0F18..0x0F19, 0x0F35, 0x0F37,
    0x0FC6, 0x108D, 0x193B, 0x1A18, 0x1A7F, in 0x1AB5..0x1ABA,
    0x1ABD, 0x1B6C, in 0x1CD5..0x1CD9, in 0x1CDC..0x1CDF, 0x1CED,
    0x1DC2, 0x1DCA, 0x1DCF, 0x1DF9, 0x1DFD, 0x1DFF, 0x20E8,
    in 0x20EC..0x20EF, in 0xA92B..0xA92D, 0xAAB4, in 0xFE27..0xFE2D,
    0x101FD, 0x102E0, 0x10A0D, 0x10A3A, 0x10AE6,
    in 0x1D17B..0x1D182, in 0x1D18A..0x1D18B, in 0x1E8D0..0x1E8D6,
    0x059A, 0x05AD, 0x1939, 0x302D, in 0x302E..0x302F, 0x1D16D,
    0x05AE, 0x18A9, in 0x1DF7..0x1DF8, 0x302B,
    in 0x0300..0x0314, in 0x033D..0x0344, 0x0346, in 0x034A..0x034C,
    in 0x0350..0x0352, 0x0357, 0x035B, in 0x0363..0x036F,
    in 0x0483..0x0487, in 0x0592..0x0595, in 0x0597..0x0599,
    in 0x059C..0x05A1, in 0x05A8..0x05A9, in 0x05AB..0x05AC,
    0x05AF, 0x05C4, in 0x0610..0x0617, in 0x0653..0x0654,
    in 0x0657..0x065B, in 0x065D..0x065E, in 0x06D6..0x06DC,
    in 0x06DF..0x06E2, 0x06E4, in 0x06E7..0x06E8, in 0x06EB..0x06EC,
    0x0730, in 0x0732..0x0733, in 0x0735..0x0736, 0x073A, 0x073D,
    in 0x073F..0x0741, 0x0743, 0x0745, 0x0747, in 0x0749..0x074A,
    in 0x07EB..0x07F1, 0x07F3, in 0x0816..0x0819, in 0x081B..0x0823,
    in 0x0825..0x0827, in 0x0829..0x082D, in 0x08D4..0x08E1,
    in 0x08E4..0x08E5, in 0x08E7..0x08E8, in 0x08EA..0x08EC,
    in 0x08F3..0x08F5, in 0x08F7..0x08F8, in 0x08FB..0x08FF,
    0x0951, in 0x0953..0x0954, in 0x0F82..0x0F83, in 0x0F86..0x0F87,
    in 0x135D..0x135F, 0x17DD, 0x193A, 0x1A17, in 0x1A75..0x1A7C,
    in 0x1AB0..0x1AB4, in 0x1ABB..0x1ABC, 0x1B6B, in 0x1B6D..0x1B73,
    in 0x1CD0..0x1CD2, in 0x1CDA..0x1CDB, 0x1CE0, 0x1CF4,
    in 0x1CF8..0x1CF9, in 0x1DC0..0x1DC1, in 0x1DC3..0x1DC9,
    in 0x1DCB..0x1DCC, in 0x1DD1..0x1DF5, 0x1DFB, 0x1DFE,
    in 0x20D0..0x20D1, in 0x20D4..0x20D7, in 0x20DB..0x20DC,
    0x20E1, 0x20E7, 0x20E9, 0x20F0, in 0x2CEF..0x2CF1,
    in 0x2DE0..0x2DFF, 0xA66F, in 0xA674..0xA67D, in 0xA69E..0xA69F,
    in 0xA6F0..0xA6F1, in 0xA8E0..0xA8F1, 0xAAB0, in 0xAAB2..0xAAB3,
    in 0xAAB7..0xAAB8, in 0xAABE..0xAABF, 0xAAC1, in 0xFE20..0xFE26,
    in 0xFE2E..0xFE2F, in 0x10376..0x1037A, 0x10A0F, 0x10A38, 0x10AE5,
    in 0x11100..0x11102, in 0x11366..0x1136C, in 0x11370..0x11374,
    in 0x16B30..0x16B36, in 0x1D185..0x1D189, in 0x1D1AA..0x1D1AD,
    in 0x1D242..0x1D244, in 0x1E000..0x1E006, in 0x1E008..0x1E018,
    in 0x1E01B..0x1E021, in 0x1E023..0x1E024, in 0x1E026..0x1E02A,
    in 0x1E944..0x1E949,
    0x0315, 0x031A, 0x0358, 0x1DF6, 0x302C,
    0x035C, 0x035F, 0x0362, 0x1DFC,
    in 0x035D..0x035E, in 0x0360..0x0361, 0x1DCD, 0x0345 -> true
    else -> false
}
/**
 * Concatenates the given Unicode code points into a string.
 *
 * A code point above U+FFFF occupies two chars (a surrogate pair) in the
 * result.
 *
 * @param codePoints the code points, in output order; may be empty
 * @return the assembled string
 * @throws IllegalArgumentException if a value is not a valid Unicode code point
 */
private fun fromCodePoints(vararg codePoints: Int): String {
    val text = StringBuilder()
    codePoints.forEach { text.appendCodePoint(it) }
    return text.toString()
}