\$\begingroup\$
\$\endgroup\$
0
In the question about how to reverse a string in Java, a comment mentioned that combining Unicode code points need to be taken into account.
The code below works as intended for all the test cases I tried. There are probably edge cases in other scripts and languages that I do not know about. I'd like to learn about these, as well as about any coding style issues.
package de.roland_illig.strrev
import com.ibm.icu.lang.UCharacter
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
/**
 * Returns the reversed string, keeping clusters of combining code points
 * (such as German umlauts or Arabic tashkīl) together.
 *
 * The string is processed code point by code point, so surrogate pairs are
 * never split. A cluster consists of a leading code point followed by every
 * directly following code point whose canonical combining class is non-zero.
 * In addition, an ARABIC LETTER LAM (U+0644), its marks and the first
 * following ARABIC LETTER ALEF (U+0627) are kept together so that the
 * lam-alef ligature survives the reversal.
 *
 * Limitations: code points with combining class 0 always start a new cluster,
 * even when they visually attach to their neighbour (for example ZERO WIDTH
 * JOINER, variation selectors or emoji modifiers). Only the plain ALEF is
 * recognised after LAM; the hamza/madda variants of ALEF are not.
 *
 * @return a new string with the clusters in reverse order; the code points
 *   inside each cluster keep their original order.
 */
fun String.reverse(): String {
    val lam = 0x0644
    val alef = 0x0627

    // Decides whether [ch] belongs to the cluster that is currently being built.
    fun continuesCluster(cluster: List<Int>, ch: Int): Boolean = when {
        // Combining marks always attach to whatever precedes them.
        UCharacter.getCombiningClass(ch) != 0 -> true
        // Only the first ALEF after a LAM forms the ligature. Without the
        // `alef !in cluster` check a cluster starting with LAM would swallow
        // every subsequent ALEF (LAM ALEF ALEF would stay in one piece).
        else -> ch == alef && cluster.firstOrNull() == lam && alef !in cluster
    }

    val clusters = mutableListOf<MutableList<Int>>()
    codePoints().forEachOrdered { ch ->
        val current = clusters.lastOrNull()
        if (current != null && continuesCluster(current, ch)) {
            current.add(ch)
        } else {
            // Also reached for a combining mark at the very beginning of the
            // string: it forms a cluster of its own.
            clusters.add(mutableListOf(ch))
        }
    }
    return fromCodePoints(*clusters.asReversed().flatten().toIntArray())
}
/** Unit tests for the cluster-aware `String.reverse()` extension. */
class StringReverseTest {

    /** Plain ASCII text is reversed code point by code point. */
    @Test
    fun ascii() {
        val reversed = "hello".reverse()

        assertThat(reversed).isEqualTo("olleh")
    }

    /** A single supplementary code point (a surrogate pair in UTF-16) must not be split. */
    @Test
    fun surrogates() {
        val singleEmoji = fromCodePoints(0x1F645)

        val reversed = singleEmoji.reverse()

        assertThat(reversed).isEqualTo(singleEmoji)
    }

    /** A base letter followed by a combining mark stays in its original order. */
    @Test
    fun combining() {
        val letterWithDiaeresis = fromCodePoints(0x0041, 0x0308)

        val reversed = letterWithDiaeresis.reverse()

        assertThat(reversed).isEqualTo(letterWithDiaeresis)
    }

    /** Arabic letters keep their tashkīl, and lam-alef stays together. */
    @Test
    fun arabic() {
        assertThat("أَهْلًا وَ سَهْلًا".reverse()).isEqualTo("لًاهْسَ وَ لًاهْأَ")
    }

    /** A combining mark without a preceding base forms a cluster of its own. */
    @Test
    fun combiningAtBeginning() {
        val markThenLetter = fromCodePoints(0x0308, 0x0041)
        val letterThenMark = fromCodePoints(0x0041, 0x0308)

        val reversed = markThenLetter.reverse()

        assertThat(reversed).isEqualTo(letterThenMark)
    }
}
/**
 * Builds a string from the given Unicode code points, in the given order.
 *
 * @param codePoints the code points to encode; supplementary code points
 *   become surrogate pairs in the resulting string.
 * @return the string consisting of exactly these code points.
 */
private fun fromCodePoints(vararg codePoints: Int): String {
    val count = codePoints.size
    return String(codePoints, 0, count)
}
For completeness, here are the Gradle dependencies for build.gradle:
// Libraries needed to build and test the string-reversal code.
dependencies {
// Kotlin standard library for JDK 8; $kotlin_version must be defined elsewhere in the build.
compile "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version"
// ICU4J provides com.ibm.icu.lang.UCharacter, used to look up combining classes.
compile group: 'com.ibm.icu', name: 'icu4j', version: '61.1'
// Test-only dependencies: kotlin-test, the JUnit 5 (Jupiter) API and AssertJ assertions.
testCompile "org.jetbrains.kotlin:kotlin-test:$kotlin_version"
testCompile "org.junit.jupiter:junit-jupiter-api:5.0.2"
testCompile "org.assertj:assertj-core:3.9.0"
}
Roland Illig
asked Apr 29, 2018 at 13:50
default