Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c5afe3d

Browse files
feat: add escape_into (#18)
1 parent 2df28ec commit c5afe3d

File tree

3 files changed

+92
-28
lines changed

3 files changed

+92
-28
lines changed

‎src/aarch64.rs‎

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,9 @@ const PREFETCH_DISTANCE: usize = CHUNK * 2;
1010
const SLASH_SENTINEL: u8 = 0xFF;
1111

1212
#[inline]
13-
pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
14-
let s = input.as_ref();
15-
let bytes = s.as_bytes();
13+
pub fn escape_neon(bytes: &[u8], output: &mut Vec<u8>) {
1614
let n = bytes.len();
1715

18-
let mut out = Vec::with_capacity(n + 2);
19-
out.push(b'"');
20-
2116
unsafe {
2217
let tbl = vld1q_u8_x4(ESCAPE.as_ptr());
2318
let slash = vdupq_n_u8(b'\\');
@@ -53,18 +48,18 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
5348
let mask_r_4 = vmaxvq_u8(mask_4);
5449

5550
if mask_r_1 | mask_r_2 | mask_r_3 | mask_r_4 == 0 {
56-
out.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
51+
output.extend_from_slice(std::slice::from_raw_parts(ptr, CHUNK));
5752
i += CHUNK;
5853
continue;
5954
}
6055

6156
macro_rules! handle {
6257
($mask:expr, $mask_r:expr, $off:expr) => {
6358
if $mask_r == 0 {
64-
out.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
59+
output.extend_from_slice(std::slice::from_raw_parts(ptr.add($off), 16));
6560
} else {
6661
vst1q_u8(placeholder.as_mut_ptr(), $mask);
67-
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, &mut out);
62+
handle_block(&bytes[i + $off..i + $off + 16], &placeholder, output);
6863
}
6964
};
7065
}
@@ -78,13 +73,9 @@ pub fn escape_neon<S: AsRef<str>>(input: S) -> String {
7873
}
7974

8075
if i < n {
81-
handle_tail(&bytes[i..], &mut out);
76+
handle_tail(&bytes[i..], output);
8277
}
8378
}
84-
85-
out.push(b'"');
86-
87-
unsafe { String::from_utf8_unchecked(out) }
8879
}
8980

9081
#[inline(always)]

‎src/generic.rs‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ pub fn escape_generic<S: AsRef<str>>(s: S) -> String {
1313
unsafe { String::from_utf8_unchecked(result) }
1414
}
1515

16+
#[inline]
17+
pub fn escape_into_generic<S: AsRef<str>>(s: S, output: &mut Vec<u8>) {
18+
let s = s.as_ref();
19+
let bytes = s.as_bytes();
20+
output.push(b'"');
21+
escape_inner(bytes, output);
22+
output.push(b'"');
23+
}
24+
1625
#[inline]
1726
// Slightly modified version of
1827
// <https://github.com/serde-rs/json/blob/d12e943590208da738c092db92c34b39796a2538/src/ser.rs#L2079>

‎src/lib.rs‎

Lines changed: 78 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,19 +114,19 @@ mod generic;
114114
#[cfg(target_arch = "x86_64")]
115115
mod x86;
116116

117-
pub use generic::escape_generic;
117+
pub use generic::{escape_generic, escape_into_generic};
118118

119119
/// Main entry point for JSON string escaping with SIMD acceleration
120120
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
121121
pub fn escape<S: AsRef<str>>(input: S) -> String {
122+
use generic::escape_inner;
123+
124+
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
125+
result.push(b'"');
126+
let s = input.as_ref();
127+
let bytes = s.as_bytes();
122128
#[cfg(target_arch = "x86_64")]
123129
{
124-
use generic::escape_inner;
125-
126-
let mut result = Vec::with_capacity(input.as_ref().len() + input.as_ref().len() / 2 + 2);
127-
result.push(b'"');
128-
let s = input.as_ref();
129-
let bytes = s.as_bytes();
130130
let len = bytes.len();
131131
// Runtime CPU feature detection for x86_64
132132
if is_x86_feature_detected!("avx512f")
@@ -144,16 +144,71 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
144144
} else {
145145
escape_inner(bytes, &mut result);
146146
}
147-
result.push(b'"');
148-
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
149-
unsafe { String::from_utf8_unchecked(result) }
150147
}
151148

152149
#[cfg(target_arch = "aarch64")]
153150
{
154151
#[cfg(feature = "force_aarch64_neon")]
155152
{
156-
return aarch64::escape_neon(input);
153+
aarch64::escape_neon(bytes, &mut result);
154+
}
155+
#[cfg(not(feature = "force_aarch64_neon"))]
156+
{
157+
// on Apple M2 and later, the `bf16` feature is available
158+
// it means they have more registers and can significantly benefit from the SIMD path
159+
// TODO: add support for sve2 chips with wider registers
160+
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
161+
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
162+
aarch64::escape_neon(bytes, &mut result);
163+
} else {
164+
escape_inner(bytes, &mut result);
165+
}
166+
}
167+
}
168+
169+
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
170+
{
171+
escape_inner(bytes, &mut result);
172+
}
173+
result.push(b'"');
174+
// SAFETY: We only pushed valid UTF-8 bytes (original string bytes and ASCII escape sequences)
175+
unsafe { String::from_utf8_unchecked(result) }
176+
}
177+
178+
/// Main entry point for JSON string escaping with SIMD acceleration
179+
/// If the platform is supported, the SIMD path will be used. Otherwise, the generic fallback will be used.
180+
pub fn escape_into<S: AsRef<str>>(input: S, output: &mut Vec<u8>) {
181+
use generic::escape_inner;
182+
183+
output.push(b'"');
184+
let s = input.as_ref();
185+
let bytes = s.as_bytes();
186+
#[cfg(target_arch = "x86_64")]
187+
{
188+
let len = bytes.len();
189+
// Runtime CPU feature detection for x86_64
190+
if is_x86_feature_detected!("avx512f")
191+
&& is_x86_feature_detected!("avx512bw")
192+
&& len >= x86::LOOP_SIZE_AVX512
193+
{
194+
unsafe { x86::escape_avx512(bytes, output) }
195+
} else if is_x86_feature_detected!("avx2") && len >= x86::LOOP_SIZE_AVX2 {
196+
unsafe { x86::escape_avx2(bytes, output) }
197+
} else if is_x86_feature_detected!("sse2")
198+
&& /* if len < 128, no need to use simd */
199+
len >= x86::LOOP_SIZE_AVX2
200+
{
201+
unsafe { x86::escape_sse2(bytes, output) }
202+
} else {
203+
escape_inner(bytes, output);
204+
}
205+
}
206+
207+
#[cfg(target_arch = "aarch64")]
208+
{
209+
#[cfg(feature = "force_aarch64_neon")]
210+
{
211+
return aarch64::escape_neon(bytes, output);
157212
}
158213
#[cfg(not(feature = "force_aarch64_neon"))]
159214
{
@@ -162,15 +217,18 @@ pub fn escape<S: AsRef<str>>(input: S) -> String {
162217
// TODO: add support for sve2 chips with wider registers
163218
// github actions ubuntu-24.04-arm runner has 128 bits sve2 registers, it's not enough for the SIMD path
164219
if cfg!(target_os = "macos") && std::arch::is_aarch64_feature_detected!("bf16") {
165-
return aarch64::escape_neon(input);
220+
aarch64::escape_neon(bytes, output);
166221
} else {
167-
return escape_generic(input);
222+
escape_inner(bytes, output);
168223
}
169224
}
170225
}
171226

172227
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
173-
escape_generic(input)
228+
{
229+
escape_into_generic(input, output);
230+
}
231+
output.push(b'"');
174232
}
175233

176234
#[test]
@@ -377,6 +435,9 @@ fn test_rxjs() {
377435
assert!(!sources.is_empty());
378436
for source in sources {
379437
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
438+
let mut output = String::new();
439+
escape_into(&source, unsafe { output.as_mut_vec() });
440+
assert_eq!(output, serde_json::to_string(&source).unwrap());
380441
}
381442
}
382443

@@ -402,5 +463,8 @@ fn test_sources() {
402463
assert!(!sources.is_empty());
403464
for source in sources {
404465
assert_eq!(escape(&source), serde_json::to_string(&source).unwrap());
466+
let mut output = String::new();
467+
escape_into(&source, unsafe { output.as_mut_vec() });
468+
assert_eq!(output, serde_json::to_string(&source).unwrap());
405469
}
406470
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /