Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit aa6bd55

Browse files
Rollup merge of #147113 - heiher:src-analysis-lsx, r=lqd
Reland "Add LSX accelerated implementation for source file analysis"

This patch introduces an LSX-optimized version of `analyze_source_file` for the `loongarch64` target. Similar to the existing SSE2 implementation for x86, this version:
- Processes 16-byte chunks at a time using LSX vector intrinsics.
- Quickly identifies newlines in ASCII-only chunks.
- Falls back to the generic implementation when multi-byte UTF-8 characters are detected or in the tail portion.

Relands #145963.

r? @lqd
2 parents 750e902 + c1259aa commit aa6bd55

File tree

2 files changed

+107
-3
lines changed

2 files changed

+107
-3
lines changed

‎compiler/rustc_span/src/analyze_source_file.rs‎

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@ cfg_select! {
8181
// use `loadu`, which supports unaligned loading.
8282
let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
8383

84-
// For character in the chunk, see if its byte value is < 0, which
85-
// indicates that it's part of a UTF-8 char.
84+
// For each character in the chunk, see if its byte value is < 0,
85+
// which indicates that it's part of a UTF-8 char.
8686
let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
8787
// Create a bit mask from the comparison results.
8888
let multibyte_mask = _mm_movemask_epi8(multibyte_test);
@@ -132,8 +132,111 @@ cfg_select! {
132132
}
133133
}
134134
}
135+
target_arch = "loongarch64" => {
136+
/// Runtime dispatcher for the `loongarch64` target.
///
/// Uses the LSX-accelerated scanner when the running CPU reports the `lsx`
/// feature; otherwise the whole of `src` is handed to the generic
/// implementation. `lines` and `multi_byte_chars` are passed through
/// unchanged to whichever implementation is selected.
fn analyze_source_file_dispatch(
137+
src: &str,
138+
lines: &mut Vec<RelativeBytePos>,
139+
multi_byte_chars: &mut Vec<MultiByteChar>,
140+
) {
141+
use std::arch::is_loongarch_feature_detected;
142+
143+
if is_loongarch_feature_detected!("lsx") {
144+
// SAFETY: `analyze_source_file_lsx` is compiled with
// `#[target_feature(enable = "lsx")]`, and the runtime check above has
// just confirmed that LSX is available on this CPU.
unsafe {
145+
analyze_source_file_lsx(src, lines, multi_byte_chars);
146+
}
147+
} else {
148+
// No LSX: hand the entire input (`src.len()` bytes, starting at
// relative position 0) to the generic implementation.
analyze_source_file_generic(
149+
src,
150+
src.len(),
151+
RelativeBytePos::from_u32(0),
152+
lines,
153+
multi_byte_chars,
154+
);
155+
}
156+
}
157+
158+
/// Checks 16 byte chunks of text at a time. If the chunk contains
159+
/// any byte with its high bit set (i.e. part of a multi-byte UTF-8 char),
160+
/// the function falls back to the generic implementation. Otherwise it uses
161+
/// LSX intrinsics to quickly find all newlines.
///
/// Any bytes left over after the last full 16-byte chunk are always handled
/// by the generic implementation.
///
/// # Safety
///
/// The caller must ensure that the `lsx` target feature is available on the
/// CPU executing this function.
162+
#[target_feature(enable = "lsx")]
163+
unsafe fn analyze_source_file_lsx(
164+
src: &str,
165+
lines: &mut Vec<RelativeBytePos>,
166+
multi_byte_chars: &mut Vec<MultiByteChar>,
167+
) {
168+
use std::arch::loongarch64::*;
169+
170+
const CHUNK_SIZE: usize = 16;
171+
172+
// Split the input into full 16-byte chunks plus a shorter remainder.
let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
173+
174+
// This variable keeps track of where we should start decoding a
175+
// chunk. If a multi-byte character spans across chunk boundaries,
176+
// we need to skip that part in the next chunk because we already
177+
// handled it.
178+
let mut intra_chunk_offset = 0;
179+
180+
for (chunk_index, chunk) in chunks.iter().enumerate() {
181+
// All LSX memory instructions support unaligned access, so using
182+
// vld is fine.
// SAFETY: `chunk` points to a `[u8; CHUNK_SIZE]`, so a 16-byte load
// from its start stays in bounds.
183+
let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) };
184+
185+
// For each character in the chunk, see if its byte value is < 0,
186+
// which indicates that it's part of a UTF-8 char.
187+
let multibyte_mask = lsx_vmskltz_b(chunk);
188+
// Create a bit mask from the comparison results.
// (Element 0 of the vector is extracted as a scalar `i32`.)
189+
let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask);
190+
191+
// If the bit mask is all zero, we only have ASCII chars here:
192+
if multibyte_mask == 0 {
193+
// NOTE(review): presumably a non-zero offset would mean a multi-byte
// char from the previous chunk continues into this one, in which
// case this chunk could not be all-ASCII -- confirm against
// `analyze_source_file_generic`'s return value.
assert!(intra_chunk_offset == 0);
194+
195+
// Check for newlines in the chunk
196+
let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk);
197+
let newlines_mask = lsx_vmskltz_b(newlines_test);
198+
let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask);
199+
200+
// Base position of this chunk within `src`. The `+ 1` makes every
// recorded position point just past the newline byte rather than at it.
let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
201+
202+
// Visit the set bits from lowest to highest, one per newline found.
while newlines_mask != 0 {
203+
let index = newlines_mask.trailing_zeros();
204+
205+
lines.push(RelativeBytePos(index) + output_offset);
206+
207+
// Clear the bit, so we can find the next one.
208+
newlines_mask &= newlines_mask - 1;
209+
}
210+
} else {
211+
// The slow path.
212+
// There are multibyte chars in here, fallback to generic decoding.
// Start after any bytes already consumed by the previous slow-path
// call, and remember how far this call ran past the end of the
// chunk so the next iteration can skip those bytes.
213+
let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
214+
intra_chunk_offset = analyze_source_file_generic(
215+
&src[scan_start..],
216+
CHUNK_SIZE - intra_chunk_offset,
217+
RelativeBytePos::from_usize(scan_start),
218+
lines,
219+
multi_byte_chars,
220+
);
221+
}
222+
}
223+
224+
// There might still be a tail left to analyze
// (skipping any bytes the last slow-path call already consumed).
225+
let tail_start = src.len() - tail.len() + intra_chunk_offset;
226+
if tail_start < src.len() {
227+
analyze_source_file_generic(
228+
&src[tail_start..],
229+
src.len() - tail_start,
230+
RelativeBytePos::from_usize(tail_start),
231+
lines,
232+
multi_byte_chars,
233+
);
234+
}
235+
}
236+
}
135237
_ => {
136-
// The target (or compiler version) does not support SSE2 ...
238+
// The target (or compiler version) does not support vector instructions
239+
// our specialized implementations need (x86 SSE2, loongarch64 LSX)...
137240
fn analyze_source_file_dispatch(
138241
src: &str,
139242
lines: &mut Vec<RelativeBytePos>,

‎compiler/rustc_span/src/lib.rs‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
1818
// tidy-alphabetical-start
1919
#![allow(internal_features)]
20+
#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
2021
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
2122
#![doc(rust_logo)]
2223
#![feature(array_windows)]

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /