Commit aa6bd55

authored

Rollup merge of #147113 - heiher:src-analysis-lsx, r=lqd

Reland "Add LSX accelerated implementation for source file analysis"

This patch introduces an LSX-optimized version of `analyze_source_file` for the `loongarch64` target. Similar to the existing SSE2 implementation for x86, this version:

- Processes 16-byte chunks at a time using LSX vector intrinsics.
- Quickly identifies newlines in ASCII-only chunks.
- Falls back to the generic implementation when multi-byte UTF-8 characters are detected, and for the tail portion.

Reland #145963

r? `@lqd`

2 parents 750e902 + c1259aa, commit aa6bd55 (Copy full SHA for aa6bd55)

File tree

2 files changed: +107 -3 lines changed

compiler/rustc_span/src
- analyze_source_file.rs
- lib.rs

2 files changed: +107 -3 lines changed

`‎compiler/rustc_span/src/analyze_source_file.rs‎`

Lines changed: 106 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -81,8 +81,8 @@ cfg_select! {`
`81`	`81`	// use `loadu`, which supports unaligned loading.
`82`	`82`	`let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };`
`83`	`83`
`84`		`- // For character in the chunk, see if its byte value is < 0, which`
`85`		`- // indicates that it's part of a UTF-8 char.`
	`84`	`+ // For each character in the chunk, see if its byte value is < 0,`
	`85`	`+ // which indicates that it's part of a UTF-8 char.`
`86`	`86`	`let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));`
`87`	`87`	`// Create a bit mask from the comparison results.`
`88`	`88`	`let multibyte_mask = _mm_movemask_epi8(multibyte_test);`
`@@ -132,8 +132,111 @@ cfg_select! {`
`132`	`132`	`}`
`133`	`133`	`}`
`134`	`134`	`}`
	`135`	`+ target_arch = "loongarch64" => {`
	`136`	`+ fn analyze_source_file_dispatch(`
	`137`	`+ src: &str,`
	`138`	`+ lines: &mut Vec<RelativeBytePos>,`
	`139`	`+ multi_byte_chars: &mut Vec<MultiByteChar>,`
	`140`	`+ ) {`
	`141`	`+ use std::arch::is_loongarch_feature_detected;`
	`142`	`+`
	`143`	`+ if is_loongarch_feature_detected!("lsx") {`
	`144`	`+ unsafe {`
	`145`	`+ analyze_source_file_lsx(src, lines, multi_byte_chars);`
	`146`	`+ }`
	`147`	`+ } else {`
	`148`	`+ analyze_source_file_generic(`
	`149`	`+ src,`
	`150`	`+ src.len(),`
	`151`	`+ RelativeBytePos::from_u32(0),`
	`152`	`+ lines,`
	`153`	`+ multi_byte_chars,`
	`154`	`+ );`
	`155`	`+ }`
	`156`	`+ }`
	`157`	`+`
	`158`	`+ /// Checks 16 byte chunks of text at a time. If the chunk contains`
	`159`	`+ /// something other than printable ASCII characters and newlines, the`
	`160`	`+ /// function falls back to the generic implementation. Otherwise it uses`
	`161`	`+ /// LSX intrinsics to quickly find all newlines.`
	`162`	`+ #[target_feature(enable = "lsx")]`
	`163`	`+ unsafe fn analyze_source_file_lsx(`
	`164`	`+ src: &str,`
	`165`	`+ lines: &mut Vec<RelativeBytePos>,`
	`166`	`+ multi_byte_chars: &mut Vec<MultiByteChar>,`
	`167`	`+ ) {`
	`168`	`+ use std::arch::loongarch64::*;`
	`169`	`+`
	`170`	`+ const CHUNK_SIZE: usize = 16;`
	`171`	`+`
	`172`	`+ let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();`
	`173`	`+`
	`174`	`+ // This variable keeps track of where we should start decoding a`
	`175`	`+ // chunk. If a multi-byte character spans across chunk boundaries,`
	`176`	`+ // we need to skip that part in the next chunk because we already`
	`177`	`+ // handled it.`
	`178`	`+ let mut intra_chunk_offset = 0;`
	`179`	`+`
	`180`	`+ for (chunk_index, chunk) in chunks.iter().enumerate() {`
	`181`	`+ // All LSX memory instructions support unaligned access, so using`
	`182`	`+ // vld is fine.`
	`183`	`+ let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) };`
	`184`	`+`
	`185`	`+ // For each character in the chunk, see if its byte value is < 0,`
	`186`	`+ // which indicates that it's part of a UTF-8 char.`
	`187`	`+ let multibyte_mask = lsx_vmskltz_b(chunk);`
	`188`	`+ // Create a bit mask from the comparison results.`
	`189`	`+ let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask);`
	`190`	`+`
	`191`	`+ // If the bit mask is all zero, we only have ASCII chars here:`
	`192`	`+ if multibyte_mask == 0 {`
	`193`	`+ assert!(intra_chunk_offset == 0);`
	`194`	`+`
	`195`	`+ // Check for newlines in the chunk`
	`196`	`+ let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk);`
	`197`	`+ let newlines_mask = lsx_vmskltz_b(newlines_test);`
	`198`	`+ let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask);`
	`199`	`+`
	`200`	`+ let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);`
	`201`	`+`
	`202`	`+ while newlines_mask != 0 {`
	`203`	`+ let index = newlines_mask.trailing_zeros();`
	`204`	`+`
	`205`	`+ lines.push(RelativeBytePos(index) + output_offset);`
	`206`	`+`
	`207`	`+ // Clear the bit, so we can find the next one.`
	`208`	`+ newlines_mask &= newlines_mask - 1;`
	`209`	`+ }`
	`210`	`+ } else {`
	`211`	`+ // The slow path.`
	`212`	`+ // There are multibyte chars in here, fallback to generic decoding.`
	`213`	`+ let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;`
	`214`	`+ intra_chunk_offset = analyze_source_file_generic(`
	`215`	`+ &src[scan_start..],`
	`216`	`+ CHUNK_SIZE - intra_chunk_offset,`
	`217`	`+ RelativeBytePos::from_usize(scan_start),`
	`218`	`+ lines,`
	`219`	`+ multi_byte_chars,`
	`220`	`+ );`
	`221`	`+ }`
	`222`	`+ }`
	`223`	`+`
	`224`	`+ // There might still be a tail left to analyze`
	`225`	`+ let tail_start = src.len() - tail.len() + intra_chunk_offset;`
	`226`	`+ if tail_start < src.len() {`
	`227`	`+ analyze_source_file_generic(`
	`228`	`+ &src[tail_start..],`
	`229`	`+ src.len() - tail_start,`
	`230`	`+ RelativeBytePos::from_usize(tail_start),`
	`231`	`+ lines,`
	`232`	`+ multi_byte_chars,`
	`233`	`+ );`
	`234`	`+ }`
	`235`	`+ }`
	`236`	`+ }`
`135`	`237`	`_ => {`
`136`		`- // The target (or compiler version) does not support SSE2 ...`
	`238`	`+ // The target (or compiler version) does not support vector instructions`
	`239`	`+ // our specialized implementations need (x86 SSE2, loongarch64 LSX)...`
`137`	`240`	`fn analyze_source_file_dispatch(`
`138`	`241`	`src: &str,`
`139`	`242`	`lines: &mut Vec<RelativeBytePos>,`

`‎compiler/rustc_span/src/lib.rs‎`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,7 @@`
`17`	`17`
`18`	`18`	`// tidy-alphabetical-start`
`19`	`19`	`#![allow(internal_features)]`
	`20`	`+#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]`
`20`	`21`	`#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]`
`21`	`22`	`#![doc(rust_logo)]`
`22`	`23`	`#![feature(array_windows)]`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit aa6bd55

File tree

2 files changed

2 files changed

`‎compiler/rustc_span/src/analyze_source_file.rs‎`

`‎compiler/rustc_span/src/lib.rs‎`

0 commit comments