Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2c2bb99

Browse files
committed
Auto merge of #140999 - hkBst:update-escaper, r=nnethercote
Update to literal-escaper 0.0.4 for a better API without `unreachable` and faster string parsing. This is the replacement for just the part of #138163 dealing with the changed API of the unescape functionality, since that got moved into its own crate. <del>This uses an unpublished version of literal-escaper (https://github.com/rust-lang/literal-escaper/pull/8).</del> r? `@nnethercote`
2 parents 3de5b08 + 707a6f5 commit 2c2bb99

File tree

16 files changed

+67
-94
lines changed

16 files changed

+67
-94
lines changed

‎Cargo.lock‎

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3175,9 +3175,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
31753175

31763176
[[package]]
31773177
name = "rustc-literal-escaper"
3178-
version = "0.0.2"
3178+
version = "0.0.4"
31793179
source = "registry+https://github.com/rust-lang/crates.io-index"
3180-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
3180+
checksum = "ab03008eb631b703dd16978282ae36c73282e7922fe101a4bd072a40ecea7b8b"
31813181

31823182
[[package]]
31833183
name = "rustc-main"

‎Cargo.toml‎

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,3 +89,8 @@ codegen-units = 1
8989
# FIXME: LTO cannot be enabled for binaries in a workspace
9090
# <https://github.com/rust-lang/cargo/issues/9330>
9191
# lto = true
92+
93+
# If you want to use a crate with local modifications, you can set a path or git dependency here.
94+
# For git dependencies, also add your source to ALLOWED_SOURCES in src/tools/tidy/src/extdeps.rs.
95+
#[patch.crates-io]
96+

‎compiler/rustc_ast/Cargo.toml‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ edition = "2024"
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
99
memchr = "2.7.4"
10-
rustc-literal-escaper = "0.0.2"
10+
rustc-literal-escaper = "0.0.4"
1111
rustc_ast_ir = { path = "../rustc_ast_ir" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }
1313
rustc_index = { path = "../rustc_index" }

‎compiler/rustc_ast/src/util/literal.rs‎

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
use std::{ascii, fmt, str};
44

55
use rustc_literal_escaper::{
6-
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
6+
MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
77
};
88
use rustc_span::{Span, Symbol, kw, sym};
99
use tracing::debug;
@@ -87,11 +87,10 @@ impl LitKind {
8787
// Force-inlining here is aggressive but the closure is
8888
// called on every char in the string, so it can be hot in
8989
// programs with many long strings containing escapes.
90-
unescape_unicode(
90+
unescape_str(
9191
s,
92-
Mode::Str,
93-
&mut #[inline(always)]
94-
|_, c| match c {
92+
#[inline(always)]
93+
|_, res| match res {
9594
Ok(c) => buf.push(c),
9695
Err(err) => {
9796
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -111,8 +110,8 @@ impl LitKind {
111110
token::ByteStr => {
112111
let s = symbol.as_str();
113112
let mut buf = Vec::with_capacity(s.len());
114-
unescape_unicode(s, Mode::ByteStr,&mut|_, c| match c {
115-
Ok(c) => buf.push(byte_from_char(c)),
113+
unescape_byte_str(s, |_, res| match res {
114+
Ok(b) => buf.push(b),
116115
Err(err) => {
117116
assert!(!err.is_fatal(), "failed to unescape string literal")
118117
}
@@ -128,7 +127,7 @@ impl LitKind {
128127
token::CStr => {
129128
let s = symbol.as_str();
130129
let mut buf = Vec::with_capacity(s.len());
131-
unescape_mixed(s,Mode::CStr,&mut |_span, c| match c {
130+
unescape_c_str(s, |_span, c| match c {
132131
Ok(MixedUnit::Char(c)) => {
133132
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
134133
}

‎compiler/rustc_parse/Cargo.toml‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ edition = "2024"
66
[dependencies]
77
# tidy-alphabetical-start
88
bitflags = "2.4.1"
9-
rustc-literal-escaper = "0.0.2"
9+
rustc-literal-escaper = "0.0.4"
1010
rustc_ast = { path = "../rustc_ast" }
1111
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1212
rustc_data_structures = { path = "../rustc_data_structures" }

‎compiler/rustc_parse/src/lexer/mod.rs‎

Lines changed: 32 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
use std::ops::Range;
2-
31
use diagnostics::make_unclosed_delims_error;
42
use rustc_ast::ast::{self, AttrStyle};
53
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
@@ -10,7 +8,7 @@ use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
108
use rustc_lexer::{
119
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
1210
};
13-
use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode};
11+
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
1412
use rustc_session::lint::BuiltinLintDiag;
1513
use rustc_session::lint::builtin::{
1614
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
@@ -702,7 +700,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
702700
}
703701
err.emit()
704702
}
705-
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
703+
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
706704
}
707705
rustc_lexer::LiteralKind::Byte { terminated } => {
708706
if !terminated {
@@ -714,7 +712,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
714712
.with_code(E0763)
715713
.emit()
716714
}
717-
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
715+
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
718716
}
719717
rustc_lexer::LiteralKind::Str { terminated } => {
720718
if !terminated {
@@ -726,7 +724,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
726724
.with_code(E0765)
727725
.emit()
728726
}
729-
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
727+
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
730728
}
731729
rustc_lexer::LiteralKind::ByteStr { terminated } => {
732730
if !terminated {
@@ -738,7 +736,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
738736
.with_code(E0766)
739737
.emit()
740738
}
741-
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
739+
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1)
740+
// b" "
742741
}
743742
rustc_lexer::LiteralKind::CStr { terminated } => {
744743
if !terminated {
@@ -750,13 +749,14 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
750749
.with_code(E0767)
751750
.emit()
752751
}
753-
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
752+
self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
754753
}
755754
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
756755
if let Some(n_hashes) = n_hashes {
757756
let n = u32::from(n_hashes);
758757
let kind = token::StrRaw(n_hashes);
759-
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
758+
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n)
759+
// r##" "##
760760
} else {
761761
self.report_raw_str_error(start, 1);
762762
}
@@ -765,7 +765,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
765765
if let Some(n_hashes) = n_hashes {
766766
let n = u32::from(n_hashes);
767767
let kind = token::ByteStrRaw(n_hashes);
768-
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
768+
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n)
769+
// br##" "##
769770
} else {
770771
self.report_raw_str_error(start, 2);
771772
}
@@ -774,7 +775,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
774775
if let Some(n_hashes) = n_hashes {
775776
let n = u32::from(n_hashes);
776777
let kind = token::CStrRaw(n_hashes);
777-
self.cook_unicode(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
778+
self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n)
779+
// cr##" "##
778780
} else {
779781
self.report_raw_str_error(start, 2);
780782
}
@@ -1091,40 +1093,36 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
10911093
self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
10921094
}
10931095

1094-
fn cook_common(
1096+
fn cook_quoted(
10951097
&self,
10961098
mut kind: token::LitKind,
10971099
mode: Mode,
10981100
start: BytePos,
10991101
end: BytePos,
11001102
prefix_len: u32,
11011103
postfix_len: u32,
1102-
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
11031104
) -> (token::LitKind, Symbol) {
11041105
let content_start = start + BytePos(prefix_len);
11051106
let content_end = end - BytePos(postfix_len);
11061107
let lit_content = self.str_from_to(content_start, content_end);
1107-
unescape(lit_content, mode, &mut |range, result| {
1108-
// Here we only check for errors. The actual unescaping is done later.
1109-
if let Err(err) = result {
1110-
let span_with_quotes = self.mk_sp(start, end);
1111-
let (start, end) = (range.start as u32, range.end as u32);
1112-
let lo = content_start + BytePos(start);
1113-
let hi = lo + BytePos(end - start);
1114-
let span = self.mk_sp(lo, hi);
1115-
let is_fatal = err.is_fatal();
1116-
if let Some(guar) = emit_unescape_error(
1117-
self.dcx(),
1118-
lit_content,
1119-
span_with_quotes,
1120-
span,
1121-
mode,
1122-
range,
1123-
err,
1124-
) {
1125-
assert!(is_fatal);
1126-
kind = token::Err(guar);
1127-
}
1108+
check_for_errors(lit_content, mode, |range, err| {
1109+
let span_with_quotes = self.mk_sp(start, end);
1110+
let (start, end) = (range.start as u32, range.end as u32);
1111+
let lo = content_start + BytePos(start);
1112+
let hi = lo + BytePos(end - start);
1113+
let span = self.mk_sp(lo, hi);
1114+
let is_fatal = err.is_fatal();
1115+
if let Some(guar) = emit_unescape_error(
1116+
self.dcx(),
1117+
lit_content,
1118+
span_with_quotes,
1119+
span,
1120+
mode,
1121+
range,
1122+
err,
1123+
) {
1124+
assert!(is_fatal);
1125+
kind = token::Err(guar);
11281126
}
11291127
});
11301128

@@ -1137,34 +1135,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
11371135
};
11381136
(kind, sym)
11391137
}
1140-
1141-
fn cook_unicode(
1142-
&self,
1143-
kind: token::LitKind,
1144-
mode: Mode,
1145-
start: BytePos,
1146-
end: BytePos,
1147-
prefix_len: u32,
1148-
postfix_len: u32,
1149-
) -> (token::LitKind, Symbol) {
1150-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
1151-
unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop)))
1152-
})
1153-
}
1154-
1155-
fn cook_mixed(
1156-
&self,
1157-
kind: token::LitKind,
1158-
mode: Mode,
1159-
start: BytePos,
1160-
end: BytePos,
1161-
prefix_len: u32,
1162-
postfix_len: u32,
1163-
) -> (token::LitKind, Symbol) {
1164-
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
1165-
unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop)))
1166-
})
1167-
}
11681138
}
11691139

11701140
pub fn nfc_normalize(string: &str) -> Symbol {

‎compiler/rustc_parse_format/Cargo.toml‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ edition = "2024"
55

66
[dependencies]
77
# tidy-alphabetical-start
8-
rustc-literal-escaper = "0.0.2"
8+
rustc-literal-escaper = "0.0.4"
99
rustc_lexer = { path = "../rustc_lexer" }
1010
# tidy-alphabetical-end
1111

‎compiler/rustc_parse_format/src/lib.rs‎

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@ use std::ops::Range;
2020
pub use Alignment::*;
2121
pub use Count::*;
2222
pub use Position::*;
23-
use rustc_literal_escaper::{Mode, unescape_unicode};
2423

2524
/// The type of format string that we are parsing.
2625
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -320,7 +319,7 @@ impl<'input> Parser<'input> {
320319
let without_quotes = &snippet[1..snippet.len() - 1];
321320
let (mut ok, mut vec) = (true, vec![]);
322321
let mut chars = input.chars();
323-
unescape_unicode(without_quotes,Mode::Str,&mut |range, res| match res {
322+
rustc_literal_escaper::unescape_str(without_quotes, |range, res| match res {
324323
Ok(ch) if ok && chars.next().is_some_and(|c| ch == c) => {
325324
vec.push((range, ch));
326325
}

‎compiler/rustc_proc_macro/Cargo.toml‎

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ test = false
1515
doctest = false
1616

1717
[dependencies]
18-
rustc-literal-escaper = "0.0.2"
18+
rustc-literal-escaper = "0.0.4"
1919

2020
[features]
2121
rustc-dep-of-std = []

‎library/Cargo.lock‎

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -271,10 +271,11 @@ dependencies = [
271271

272272
[[package]]
273273
name = "rustc-literal-escaper"
274-
version = "0.0.2"
274+
version = "0.0.4"
275275
source = "registry+https://github.com/rust-lang/crates.io-index"
276-
checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04"
276+
checksum = "ab03008eb631b703dd16978282ae36c73282e7922fe101a4bd072a40ecea7b8b"
277277
dependencies = [
278+
"rustc-std-workspace-core",
278279
"rustc-std-workspace-std",
279280
]
280281

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /