Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6988a8f

Browse files
committed
Auto merge of #141875 - nnethercote:ByteSymbol, r=petrochenkov
Introduce `ByteSymbol` It's like `Symbol` but for byte strings. The interner is now used for both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"` you'll get a `Symbol` and a `ByteSymbol` with the same index and the characters will only be stored once. The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to make `ast::LitKind` impl `Copy`, and to avoid the need to arena-allocate `ast::LitKind` in HIR. The latter change reduces peak memory by a non-trivial amount on literal-heavy benchmarks such as `deep-vector` and `tuple-stress`. `Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some changes so that they can handle normal strings and byte strings.
2 parents fdad98d + 478f828 commit 6988a8f

File tree

46 files changed

+447
-267
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+447
-267
lines changed

‎compiler/rustc_ast/src/ast.rs‎

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
//! - [`UnOp`], [`BinOp`], and [`BinOpKind`]: Unary and binary operators.
2020
2121
use std::borrow::Cow;
22-
use std::sync::Arc;
2322
use std::{cmp, fmt};
2423

2524
pub use GenericArgs::*;
@@ -32,7 +31,7 @@ use rustc_data_structures::tagged_ptr::Tag;
3231
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
3332
pub use rustc_span::AttrId;
3433
use rustc_span::source_map::{Spanned, respan};
35-
use rustc_span::{DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
34+
use rustc_span::{ByteSymbol, DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
3635
use thin_vec::{ThinVec, thin_vec};
3736

3837
pub use crate::format::*;
@@ -1805,10 +1804,17 @@ pub enum ExprKind {
18051804
Become(P<Expr>),
18061805

18071806
/// Bytes included via `include_bytes!`
1807+
///
18081808
/// Added for optimization purposes to avoid the need to escape
18091809
/// large binary blobs - should always behave like [`ExprKind::Lit`]
18101810
/// with a `ByteStr` literal.
1811-
IncludedBytes(Arc<[u8]>),
1811+
///
1812+
/// The value is stored as a `ByteSymbol`. It's unfortunate that we need to
1813+
/// intern (hash) the bytes because they're likely to be large and unique.
1814+
/// But it's necessary because this will eventually be lowered to
1815+
/// `LitKind::ByteStr`, which needs a `ByteSymbol` to impl `Copy` and avoid
1816+
/// arena allocation.
1817+
IncludedBytes(ByteSymbol),
18121818

18131819
/// A `format_args!()` expression.
18141820
FormatArgs(P<FormatArgs>),
@@ -2066,7 +2072,7 @@ impl YieldKind {
20662072
}
20672073

20682074
/// A literal in a meta item.
2069-
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
2075+
#[derive(Clone, Copy, Encodable, Decodable, Debug, HashStable_Generic)]
20702076
pub struct MetaItemLit {
20712077
/// The original literal as written in the source code.
20722078
pub symbol: Symbol,
@@ -2129,16 +2135,18 @@ pub enum LitFloatType {
21292135
/// deciding the `LitKind`. This means that float literals like `1f32` are
21302136
/// classified by this type as `Float`. This is different to `token::LitKind`
21312137
/// which does *not* consider the suffix.
2132-
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
2138+
#[derive(Clone, Copy, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
21332139
pub enum LitKind {
21342140
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
21352141
/// from the original token's symbol.
21362142
Str(Symbol, StrStyle),
2137-
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
2138-
/// non-utf8, and symbols only allow utf8 strings.
2139-
ByteStr(Arc<[u8]>, StrStyle),
2140-
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
2141-
CStr(Arc<[u8]>, StrStyle),
2143+
/// A byte string (`b"foo"`). The symbol is unescaped, and so may differ
2144+
/// from the original token's symbol.
2145+
ByteStr(ByteSymbol, StrStyle),
2146+
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. The
2147+
/// symbol is unescaped, and so may differ from the original token's
2148+
/// symbol.
2149+
CStr(ByteSymbol, StrStyle),
21422150
/// A byte char (`b'f'`).
21432151
Byte(u8),
21442152
/// A character literal (`'a'`).

‎compiler/rustc_ast/src/util/literal.rs‎

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::{ascii, fmt, str};
55
use rustc_literal_escaper::{
66
MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str,
77
};
8-
use rustc_span::{Span, Symbol, kw, sym};
8+
use rustc_span::{ByteSymbol, Span, Symbol, kw, sym};
99
use tracing::debug;
1010

1111
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
@@ -116,13 +116,12 @@ impl LitKind {
116116
assert!(!err.is_fatal(), "failed to unescape string literal")
117117
}
118118
});
119-
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
119+
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
120120
}
121121
token::ByteStrRaw(n) => {
122-
// Raw strings have no escapes so we can convert the symbol
123-
// directly to a `Arc<u8>`.
122+
// Raw byte strings have no escapes so no work is needed here.
124123
let buf = symbol.as_str().to_owned().into_bytes();
125-
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
124+
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
126125
}
127126
token::CStr => {
128127
let s = symbol.as_str();
@@ -137,15 +136,15 @@ impl LitKind {
137136
}
138137
});
139138
buf.push(0);
140-
LitKind::CStr(buf.into(), StrStyle::Cooked)
139+
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
141140
}
142141
token::CStrRaw(n) => {
143142
// Raw strings have no escapes so we can convert the symbol
144143
// directly to a `Arc<u8>` after appending the terminating NUL
145144
// char.
146145
let mut buf = symbol.as_str().to_owned().into_bytes();
147146
buf.push(0);
148-
LitKind::CStr(buf.into(), StrStyle::Raw(n))
147+
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
149148
}
150149
token::Err(guar) => LitKind::Err(guar),
151150
})
@@ -167,12 +166,12 @@ impl fmt::Display for LitKind {
167166
delim = "#".repeat(n as usize),
168167
string = sym
169168
)?,
170-
LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
171-
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
169+
LitKind::ByteStr(ref byte_sym, StrStyle::Cooked) => {
170+
write!(f, "b\"{}\"", escape_byte_str_symbol(byte_sym.as_byte_str()))?
172171
}
173-
LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
172+
LitKind::ByteStr(ref byte_sym, StrStyle::Raw(n)) => {
174173
// Unwrap because raw byte string literals can only contain ASCII.
175-
let symbol = str::from_utf8(bytes).unwrap();
174+
let symbol = str::from_utf8(byte_sym.as_byte_str()).unwrap();
176175
write!(
177176
f,
178177
"br{delim}\"{string}\"{delim}",
@@ -181,11 +180,11 @@ impl fmt::Display for LitKind {
181180
)?;
182181
}
183182
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
184-
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
183+
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
185184
}
186185
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
187186
// This can only be valid UTF-8.
188-
let symbol = str::from_utf8(bytes).unwrap();
187+
let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
189188
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
190189
}
191190
LitKind::Int(n, ty) => {

‎compiler/rustc_ast_lowering/src/expr.rs‎

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,11 @@ impl<'hir> LoweringContext<'_, 'hir> {
144144
hir::ExprKind::Unary(op, ohs)
145145
}
146146
ExprKind::Lit(token_lit) => hir::ExprKind::Lit(self.lower_lit(token_lit, e.span)),
147-
ExprKind::IncludedBytes(bytes) => {
148-
let lit = self.arena.alloc(respan(
147+
ExprKind::IncludedBytes(byte_sym) => {
148+
let lit = respan(
149149
self.lower_span(e.span),
150-
LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked),
151-
));
150+
LitKind::ByteStr(*byte_sym, StrStyle::Cooked),
151+
);
152152
hir::ExprKind::Lit(lit)
153153
}
154154
ExprKind::Cast(expr, ty) => {
@@ -421,19 +421,15 @@ impl<'hir> LoweringContext<'_, 'hir> {
421421
})
422422
}
423423

424-
pub(crate) fn lower_lit(
425-
&mut self,
426-
token_lit: &token::Lit,
427-
span: Span,
428-
) -> &'hir Spanned<LitKind> {
424+
pub(crate) fn lower_lit(&mut self, token_lit: &token::Lit, span: Span) -> hir::Lit {
429425
let lit_kind = match LitKind::from_token_lit(*token_lit) {
430426
Ok(lit_kind) => lit_kind,
431427
Err(err) => {
432428
let guar = report_lit_error(&self.tcx.sess.psess, err, *token_lit, span);
433429
LitKind::Err(guar)
434430
}
435431
};
436-
self.arena.alloc(respan(self.lower_span(span), lit_kind))
432+
respan(self.lower_span(span), lit_kind)
437433
}
438434

439435
fn lower_unop(&mut self, u: UnOp) -> hir::UnOp {
@@ -2141,10 +2137,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
21412137
}
21422138

21432139
fn expr_uint(&mut self, sp: Span, ty: ast::UintTy, value: u128) -> hir::Expr<'hir> {
2144-
let lit = self.arena.alloc(hir::Lit {
2140+
let lit = hir::Lit {
21452141
span: sp,
21462142
node: ast::LitKind::Int(value.into(), ast::LitIntType::Unsigned(ty)),
2147-
});
2143+
};
21482144
self.expr(sp, hir::ExprKind::Lit(lit))
21492145
}
21502146

@@ -2161,9 +2157,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
21612157
}
21622158

21632159
pub(super) fn expr_str(&mut self, sp: Span, value: Symbol) -> hir::Expr<'hir> {
2164-
let lit = self
2165-
.arena
2166-
.alloc(hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) });
2160+
let lit = hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) };
21672161
self.expr(sp, hir::ExprKind::Lit(lit))
21682162
}
21692163

‎compiler/rustc_ast_lowering/src/pat.rs‎

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -390,19 +390,15 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
390390
allow_paths: bool,
391391
) -> &'hir hir::PatExpr<'hir> {
392392
let span = self.lower_span(expr.span);
393-
let err = |guar| hir::PatExprKind::Lit {
394-
lit: self.arena.alloc(respan(span, LitKind::Err(guar))),
395-
negated: false,
396-
};
393+
let err =
394+
|guar| hir::PatExprKind::Lit { lit: respan(span, LitKind::Err(guar)), negated: false };
397395
let kind = match &expr.kind {
398396
ExprKind::Lit(lit) => {
399397
hir::PatExprKind::Lit { lit: self.lower_lit(lit, span), negated: false }
400398
}
401399
ExprKind::ConstBlock(c) => hir::PatExprKind::ConstBlock(self.lower_const_block(c)),
402-
ExprKind::IncludedBytes(bytes) => hir::PatExprKind::Lit {
403-
lit: self
404-
.arena
405-
.alloc(respan(span, LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked))),
400+
ExprKind::IncludedBytes(byte_sym) => hir::PatExprKind::Lit {
401+
lit: respan(span, LitKind::ByteStr(*byte_sym, StrStyle::Cooked)),
406402
negated: false,
407403
},
408404
ExprKind::Err(guar) => err(*guar),

‎compiler/rustc_ast_pretty/src/pprust/state/expr.rs‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -469,8 +469,12 @@ impl<'a> State<'a> {
469469
ast::ExprKind::Lit(token_lit) => {
470470
self.print_token_literal(*token_lit, expr.span);
471471
}
472-
ast::ExprKind::IncludedBytes(bytes) => {
473-
let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None);
472+
ast::ExprKind::IncludedBytes(byte_sym) => {
473+
let lit = token::Lit::new(
474+
token::ByteStr,
475+
escape_byte_str_symbol(byte_sym.as_byte_str()),
476+
None,
477+
);
474478
self.print_token_literal(lit, expr.span)
475479
}
476480
ast::ExprKind::Cast(expr, ty) => {

‎compiler/rustc_builtin_macros/src/concat_bytes.rs‎

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,15 +177,15 @@ pub(crate) fn expand_concat_bytes(
177177
Ok(LitKind::Byte(val)) => {
178178
accumulator.push(val);
179179
}
180-
Ok(LitKind::ByteStr(ref bytes, _)) => {
181-
accumulator.extend_from_slice(bytes);
180+
Ok(LitKind::ByteStr(ref byte_sym, _)) => {
181+
accumulator.extend_from_slice(byte_sym.as_byte_str());
182182
}
183183
_ => {
184184
guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false));
185185
}
186186
},
187-
ExprKind::IncludedBytes(bytes) => {
188-
accumulator.extend_from_slice(bytes);
187+
ExprKind::IncludedBytes(byte_sym) => {
188+
accumulator.extend_from_slice(byte_sym.as_byte_str());
189189
}
190190
ExprKind::Err(guarantee) => {
191191
guar = Some(*guarantee);

‎compiler/rustc_builtin_macros/src/source_util.rs‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use rustc_parse::parser::{ForceCollect, Parser};
1616
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
1717
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
1818
use rustc_span::source_map::SourceMap;
19-
use rustc_span::{Pos, Span, Symbol};
19+
use rustc_span::{ByteSymbol, Pos, Span, Symbol};
2020
use smallvec::SmallVec;
2121

2222
use crate::errors;
@@ -237,7 +237,7 @@ pub(crate) fn expand_include_bytes(
237237
Ok((bytes, _bsp)) => {
238238
// Don't care about getting the span for the raw bytes,
239239
// because the console can't really show them anyway.
240-
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes));
240+
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(ByteSymbol::intern(&bytes)));
241241
MacEager::expr(expr)
242242
}
243243
Err(dummy) => dummy,

‎compiler/rustc_expand/src/proc_macro_server.rs‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -599,8 +599,12 @@ impl server::TokenStream for Rustc<'_, '_> {
599599
ast::ExprKind::Lit(token_lit) => {
600600
Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span))
601601
}
602-
ast::ExprKind::IncludedBytes(bytes) => {
603-
let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None);
602+
ast::ExprKind::IncludedBytes(byte_sym) => {
603+
let lit = token::Lit::new(
604+
token::ByteStr,
605+
escape_byte_str_symbol(byte_sym.as_byte_str()),
606+
None,
607+
);
604608
Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span))
605609
}
606610
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {

‎compiler/rustc_hir/src/arena.rs‎

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ macro_rules! arena_types {
88
[] asm_template: rustc_ast::InlineAsmTemplatePiece,
99
[] attribute: rustc_hir::Attribute,
1010
[] owner_info: rustc_hir::OwnerInfo<'tcx>,
11-
[] lit: rustc_hir::Lit,
1211
[] macro_def: rustc_ast::MacroDef,
1312
]);
1413
)

‎compiler/rustc_hir/src/hir.rs‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1807,7 +1807,7 @@ pub struct PatExpr<'hir> {
18071807
#[derive(Debug, Clone, Copy, HashStable_Generic)]
18081808
pub enum PatExprKind<'hir> {
18091809
Lit {
1810-
lit: &'hir Lit,
1810+
lit: Lit,
18111811
// FIXME: move this into `Lit` and handle negated literal expressions
18121812
// once instead of matching on unop neg expressions everywhere.
18131813
negated: bool,
@@ -2734,7 +2734,7 @@ pub enum ExprKind<'hir> {
27342734
/// A unary operation (e.g., `!x`, `*x`).
27352735
Unary(UnOp, &'hir Expr<'hir>),
27362736
/// A literal (e.g., `1`, `"foo"`).
2737-
Lit(&'hir Lit),
2737+
Lit(Lit),
27382738
/// A cast (e.g., `foo as f64`).
27392739
Cast(&'hir Expr<'hir>, &'hir Ty<'hir>),
27402740
/// A type ascription (e.g., `x: Foo`). See RFC 3307.

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /