Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 708d57e

Browse files
committed
Auto merge of #90461 - pietroalbini:bidi-beta, r=nikomatsakis
[beta] Fix CVE-2021-42574 This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability. The changes in this PR will be released in tomorrow's beta release. [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574 [advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
2 parents 7e4c9ee + a59d96e commit 708d57e

File tree

13 files changed

+543
-10
lines changed

13 files changed

+543
-10
lines changed

‎Cargo.lock‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4223,6 +4223,7 @@ dependencies = [
42234223
"rustc_span",
42244224
"tracing",
42254225
"unicode-normalization",
4226+
"unicode-width",
42264227
]
42274228

42284229
[[package]]

‎RELEASES.md‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
Version 1.56.1 (2021-11-01)
2+
===========================
3+
4+
- New lints to detect the presence of bidirectional-override Unicode
5+
codepoints in the compiled source code ([CVE-2021-42574])
6+
7+
[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
8+
19
Version 1.56.0 (2021-10-21)
210
========================
311

‎compiler/rustc_errors/src/emitter.rs‎

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2054,8 +2054,26 @@ fn num_decimal_digits(num: usize) -> usize {
20542054
MAX_DIGITS
20552055
}
20562056

2057+
// We replace some characters so the CLI output is always consistent and underlines aligned.
2058+
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2059+
('\t', " "), // We do our own tab replacement
2060+
('\u{202A}', ""), // The following unicode text flow control characters are inconsistently
2061+
('\u{202B}', ""), // supported across CLIs and can cause confusion due to the bytes on disk
2062+
('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always.
2063+
('\u{202E}', ""),
2064+
('\u{2066}', ""),
2065+
('\u{2067}', ""),
2066+
('\u{2068}', ""),
2067+
('\u{202C}', ""),
2068+
('\u{2069}', ""),
2069+
];
2070+
20572071
fn replace_tabs(str: &str) -> String {
2058-
str.replace('\t', " ")
2072+
let mut s = str.to_string();
2073+
for (c, replacement) in OUTPUT_REPLACEMENTS {
2074+
s = s.replace(*c, replacement);
2075+
}
2076+
s
20592077
}
20602078

20612079
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {

‎compiler/rustc_lint/src/context.rs‎

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
1717
use self::TargetLint::*;
1818

19+
use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS;
1920
use crate::levels::{is_known_lint_tool, LintLevelsBuilder};
2021
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
2122
use rustc_ast as ast;
@@ -39,7 +40,7 @@ use rustc_session::lint::{BuiltinLintDiagnostics, ExternDepSpec};
3940
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
4041
use rustc_session::Session;
4142
use rustc_span::lev_distance::find_best_match_for_name;
42-
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
43+
use rustc_span::{symbol::Symbol, BytePos,MultiSpan, Span, DUMMY_SP};
4344
use rustc_target::abi;
4445
use tracing::debug;
4546

@@ -597,6 +598,42 @@ pub trait LintContext: Sized {
597598
// Now, set up surrounding context.
598599
let sess = self.sess();
599600
match diagnostic {
601+
BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => {
602+
let spans: Vec<_> = content
603+
.char_indices()
604+
.filter_map(|(i, c)| {
605+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
606+
let lo = span.lo() + BytePos(2 + i as u32);
607+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
608+
})
609+
})
610+
.collect();
611+
let (an, s) = match spans.len() {
612+
1 => ("an ", ""),
613+
_ => ("", "s"),
614+
};
615+
db.span_label(span, &format!(
616+
"this comment contains {}invisible unicode text flow control codepoint{}",
617+
an,
618+
s,
619+
));
620+
for (c, span) in &spans {
621+
db.span_label(*span, format!("{:?}", c));
622+
}
623+
db.note(
624+
"these kind of unicode codepoints change the way text flows on \
625+
applications that support them, but can cause confusion because they \
626+
change the order of characters on the screen",
627+
);
628+
if !spans.is_empty() {
629+
db.multipart_suggestion_with_style(
630+
"if their presence wasn't intentional, you can remove them",
631+
spans.into_iter().map(|(_, span)| (span, "".to_string())).collect(),
632+
Applicability::MachineApplicable,
633+
SuggestionStyle::HideCodeAlways,
634+
);
635+
}
636+
},
600637
BuiltinLintDiagnostics::Normal => (),
601638
BuiltinLintDiagnostics::BareTraitObject(span, is_global) => {
602639
let (sugg, app) = match sess.source_map().span_to_snippet(span) {
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
use crate::{EarlyContext, EarlyLintPass, LintContext};
2+
use rustc_ast as ast;
3+
use rustc_errors::{Applicability, SuggestionStyle};
4+
use rustc_span::{BytePos, Span, Symbol};
5+
6+
declare_lint! {
7+
/// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
8+
/// visual representation of text on screen in a way that does not correspond to their
9+
/// in-memory representation.
10+
///
11+
/// ### Explanation
12+
///
13+
/// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
14+
/// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
15+
/// its direction on software that supports these codepoints. This makes the text "abc" display
16+
/// as "cba" on screen. By leveraging software that supports these, people can write specially
17+
/// crafted literals that make the surrounding code seem like it's performing one action, when
18+
/// in reality it is performing another. Because of this, we proactively lint against their
19+
/// presence to avoid surprises.
20+
///
21+
/// ### Example
22+
///
23+
/// ```rust,compile_fail
24+
/// #![deny(text_direction_codepoint_in_literal)]
25+
/// fn main() {
26+
/// println!("{:?}", '‮');
27+
/// }
28+
/// ```
29+
///
30+
/// {{produces}}
31+
///
32+
pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
33+
Deny,
34+
"detect special Unicode codepoints that affect the visual representation of text on screen, \
35+
changing the direction in which text flows",
36+
}
37+
38+
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
39+
40+
crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
41+
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
42+
'\u{2069}',
43+
];
44+
45+
impl HiddenUnicodeCodepoints {
46+
fn lint_text_direction_codepoint(
47+
&self,
48+
cx: &EarlyContext<'_>,
49+
text: Symbol,
50+
span: Span,
51+
padding: u32,
52+
point_at_inner_spans: bool,
53+
label: &str,
54+
) {
55+
// Obtain the `Span`s for each of the forbidden chars.
56+
let spans: Vec<_> = text
57+
.as_str()
58+
.char_indices()
59+
.filter_map(|(i, c)| {
60+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
61+
let lo = span.lo() + BytePos(i as u32 + padding);
62+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
63+
})
64+
})
65+
.collect();
66+
67+
cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| {
68+
let mut err = lint.build(&format!(
69+
"unicode codepoint changing visible direction of text present in {}",
70+
label
71+
));
72+
let (an, s) = match spans.len() {
73+
1 => ("an ", ""),
74+
_ => ("", "s"),
75+
};
76+
err.span_label(
77+
span,
78+
&format!(
79+
"this {} contains {}invisible unicode text flow control codepoint{}",
80+
label, an, s,
81+
),
82+
);
83+
if point_at_inner_spans {
84+
for (c, span) in &spans {
85+
err.span_label(*span, format!("{:?}", c));
86+
}
87+
}
88+
err.note(
89+
"these kind of unicode codepoints change the way text flows on applications that \
90+
support them, but can cause confusion because they change the order of \
91+
characters on the screen",
92+
);
93+
if point_at_inner_spans && !spans.is_empty() {
94+
err.multipart_suggestion_with_style(
95+
"if their presence wasn't intentional, you can remove them",
96+
spans.iter().map(|(_, span)| (*span, "".to_string())).collect(),
97+
Applicability::MachineApplicable,
98+
SuggestionStyle::HideCodeAlways,
99+
);
100+
err.multipart_suggestion(
101+
"if you want to keep them but make them visible in your source code, you can \
102+
escape them",
103+
spans
104+
.into_iter()
105+
.map(|(c, span)| {
106+
let c = format!("{:?}", c);
107+
(span, c[1..c.len() - 1].to_string())
108+
})
109+
.collect(),
110+
Applicability::MachineApplicable,
111+
);
112+
} else {
113+
// FIXME: in other suggestions we've reversed the inner spans of doc comments. We
114+
// should do the same here to provide the same good suggestions as we do for
115+
// literals above.
116+
err.note("if their presence wasn't intentional, you can remove them");
117+
err.note(&format!(
118+
"if you want to keep them but make them visible in your source code, you can \
119+
escape them: {}",
120+
spans
121+
.into_iter()
122+
.map(|(c, _)| { format!("{:?}", c) })
123+
.collect::<Vec<String>>()
124+
.join(", "),
125+
));
126+
}
127+
err.emit();
128+
});
129+
}
130+
}
131+
impl EarlyLintPass for HiddenUnicodeCodepoints {
132+
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
133+
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
134+
if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
135+
self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
136+
}
137+
}
138+
}
139+
140+
fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
141+
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
142+
let (text, span, padding) = match &expr.kind {
143+
ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {
144+
let text = token.symbol;
145+
if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
146+
return;
147+
}
148+
let padding = match kind {
149+
// account for `"` or `'`
150+
ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1,
151+
// account for `r###"`
152+
ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2,
153+
_ => return,
154+
};
155+
(text, span, padding)
156+
}
157+
_ => return,
158+
};
159+
self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal");
160+
}
161+
}

‎compiler/rustc_lint/src/lib.rs‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pub mod builtin;
4848
mod context;
4949
mod early;
5050
mod enum_intrinsics_non_enums;
51+
pub mod hidden_unicode_codepoints;
5152
mod internal;
5253
mod late;
5354
mod levels;
@@ -78,6 +79,7 @@ use rustc_span::Span;
7879
use array_into_iter::ArrayIntoIter;
7980
use builtin::*;
8081
use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
82+
use hidden_unicode_codepoints::*;
8183
use internal::*;
8284
use methods::*;
8385
use non_ascii_idents::*;
@@ -129,6 +131,7 @@ macro_rules! early_lint_passes {
129131
DeprecatedAttr: DeprecatedAttr::new(),
130132
WhileTrue: WhileTrue,
131133
NonAsciiIdents: NonAsciiIdents,
134+
HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
132135
IncompleteFeatures: IncompleteFeatures,
133136
RedundantSemicolons: RedundantSemicolons,
134137
UnusedDocComment: UnusedDocComment,

‎compiler/rustc_lint_defs/src/builtin.rs‎

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3518,6 +3518,34 @@ declare_lint! {
35183518
@feature_gate = sym::non_exhaustive_omitted_patterns_lint;
35193519
}
35203520

3521+
declare_lint! {
3522+
/// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
3523+
/// change the visual representation of text on screen in a way that does not correspond to
3524+
/// their in-memory representation.
3525+
///
3526+
/// ### Example
3527+
///
3528+
/// ```rust,compile_fail
3529+
/// #![deny(text_direction_codepoint_in_comment)]
3530+
/// fn main() {
3531+
/// println!("{:?}"); // '‮');
3532+
/// }
3533+
/// ```
3534+
///
3535+
/// {{produces}}
3536+
///
3537+
/// ### Explanation
3538+
///
3539+
/// Unicode allows changing the visual flow of text on screen in order to support scripts that
3540+
/// are written right-to-left, but a specially crafted comment can make code that will be
3541+
/// compiled appear to be part of a comment, depending on the software used to read the code.
3542+
/// To avoid potential problems or confusion, such as in CVE-2021-42574, by default we deny
3543+
/// their use.
3544+
pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
3545+
Deny,
3546+
"invisible directionality-changing codepoints in comment"
3547+
}
3548+
35213549
declare_lint! {
35223550
/// The `deref_into_dyn_supertrait` lint is output whenever there is a use of the
35233551
/// `Deref` implementation with a `dyn SuperTrait` type as `Output`.

‎compiler/rustc_lint_defs/src/lib.rs‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ pub enum BuiltinLintDiagnostics {
306306
TrailingMacro(bool, Ident),
307307
BreakWithLabelAndLoop(Span),
308308
NamedAsmLabel(String),
309+
UnicodeTextFlow(Span, String),
309310
}
310311

311312
/// Lints that are buffered up early on in the `Session` before the

‎compiler/rustc_parse/Cargo.toml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" }
1818
rustc_span = { path = "../rustc_span" }
1919
rustc_ast = { path = "../rustc_ast" }
2020
unicode-normalization = "0.1.11"
21+
unicode-width = "0.1.4"

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /