Skip to content

Commit 1566a60

Browse files
Auto merge of #149689 - fereidani:main, r=<try>
Improve lexer performance by 5-10% overall, improve string lexer performance 15%
2 parents 0b96731 + 8ae8758 commit 1566a60

File tree

2 files changed

+74
-40
lines changed

2 files changed

+74
-40
lines changed

compiler/rustc_lexer/src/cursor.rs

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,28 @@ impl<'a> Cursor<'a> {
102102
Some(c)
103103
}
104104

105+
pub(crate) fn bump_if(&mut self, byte: char) -> bool {
106+
let mut chars = self.chars.clone();
107+
if chars.next() == Some(byte) {
108+
self.chars = chars;
109+
true
110+
} else {
111+
false
112+
}
113+
}
114+
115+
/// Bumps the cursor if the next character is either of the two expected characters.
116+
pub(crate) fn bump_if_either(&mut self, byte1: char, byte2: char) -> bool {
117+
let mut chars = self.chars.clone();
118+
if let Some(c) = chars.next()
119+
&& (c == byte1 || c == byte2)
120+
{
121+
self.chars = chars;
122+
return true;
123+
}
124+
false
125+
}
126+
105127
/// Moves to a substring by a number of bytes.
106128
pub(crate) fn bump_bytes(&mut self, n: usize) {
107129
self.chars = self.as_str()[n..].chars();
@@ -115,11 +137,35 @@ impl<'a> Cursor<'a> {
115137
self.bump();
116138
}
117139
}
140+
/// Eats characters until the given byte is found.
141+
/// Returns true if the byte was found, false if end of file was reached.
142+
pub(crate) fn eat_until(&mut self, byte: u8) -> bool {
143+
match memchr::memchr(byte, self.as_str().as_bytes()) {
144+
Some(index) => {
145+
self.bump_bytes(index);
146+
true
147+
}
148+
None => {
149+
self.chars = "".chars();
150+
false
151+
}
152+
}
153+
}
118154

119-
pub(crate) fn eat_until(&mut self, byte: u8) {
120-
self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
121-
Some(index) => self.as_str()[index..].chars(),
122-
None => "".chars(),
155+
/// Eats characters until any of the given bytes is found, then consumes past it.
156+
/// Returns the found byte if any, or None if end of file was reached.
157+
pub(crate) fn eat_past_either(&mut self, byte1: u8, byte2: u8) -> Option<u8> {
158+
let bytes = self.as_str().as_bytes();
159+
match memchr::memchr2(byte1, byte2, bytes) {
160+
Some(index) => {
161+
let found = bytes[index];
162+
self.bump_bytes(index + 1);
163+
Some(found)
164+
}
165+
None => {
166+
self.chars = "".chars();
167+
None
168+
}
123169
}
124170
}
125171
}

compiler/rustc_lexer/src/lib.rs

Lines changed: 24 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,6 @@ impl Cursor<'_> {
563563
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
564564
let invalid_infostring = self.first() != '\n';
565565

566-
let mut found = false;
567566
let nl_fence_pattern = format!("\n{:-<1$}", "", length_opening as usize);
568567
if let Some(closing) = self.as_str().find(&nl_fence_pattern) {
569568
// candidate found
@@ -576,10 +575,7 @@ impl Cursor<'_> {
576575
// ----
577576
// combine those stuff into this frontmatter token such that it gets detected later.
578577
self.eat_until(b'\n');
579-
found = true;
580-
}
581-
582-
if !found {
578+
} else {
583579
// recovery strategy: a closing statement might have preceding whitespace/newline
584580
// but not have enough dashes to properly close. In this case, we eat until there,
585581
// and report a mismatch in the parser.
@@ -656,23 +652,25 @@ impl Cursor<'_> {
656652
};
657653

658654
let mut depth = 1usize;
659-
while let Some(c) = self.bump() {
655+
while let Some(c) = self.eat_past_either(b'/', b'*') {
660656
match c {
661-
'/' if self.first() == '*' => {
662-
self.bump();
663-
depth += 1;
657+
b'/' => {
658+
if self.bump_if('*') {
659+
depth += 1;
660+
}
664661
}
665-
'*' if self.first() == '/' => {
666-
self.bump();
667-
depth -= 1;
668-
if depth == 0 {
669-
// This block comment is closed, so for a construction like "/* */ */"
670-
// there will be a successfully parsed block comment "/* */"
671-
// and " */" will be processed separately.
672-
break;
662+
b'*' => {
663+
if self.bump_if('/') {
664+
depth -= 1;
665+
if depth == 0 {
666+
// This block comment is closed, so for a construction like "/* */ */"
667+
// there will be a successfully parsed block comment "/* */"
668+
// and " */" will be processed separately.
669+
break;
670+
}
673671
}
674672
}
675-
_ => (),
673+
_ => unreachable!(),
676674
}
677675
}
678676

@@ -935,19 +933,15 @@ impl Cursor<'_> {
935933
/// if string is terminated.
936934
fn double_quoted_string(&mut self) -> bool {
937935
debug_assert!(self.prev() == '"');
938-
while let Some(c) = self.bump() {
936+
while let Some(c) = self.eat_past_either(b'"', b'\\') {
939937
match c {
940-
'"' => {
938+
b'"' => {
941939
return true;
942940
}
943-
'\\' if self.first() == '\\' || self.first() == '"' => {
944-
// Bump again to skip escaped character.
945-
self.bump();
946-
}
947-
_ => (),
941+
b'\\' => _ = self.bump_if_either('\\', '"'),
942+
_ => unreachable!(),
948943
}
949944
}
950-
// End of file reached.
951945
false
952946
}
953947

@@ -963,9 +957,8 @@ impl Cursor<'_> {
963957
debug_assert!(self.prev() != '#');
964958

965959
let mut n_start_hashes: u32 = 0;
966-
while self.first() == '#' {
960+
while self.bump_if('#') {
967961
n_start_hashes += 1;
968-
self.bump();
969962
}
970963

971964
if self.first() != '"' {
@@ -1025,9 +1018,8 @@ impl Cursor<'_> {
10251018

10261019
// Count opening '#' symbols.
10271020
let mut eaten = 0;
1028-
while self.first() == '#' {
1021+
while self.bump_if('#') {
10291022
eaten += 1;
1030-
self.bump();
10311023
}
10321024
let n_start_hashes = eaten;
10331025

@@ -1043,9 +1035,7 @@ impl Cursor<'_> {
10431035
// Skip the string contents and on each '#' character met, check if this is
10441036
// a raw string termination.
10451037
loop {
1046-
self.eat_until(b'"');
1047-
1048-
if self.is_eof() {
1038+
if !self.eat_until(b'"') {
10491039
return Err(RawStrError::NoTerminator {
10501040
expected: n_start_hashes,
10511041
found: max_hashes,
@@ -1117,9 +1107,7 @@ impl Cursor<'_> {
11171107
/// and returns false otherwise.
11181108
fn eat_float_exponent(&mut self) -> bool {
11191109
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1120-
if self.first() == '-' || self.first() == '+' {
1121-
self.bump();
1122-
}
1110+
self.bump_if_either('-', '+');
11231111
self.eat_decimal_digits()
11241112
}
11251113

0 commit comments

Comments
 (0)