mod cursor;
mod error;
pub mod unescaped;

use std::{collections::HashMap, sync::OnceLock};

pub use cursor::Cursor;
pub mod token;
use error::{LexerError, LexerErrorKind};
use error_manager::ErrorManager;
use token::{Token, TokenKind};

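/// Iterator over the tokens produced by a [`Lexer`], keeping the previously
/// returned token and one token of lookahead.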
pub struct TokenStream<'lex, 'src> {
    lexer: Lexer<'lex, 'src>,
    prev: Option<Token>,
    next: Option<Token>,
}

impl TokenStream<'_, '_> {
    #[inline]
    pub fn is_finished(&self) -> bool { self.lexer.c.is_finished() }

    #[inline]
    pub fn previous(&self) -> Option<&Token> { self.prev.as_ref() }

    #[inline]
    pub fn peek(&self) -> Option<&Token> { self.next.as_ref() }
}

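/// Scans source text into [`Token`]s, reporting problems to the shared
/// [`ErrorManager`].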
pub struct Lexer<'lex, 'src> {
    c: Cursor<'src>,
    em: &'lex mut ErrorManager,
}

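/// Returns the keyword [`TokenKind`] for `ident`, or `None` if it is not a
/// keyword. The lookup table is built once and cached in a `OnceLock`.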
fn as_keyword(ident: &str) -> Option<TokenKind> {
    static KEYWORDS: OnceLock<HashMap<&str, TokenKind>> = OnceLock::new();
    KEYWORDS
        .get_or_init(|| {
            let mut map = HashMap::new();
            map.insert("struct", TokenKind::Struct);
            map.insert("else", TokenKind::Else);
            map.insert("false", TokenKind::False);
            map.insert("fn", TokenKind::Fn);
            map.insert("for", TokenKind::For);
            map.insert("if", TokenKind::If);
            map.insert("i8", TokenKind::I8);
            map.insert("i16", TokenKind::I16);
            map.insert("i32", TokenKind::I32);
            map.insert("i64", TokenKind::I64);
            map.insert("u8", TokenKind::U8);
            map.insert("u16", TokenKind::U16);
            map.insert("u32", TokenKind::U32);
            map.insert("u64", TokenKind::U64);
            map.insert("f32", TokenKind::F32);
            map.insert("f64", TokenKind::F64);
            map.insert("char", TokenKind::Char);
            map.insert("bool", TokenKind::Bool);
            map.insert("return", TokenKind::Return);
            map.insert("true", TokenKind::True);
            map.insert("let", TokenKind::Let);
            map.insert("use", TokenKind::Use);
            map.insert("as", TokenKind::As);
            map.insert("mod", TokenKind::Mod);
            map.insert("const", TokenKind::Const);
            map.insert("while", TokenKind::While);
            map.insert("break", TokenKind::Break);
            map.insert("continue", TokenKind::Continue);
            map.insert("extern", TokenKind::Extern);
            map
        })
        .get(ident)
        .copied()
}

impl Iterator for TokenStream<'_, '_> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        let ret = self.next.take();
        self.prev.clone_from(&ret);
        self.next = self.lexer.next_token();

        ret
    }
}

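/// Consuming a [`Lexer`] as an iterator is equivalent to calling
/// [`Lexer::into_token_stream`].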
impl<'lex, 'src> IntoIterator for Lexer<'lex, 'src> {
    type Item = Token;
    type IntoIter = TokenStream<'lex, 'src>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter { self.into_token_stream() }
}

impl<'lex, 'src> Lexer<'lex, 'src> {
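    /// Creates a lexer over `text`; `base_offset` is forwarded to the
    /// underlying [`Cursor`] and errors are emitted through `em`.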
    pub fn new(text: &'src str, base_offset: usize, em: &'lex mut ErrorManager) -> Self {
        Self {
            c: Cursor::new(text, base_offset),
            em,
        }
    }

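    /// Consumes the lexer and returns a [`TokenStream`] primed with the
    /// first token.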
    pub fn into_token_stream(mut self) -> TokenStream<'lex, 'src> {
        let next = self.next_token();
        TokenStream {
            lexer: self,
            prev: None,
            next,
        }
    }

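    /// Produces the next token, skipping over whitespace and comments;
    /// returns `None` once the input is exhausted.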
    pub fn next_token(&mut self) -> Option<Token> {
        if self.c.is_finished() {
            return None;
        }
        self.c.step();
        if let Some(t) = self.scan_token() {
            Some(t)
        } else {
            self.next_token()
        }
    }

    #[allow(clippy::unnecessary_wraps)]
    fn add_token(&self, kind: TokenKind) -> Option<Token> {
        Some(Token {
            kind,
            span: self.c.current_span(),
        })
    }

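    /// Reads one character and maps it to a token; whitespace, comments, and
    /// erroneous input yield `None`.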
    #[allow(clippy::too_many_lines)]
    fn scan_token(&mut self) -> Option<Token> {
        match self.c.advance() {
            '(' => self.add_token(TokenKind::LeftParen),
            ')' => self.add_token(TokenKind::RightParen),
            '{' => self.add_token(TokenKind::LeftBrace),
            '}' => self.add_token(TokenKind::RightBrace),
            '[' => self.add_token(TokenKind::LeftBracket),
            ']' => self.add_token(TokenKind::RightBracket),
            ',' => self.add_token(TokenKind::Comma),
            '.' => {
                if self.c.match_next('.') && self.c.match_next('.') {
                    self.add_token(TokenKind::ThreeDot)
                } else if self.c.peek().is_numeric() {
                    self.error(LexerErrorKind::FloatLitWithoutIntegralPart);
                    None
                } else {
                    self.add_token(TokenKind::Dot)
                }
            }
            '-' => {
                if self.c.match_next('>') {
                    self.add_token(TokenKind::Arrow)
                } else {
                    self.add_token(TokenKind::Minus)
                }
            }
            '+' => self.add_token(TokenKind::Plus),
            ';' => self.add_token(TokenKind::Semicolon),
            ':' => {
                if self.c.match_next(':') {
                    self.add_token(TokenKind::DoubleColon)
                } else {
                    self.add_token(TokenKind::Colon)
                }
            }
            '?' => self.add_token(TokenKind::Question),
            '*' => self.add_token(TokenKind::Star),
            '\'' => self.char_literal(),
            '!' => {
                if self.c.match_next('=') {
                    self.add_token(TokenKind::BangEqual)
                } else {
                    self.add_token(TokenKind::Bang)
                }
            }
            '=' => {
                if self.c.match_next('=') {
                    self.add_token(TokenKind::EqualEqual)
                } else {
                    self.add_token(TokenKind::Equal)
                }
            }
            '<' => {
                if self.c.match_next('=') {
                    self.add_token(TokenKind::LessEqual)
                } else {
                    self.add_token(TokenKind::Less)
                }
            }
            '>' => {
                if self.c.match_next('=') {
                    self.add_token(TokenKind::GreaterEqual)
                } else {
                    self.add_token(TokenKind::Greater)
                }
            }
            '/' => {
                if self.c.match_next('/') {
                    self.comment()
                } else if self.c.match_next('*') {
                    self.ml_comment()
                } else {
                    self.add_token(TokenKind::Slash)
                }
            }
            '&' => {
                if self.c.match_next('&') {
                    self.add_token(TokenKind::And)
                } else {
                    self.add_token(TokenKind::Ampersand)
                }
            }
            '|' => {
                if self.c.match_next('|') {
                    self.add_token(TokenKind::Or)
                } else {
                    self.add_token(TokenKind::VerticalPipe)
                }
            }
            '%' => self.add_token(TokenKind::Modulo),
            '"' => self.string(),
            ' ' | '\n' | '\r' | '\t' => None,
            c => {
                if c.is_numeric() {
                    self.number()
                } else if c.is_alphabetic() || c == '_' {
                    self.identifier()
                } else {
                    self.error(LexerErrorKind::UnexpectedCharacter(c));
                    None
                }
            }
        }
    }

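    /// Scans the remainder of a character literal after the opening `'`,
    /// handling a single escape sequence.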
    fn char_literal(&mut self) -> Option<Token> {
        if self.c.advance() == '\\' {
            self.c.advance();
        }
        if !self.c.match_next('\'') {
            self.error(LexerErrorKind::ExpectedClosingTickOnCharLiteral);
            self.c.advance_while(|c| *c != '\'');
            self.c.advance();
        }
        self.add_token(TokenKind::CharLiteral)
    }

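    /// Skips a `//` line comment; comments never produce tokens.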
    fn comment(&mut self) -> Option<Token> {
        self.c.advance_while(|c| *c != '\n');
        None
    }

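    /// Skips a `/* ... */` block comment, reporting an error if it is never
    /// closed.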
    fn ml_comment(&mut self) -> Option<Token> {
        while self.c.advance() != '*' || self.c.peek() != '/' {
            if self.c.is_finished() {
                self.error(LexerErrorKind::UnterminatedComment);
                // The comment never closes; stop scanning.
                return None;
            }
        }
        self.c.advance();
        None
    }

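    /// Scans the remainder of a string literal after the opening `"`.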
    fn string(&mut self) -> Option<Token> {
        loop {
            let c = self.c.advance();
            if c == '"' {
                break;
            }
            if self.c.is_finished() {
                self.error(LexerErrorKind::UnterminatedString);
                return None;
            }
            // Skip the escaped character so an escaped `"` does not close the literal.
            if c == '\\' {
                self.c.advance();
            }
        }
        self.add_token(TokenKind::String)
    }

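    /// Scans the fractional part of a float literal, expecting at least one
    /// digit after the `.`.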
    fn floating(&mut self) -> Option<Token> {
        self.c.advance();
        if self.c.peek().is_numeric() {
            self.c.advance_while(char::is_ascii_digit);
            self.add_token(TokenKind::FloatLiteral)
        } else {
            self.error(LexerErrorKind::FloatLitWithoutFloatingPart);
            None
        }
    }

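    /// Scans an integer literal, promoting it to a float literal when a `.`
    /// follows the digits.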
    fn number(&mut self) -> Option<Token> {
        self.c.advance_while(|n| n.is_numeric());
        if self.c.peek() == '.' {
            self.floating()
        } else {
            self.add_token(TokenKind::IntLiteral)
        }
    }

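    /// Scans an identifier and classifies it as a keyword when it matches one.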
    fn identifier(&mut self) -> Option<Token> {
        self.c.advance_while(|c| c.is_alphanumeric() || *c == '_');
        let lexem = self.c.current_lexem();
        let token_type = as_keyword(lexem).unwrap_or(TokenKind::Identifier);
        self.add_token(token_type)
    }

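    /// Emits a [`LexerError`] at the current span through the [`ErrorManager`].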
    fn error(&mut self, kind: LexerErrorKind) {
        self.em.emit_error(LexerError {
            kind,
            span: self.c.current_span(),
        });
    }
}