Skip to main content

cl_parser/
parser.rs

1//! The parser takes a stream of [`Token`]s from the [`Lexer`], and turns them into [`cl_ast::ast`]
2//! nodes.
3
4pub mod expr;
5pub mod pat;
6
7pub mod error;
8
9use cl_ast::{types::*, *};
10use cl_lexer::{LexError, LexFailure, Lexer};
11use cl_structures::span::Span;
12use cl_token::{Lexeme, TKind, Token};
13pub use error::{EOF, PResult, PResultExt, ParseError, no_eof};
14
15/// Parse an expression from a [Parser]'s token stream at a given precedence level
16pub trait Parse<'t> {
17    /// The possible precedence `level`s for this parser implementation
18    type Prec: Copy + Default;
19
20    /// Parses `Self` from the tokens (and extra data) held in a [Parser]
21    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
22    where Self: Sized;
23}
24
25/// Handles stateful extraction from a [Lexer], with single-[Token] lookahead.
26#[derive(Debug)]
27pub struct Parser<'t> {
28    /// A stream of tokens produced from some source text
29    pub lexer: Lexer<'t>,
30
31    /// The currently-peeked output from the [Lexer]
32    pub next_tok: Option<PResult<Token>>,
33
34    /// The span of the last-consumed [Token]
35    pub last_loc: Span,
36
37    /// Whether the last-consumed [Token] can stand in for a semicolon in a `do` sequence
38    pub can_do: bool,
39}
40
41impl<'t> Parser<'t> {
42    /// Constructs a new Parser
43    pub fn new(lexer: Lexer<'t>) -> Self {
44        Self { last_loc: lexer.span(), lexer, next_tok: None, can_do: false }
45    }
46
47    /// The identity function. This exists to make production chaining easier.
48    pub const fn then<T>(&self, t: T) -> T {
49        t
50    }
51
52    /// Gets the [struct@Span] of the last-consumed [Token]
53    pub const fn span(&self) -> Span {
54        self.last_loc
55    }
56
57    /// Parses a value that implements the [Parse] trait.
58    pub fn parse<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
59        Parse::parse(self, level)
60    }
61
62    /// Parses a value that implements the [Parse] trait, and asserts the entire input
63    /// has been consumed.
64    pub fn parse_entire<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
65        let out = Parse::parse(self, level);
66        match self.peek().allow_eof()? {
67            Some(t) => Err(ParseError::ExpectedEOF(t.kind, t.span)),
68            None => out,
69        }
70    }
71
72    /// Peeks the next [`Token`]. Returns [`ParseError::FromLexer`] on lexer error.
73    pub fn peek(&mut self) -> PResult<&Token> {
74        let next_tok = match self.next_tok.take() {
75            Some(tok) => tok,
76            None => loop {
77                match self.lexer.scan() {
78                    Ok(Token { kind: TKind::Comment, .. }) => {}
79                    Ok(tok) => break Ok(tok),
80                    Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?,
81                    Err(e) => break Err(ParseError::FromLexer(e)),
82                }
83            },
84        };
85        let next_tok = self.next_tok.insert(next_tok);
86        next_tok.as_ref().map_err(|e| *e)
87    }
88
89    /// Peeks the next token if it matches the `expected` [`TKind`]
90    pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
91        match self.peek() {
92            Ok(tok) if tok.kind == expected => Ok(Some(tok)),
93            Ok(_) => Ok(None),
94            Err(e) => Err(e),
95        }
96    }
97
98    /// Consumes and returns the currently-peeked [Token].
99    pub fn take(&mut self) -> PResult<Token> {
100        let tok = self
101            .next_tok
102            .take()
103            .unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));
104
105        if let Ok(tok) = &tok {
106            self.last_loc = tok.span;
107            self.can_do = matches!(tok.kind, TKind::RCurly | TKind::Semi)
108        }
109
110        tok
111    }
112
113    /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
114    pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
115        self.take().map(|tok| tok.lexeme)
116    }
117
118    #[allow(clippy::should_implement_trait)]
119    pub fn next(&mut self) -> PResult<Token> {
120        self.peek().no_eof()?;
121        self.take() // .expect("should have token here")
122    }
123
124    /// Consumes and returns the next [`Token`] if it matches the `expected` [`TKind`]
125    pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
126        match self.peek() {
127            Ok(t) if t.kind == expected => self.take().map(Ok),
128            Ok(t) => Ok(Err(t.kind)),
129            Err(e) => Err(e),
130        }
131    }
132
133    /// Parses a list of P separated by `sep` tokens, ending in an `end` token.
134    /// ```ignore
135    /// List<T> = (T sep)* T? end ;
136    /// ```
137    pub fn list<P: Parse<'t>>(
138        &mut self,
139        mut elems: Vec<P>,
140        level: P::Prec,
141        sep: TKind,
142        end: TKind,
143    ) -> PResult<Vec<P>> {
144        // TODO: This loses lexer errors
145        while self.peek_if(end).no_eof()?.is_none() {
146            elems.push(self.parse(level).no_eof()?);
147            match self.peek_if(sep)? {
148                Some(_) => self.consume(),
149                None => break,
150            };
151        }
152        let kind = self.peek().map(Token::kind)?;
153        if kind == end {
154            self.consume();
155        } else if let Ok((first, _)) = kind.split()
156            && first == end
157        {
158            self.split()?;
159        } else {
160            return Err(ParseError::Expected(end, kind, self.span()));
161        }
162        Ok(elems)
163    }
164
165    /// Parses a list of one or more P at level `level`, separated by `sep` tokens
166    /// ```ignore
167    /// UnterminatedList<P> = P (sep P)*
168    /// ```
169    pub fn list_bare<P: Parse<'t>>(
170        &mut self,
171        mut elems: Vec<P>,
172        level: P::Prec,
173        sep: TKind,
174    ) -> PResult<Vec<P>> {
175        loop {
176            let elem = self.parse(level).no_eof()?;
177            elems.push(elem);
178            match self.peek_if(sep) {
179                Ok(Some(_)) => self.consume(),
180                Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems),
181                Err(e) => Err(e)?,
182            };
183        }
184    }
185
186    /// Parses into an [`Option<P>`] if the next token is `next`
187    pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
188        Ok(match self.next_if(next)? {
189            Ok(_) => Some(self.parse(level).no_eof()?),
190            Err(_) => None,
191        })
192    }
193
194    /// Parses a P unless the next [Token]'s [TKind] is `end`
195    pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
196        let out = match self.peek_if(end)? {
197            None => Some(self.parse(level).no_eof()?),
198            Some(_) => None,
199        };
200        self.expect(end)?;
201        Ok(out)
202    }
203
204    /// Ensures the next [Token]'s [TKind] is `next`
205    pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
206        self.next_if(next)?
207            .map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
208        Ok(self)
209    }
210
211    /// Consumes the currently peeked token without returning it.
212    pub fn consume(&mut self) -> &mut Self {
213        if self.next_tok.as_ref().is_some_and(|tok| tok.is_ok()) {
214            let _ = self.take();
215        }
216        self
217    }
218
219    /// Consumes the next token, and attempts to split it into multiple.
220    ///
221    /// If the next token cannot be split, it will be returned.
222    pub fn split(&mut self) -> PResult<Token> {
223        let Token { lexeme, kind, span } = self.next()?;
224        let kind = match kind.split() {
225            Err(_) => kind,
226            Ok((out, next)) => {
227                self.next_tok = Some(Ok(Token { lexeme: lexeme.clone(), kind: next, span }));
228                out
229            }
230        };
231        Ok(Token { lexeme, kind, span })
232    }
233}
234
235impl<'t> Parse<'t> for Path {
236    type Prec = ();
237
238    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
239        let mut parts = vec![];
240        if p.next_if(TKind::ColonColon)?.is_ok() {
241            parts.push("".into()); // the "root"
242        }
243        while let Some(Ok(id)) = p.next_if(TKind::Identifier).allow_eof()? {
244            parts.push(
245                id.lexeme
246                    .str()
247                    .expect("Identifier should have String")
248                    .into(),
249            );
250            if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? {
251                break;
252            }
253        }
254
255        Ok(Path { parts })
256    }
257}
258
259impl<'t> Parse<'t> for Literal {
260    type Prec = ();
261    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
262        let tok = p.peek()?;
263        Ok(match tok.kind {
264            TKind::True => p.consume().then(Literal::Bool(true)),
265            TKind::False => p.consume().then(Literal::Bool(false)),
266            TKind::Character | TKind::Integer | TKind::String => {
267                match p.take().expect("should have Token after peek").lexeme {
268                    Lexeme::String(str) => Literal::Str(str),
269                    Lexeme::Integer(int, base) => Literal::Int(int, base),
270                    Lexeme::Char(chr) => Literal::Char(chr),
271                }
272            }
273            other => Err(ParseError::NotLiteral(other, tok.span))?,
274        })
275    }
276}
277
278impl<'t> Parse<'t> for Use {
279    type Prec = ();
280
281    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
282        let tok = p.next()?;
283        Ok(match tok.kind {
284            TKind::Star => p.then(Use::Glob),
285            TKind::Identifier => {
286                let name = tok.lexeme.str().expect("should have String").into();
287                match p.peek().map(Token::kind).allow_eof()? {
288                    Some(TKind::ColonColon) => Use::Path(name, p.consume().parse(())?),
289                    Some(TKind::As) => Use::Alias(
290                        name,
291                        p.consume()
292                            .next_if(TKind::Identifier)?
293                            .map_err(|e| ParseError::Expected(TKind::Identifier, e, p.span()))?
294                            .lexeme
295                            .str()
296                            .expect("Identifier should have string")
297                            .into(),
298                    ),
299                    _ => Use::Name(name),
300                }
301            }
302            TKind::LCurly => Use::Tree(p.list(vec![], (), TKind::Comma, TKind::RCurly)?),
303            _ => Err(ParseError::NotUse(tok.kind, tok.span))?,
304        })
305    }
306}
307
308impl<'t, P: Parse<'t> + AstNode> Parse<'t> for At<P> {
309    type Prec = P::Prec;
310    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
311    where Self: Sized {
312        let start = p.peek().map(|t| t.span).unwrap_or_else(|_| p.span());
313        Ok(At(p.parse(level)?, start.merge(p.span())))
314    }
315}
316
317impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
318    type Prec = P::Prec;
319    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
320    where Self: Sized {
321        Ok(Box::new(p.parse(level)?))
322    }
323}