Skip to main content

cl_parser/
parser.rs

1//! The parser takes a stream of [`Token`]s from the [`Lexer`], and turns them into [`cl_ast::ast`]
2//! nodes.
3use cl_ast::{
4    types::{Literal, Path},
5    *,
6};
7use cl_lexer::{LexError, LexFailure, Lexer};
8use cl_structures::span::Span;
9use cl_token::{Lexeme, TKind, Token};
10
11pub trait Parse<'t> {
12    type Prec: Copy + Default;
13
14    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self>
15    where Self: Sized;
16}
17
18pub mod expr;
19pub mod pat;
20
21pub mod error;
22pub use error::{EOF, PResult, PResultExt, ParseError, no_eof};
23
24/// Handles stateful extraction from a [Lexer], with single-[Token] lookahead.
25#[derive(Debug)]
26pub struct Parser<'t> {
27    pub lexer: Lexer<'t>,
28    pub next_tok: Option<PResult<Token>>,
29    pub last_loc: Span,
30    pub elide_do: bool,
31}
32
33impl<'t> Parser<'t> {
34    /// Constructs a new Parser
35    pub fn new(lexer: Lexer<'t>) -> Self {
36        Self { last_loc: lexer.span(), lexer, next_tok: None, elide_do: false }
37    }
38
39    /// The identity function. This exists to make production chaining easier.
40    pub const fn then<T>(&self, t: T) -> T {
41        t
42    }
43
44    /// Gets the [struct@Span] of the last-consumed [Token]
45    pub const fn span(&self) -> Span {
46        self.last_loc
47    }
48
49    /// Parses a value that implements the [Parse] trait.
50    pub fn parse<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
51        Parse::parse(self, level)
52    }
53
54    /// Parses a value that implements the [Parse] trait, and asserts the entire input
55    /// has been consumed.
56    pub fn parse_entire<T: Parse<'t>>(&mut self, level: T::Prec) -> PResult<T> {
57        let out = Parse::parse(self, level);
58        match self.peek().allow_eof()? {
59            Some(t) => Err(ParseError::ExpectedEOF(t.kind, t.span)),
60            None => out,
61        }
62    }
63
64    /// Peeks the next [`Token`]. Returns [`ParseError::FromLexer`] on lexer error.
65    pub fn peek(&mut self) -> PResult<&Token> {
66        let next_tok = match self.next_tok.take() {
67            Some(tok) => tok,
68            None => loop {
69                match self.lexer.scan() {
70                    Ok(Token { kind: TKind::Comment, .. }) => {}
71                    Ok(tok) => break Ok(tok),
72                    Err(LexError { pos, res: LexFailure::EOF }) => Err(ParseError::EOF(pos))?,
73                    Err(e) => break Err(ParseError::FromLexer(e)),
74                }
75            },
76        };
77        let next_tok = self.next_tok.insert(next_tok);
78        next_tok.as_ref().map_err(|e| *e)
79    }
80
81    /// Peeks the next token if it matches the `expected` [`TKind`]
82    pub fn peek_if(&mut self, expected: TKind) -> PResult<Option<&Token>> {
83        match self.peek() {
84            Ok(tok) if tok.kind == expected => Ok(Some(tok)),
85            Ok(_) => Ok(None),
86            Err(e) => Err(e),
87        }
88    }
89
90    /// Consumes and returns the currently-peeked [Token].
91    pub fn take(&mut self) -> PResult<Token> {
92        let tok = self
93            .next_tok
94            .take()
95            .unwrap_or(Err(ParseError::UnexpectedEOF(self.last_loc)));
96
97        if let Ok(tok) = &tok {
98            self.last_loc = tok.span;
99            self.elide_do = matches!(
100                tok.kind,
101                TKind::RCurly | TKind::Semi | TKind::DotDot | TKind::DotDotEq
102            )
103        }
104
105        tok
106    }
107
108    /// Consumes the currently-peeked [Token], returning its lexeme without cloning.
109    pub fn take_lexeme(&mut self) -> PResult<Lexeme> {
110        self.take().map(|tok| tok.lexeme)
111    }
112
113    #[allow(clippy::should_implement_trait)]
114    pub fn next(&mut self) -> PResult<Token> {
115        self.peek().no_eof()?;
116        self.take() // .expect("should have token here")
117    }
118
119    /// Consumes and returns the next [`Token`] if it matches the `expected` [`TKind`]
120    pub fn next_if(&mut self, expected: TKind) -> PResult<Result<Token, TKind>> {
121        match self.peek() {
122            Ok(t) if t.kind == expected => self.take().map(Ok),
123            Ok(t) => Ok(Err(t.kind)),
124            Err(e) => Err(e),
125        }
126    }
127
128    /// Parses a list of P separated by `sep` tokens, ending in an `end` token.
129    /// ```ignore
130    /// List<T> = (T sep)* T? end ;
131    /// ```
132    pub fn list<P: Parse<'t>>(
133        &mut self,
134        mut elems: Vec<P>,
135        level: P::Prec,
136        sep: TKind,
137        end: TKind,
138    ) -> PResult<Vec<P>> {
139        // TODO: This loses lexer errors
140        while self.peek_if(end).no_eof()?.is_none() {
141            elems.push(self.parse(level).no_eof()?);
142            match self.peek_if(sep)? {
143                Some(_) => self.consume(),
144                None => break,
145            };
146        }
147        let kind = self.peek().map(Token::kind)?;
148        if kind == end {
149            self.consume();
150        } else if let Ok((first, _)) = kind.split()
151            && first == end
152        {
153            self.split()?;
154        } else {
155            return Err(ParseError::Expected(end, kind, self.span()));
156        }
157        Ok(elems)
158    }
159
160    /// Parses a list of one or more P at level `level`, separated by `sep` tokens
161    /// ```ignore
162    /// UnterminatedList<P> = P (sep P)*
163    /// ```
164    pub fn list_bare<P: Parse<'t>>(
165        &mut self,
166        mut elems: Vec<P>,
167        level: P::Prec,
168        sep: TKind,
169    ) -> PResult<Vec<P>> {
170        loop {
171            let elem = self.parse(level).no_eof()?;
172            elems.push(elem);
173            match self.peek_if(sep) {
174                Ok(Some(_)) => self.consume(),
175                Ok(None) | Err(ParseError::EOF(_)) => break Ok(elems),
176                Err(e) => Err(e)?,
177            };
178        }
179    }
180
181    /// Parses into an [`Option<P>`] if the next token is `next`
182    pub fn opt_if<P: Parse<'t>>(&mut self, level: P::Prec, next: TKind) -> PResult<Option<P>> {
183        Ok(match self.next_if(next)? {
184            Ok(_) => Some(self.parse(level).no_eof()?),
185            Err(_) => None,
186        })
187    }
188
189    /// Parses a P unless the next [Token]'s [TKind] is `end`
190    pub fn opt<P: Parse<'t>>(&mut self, level: P::Prec, end: TKind) -> PResult<Option<P>> {
191        let out = match self.peek_if(end)? {
192            None => Some(self.parse(level).no_eof()?),
193            Some(_) => None,
194        };
195        self.expect(end)?;
196        Ok(out)
197    }
198
199    /// Ensures the next [Token]'s [TKind] is `next`
200    pub fn expect(&mut self, next: TKind) -> PResult<&mut Self> {
201        self.next_if(next)?
202            .map_err(|tk| ParseError::Expected(next, tk, self.span()))?;
203        Ok(self)
204    }
205
206    /// Consumes the currently peeked token without returning it.
207    pub fn consume(&mut self) -> &mut Self {
208        if self.next_tok.as_ref().is_some_and(|tok| tok.is_ok()) {
209            let _ = self.take();
210        }
211        self
212    }
213
214    /// Consumes the next token, and attempts to split it into multiple.
215    ///
216    /// If the next token cannot be split, it will be returned.
217    pub fn split(&mut self) -> PResult<Token> {
218        let Token { lexeme, kind, span } = self.next()?;
219        let kind = match kind.split() {
220            Err(_) => kind,
221            Ok((out, next)) => {
222                self.next_tok = Some(Ok(Token { lexeme: lexeme.clone(), kind: next, span }));
223                out
224            }
225        };
226        Ok(Token { lexeme, kind, span })
227    }
228}
229
230impl<'t> Parse<'t> for Path {
231    type Prec = ();
232
233    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
234        let mut parts = vec![];
235        if p.next_if(TKind::ColonColon)?.is_ok() {
236            parts.push("".into()); // the "root"
237        }
238        while let Some(Ok(id)) = p.next_if(TKind::Identifier).allow_eof()? {
239            parts.push(
240                id.lexeme
241                    .str()
242                    .expect("Identifier should have String")
243                    .into(),
244            );
245            if let None | Some(Err(_)) = p.next_if(TKind::ColonColon).allow_eof()? {
246                break;
247            }
248        }
249
250        Ok(Path { parts })
251    }
252}
253
254impl<'t> Parse<'t> for Literal {
255    type Prec = ();
256    fn parse(p: &mut Parser<'t>, _level: ()) -> PResult<Self> {
257        let tok = p.peek()?;
258        Ok(match tok.kind {
259            TKind::True => p.consume().then(Literal::Bool(true)),
260            TKind::False => p.consume().then(Literal::Bool(false)),
261            TKind::Character | TKind::Integer | TKind::String => {
262                match p.take().expect("should have Token after peek").lexeme {
263                    Lexeme::String(str) => Literal::Str(str),
264                    Lexeme::Integer(int, base) => Literal::Int(int, base),
265                    Lexeme::Char(chr) => Literal::Char(chr),
266                }
267            }
268            other => Err(ParseError::NotLiteral(other, tok.span))?,
269        })
270    }
271}
272
273impl<'t> Parse<'t> for Use {
274    type Prec = ();
275
276    fn parse(p: &mut Parser<'t>, _level: Self::Prec) -> PResult<Self> {
277        let tok = p.next()?;
278        Ok(match tok.kind {
279            TKind::Star => p.then(Use::Glob),
280            TKind::Identifier => {
281                let name = tok.lexeme.str().expect("should have String").into();
282                match p.peek().map(Token::kind).allow_eof()? {
283                    Some(TKind::ColonColon) => Use::Path(name, p.consume().parse(())?),
284                    Some(TKind::As) => Use::Alias(
285                        name,
286                        p.consume()
287                            .next_if(TKind::Identifier)?
288                            .map_err(|e| ParseError::Expected(TKind::Identifier, e, p.span()))?
289                            .lexeme
290                            .str()
291                            .expect("Identifier should have string")
292                            .into(),
293                    ),
294                    _ => Use::Name(name),
295                }
296            }
297            TKind::LCurly => Use::Tree(p.list(vec![], (), TKind::Comma, TKind::RCurly)?),
298            _ => Err(ParseError::NotUse(tok.kind, tok.span))?,
299        })
300    }
301}
302
303impl<'t, P: Parse<'t> + Annotation> Parse<'t> for At<P> {
304    type Prec = P::Prec;
305    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
306    where Self: Sized {
307        let start = p.span();
308        Ok(At(p.parse(level)?, start.merge(p.span())))
309    }
310}
311
312impl<'t, P: Parse<'t>> Parse<'t> for Box<P> {
313    type Prec = P::Prec;
314    fn parse(p: &mut Parser<'t>, level: P::Prec) -> PResult<Self>
315    where Self: Sized {
316        Ok(Box::new(p.parse(level)?))
317    }
318}