texlang/parse/
mod.rs

1//! Logic for parsing elements of the TeX grammar from token streams.
2//!
3//! This parsing module is based around the [Parsable] trait, which is the most important type in the module.
4//! This trait is implemented by Rust types that correspond to elements of the TeX grammar.
5//! The trait implementation provides a way to parse grammar elements out of the input stream.
6//!
7//! The module contains implementations of [Parsable] for tuples where each element is parsable.
8//! This allows expressions like `<integer><relation><integer>` to be parsed by one invocation
9//!     of [Parsable::parse], in this case on the type `(i32, std::cmp::Ordering, i32)`.
10//!
11//! The second most important thing is the collection of custom Rust types like [OptionalEquals] and
//!     [FileLocation] which correspond to TeX grammar elements.
13//!
//! Finally this module contains some functions for special situations like parsing lists of tokens.
15
16#[macro_use]
17mod helpers;
18
19mod dimen;
20mod filelocation;
21mod glue;
22mod integer;
23mod keyword;
24mod relation;
25#[cfg(test)]
26mod testing;
27mod variable;
28
29pub use filelocation::FileLocation;
30pub use integer::Uint;
31pub use keyword::parse_keyword;
32pub use relation::Ordering;
33pub use variable::OptionalEquals;
34pub use variable::OptionalEqualsUnexpanded;
35
36use crate::prelude as txl;
37use crate::traits::*;
38use crate::types::CatCode;
39use crate::*;
40
41/// Implementations of this trait are elements of the TeX grammar than can be parsed from a stream of tokens.
42pub trait Parsable: Sized {
43    /// Parses a value from an input stream.
44    ///
45    /// This method just delegates to [Parsable::parse_impl].
46    #[inline]
47    fn parse<S: TexlangState, I>(input: &mut I) -> txl::Result<Self>
48    where
49        I: AsMut<vm::ExpandedStream<S>>,
50    {
51        Parsable::parse_impl(input.as_mut())
52    }
53
54    /// Parses a value from the [vm::ExpandedStream].
55    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self>;
56}
57
58#[derive(Debug)]
59pub struct Error {
60    pub expected: String,
61    pub got: Option<token::Token>,
62    pub got_override: String,
63    pub annotation_override: String,
64    pub guidance: String,
65    pub additional_notes: Vec<String>,
66}
67
68impl error::TexError for Error {
69    fn kind(&self) -> error::Kind {
70        match self.got {
71            None => error::Kind::EndOfInput,
72            Some(token) => error::Kind::Token(token),
73        }
74    }
75
76    fn title(&self) -> String {
77        let got = if self.got_override.is_empty() {
78            match self.got {
79                None => "the input ended".to_string(),
80                Some(token) => match token.value() {
81                    token::Value::Letter(c) => format!["found the letter {c}"],
82                    token::Value::Other(c) => format!["found a non-letter character {c}"],
83                    _ => match (token.char(), token.cat_code()) {
84                        (Some(c), Some(code)) => {
85                            format!["found a token with value {c} and category code {code}"]
86                        }
87                        _ => "found a control sequence".to_string(),
88                    },
89                },
90            }
91        } else {
92            self.got_override.clone()
93        };
94        format!["expected {}, instead {}", self.expected, got]
95    }
96
97    fn notes(&self) -> Vec<error::display::Note> {
98        vec![self.guidance.clone().into()]
99    }
100
101    fn source_annotation(&self) -> String {
102        if !self.annotation_override.is_empty() {
103            return self.annotation_override.clone();
104        }
105        error::TexError::default_source_annotation(self)
106    }
107}
108
109impl Error {
110    pub fn new<T: Into<String>, R: Into<String>>(
111        expected: T,
112        got: Option<token::Token>,
113        guidance: R,
114    ) -> Self {
115        Error {
116            expected: expected.into(),
117            got,
118            got_override: "".into(),
119            annotation_override: "".into(),
120            guidance: guidance.into(),
121            additional_notes: vec![],
122        }
123    }
124
125    pub fn with_got_override<T: Into<String>>(mut self, got_override: T) -> Self {
126        self.got_override = got_override.into();
127        self
128    }
129
130    pub fn with_annotation_override<T: Into<String>>(mut self, annotation_override: T) -> Self {
131        self.annotation_override = annotation_override.into();
132        self
133    }
134}
135
136macro_rules! generate_tuple_impls {
137    ( $first: ident ) => {};
138    ( $first: ident, $( $name: ident ),+ ) => {
139        generate_tuple_impls![ $( $name ),+];
140
141        impl<$first : Parsable, $( $name : Parsable ),+> Parsable for ($first, $( $name ),+) {
142            fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
143                Ok(($first::parse(input)?, $( $name::parse(input)? ),+))
144            }
145        }
146    };
147}
148
149generate_tuple_impls![T1, T2, T3, T4, T5];
150
151impl Parsable for Option<token::CommandRef> {
152    // TeX.2021.get_r_token
153    // TeX.2021.1215
154    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
155        // Implements get_r_token
156        while let Some(found_equals) = get_optional_element![
157            input.unexpanded(),
158            token::Value::Space(_) => true,
159        ] {
160            if found_equals {
161                break;
162            }
163        }
164        let ref_or = get_required_element![
165            input.unexpanded(),
166            "a control sequence or active character",
167            "a command must be a control sequence or an active character",
168            token::Value::CommandRef(command_ref) => command_ref,
169        ];
170        Ok(ref_or)
171    }
172}
173
174pub struct LeftBrace;
175
176impl Parsable for LeftBrace {
177    // TeX.2021.403 scan_left_brace
178    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
179        get_required_element![
180            input.unexpanded(),
181            "an opening brace",
182            "a balanced token list must start with an opening brace",
183            token::Value::BeginGroup(_) => (),
184        ];
185        Ok(LeftBrace {})
186    }
187}
188
189pub struct UnexpandedTokenList(pub Vec<token::Token>);
190
191impl Parsable for UnexpandedTokenList {
192    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
193        let mut result = input.checkout_token_buffer();
194        LeftBrace::parse(input)?;
195        finish_parsing_balanced_tokens(input.unexpanded(), &mut result)?;
196        Ok(UnexpandedTokenList(result))
197    }
198}
199
200impl Parsable for Vec<token::Token> {
201    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
202        let mut result = input.checkout_token_buffer();
203        let first_token = input.next_or_err(TokenStreamEndOfInputError {})?;
204        let got = match first_token.value() {
205            token::Value::CommandRef(command_ref) => {
206                match input.commands_map().get_command(&command_ref) {
207                    Some(command::Command::Variable(cmd)) => {
208                        if let crate::variable::ValueRef::TokenList(token_list) =
209                            cmd.clone().value(first_token, input)?
210                        {
211                            result.extend(token_list.iter());
212                            return Ok(result);
213                        };
214                        "a variable command of the wrong type (wanted a token list)"
215                    }
216                    Some(_) => "a command that is not a variable command",
217                    None => "an undefined command",
218                }
219            }
220            token::Value::BeginGroup(_) => {
221                finish_parsing_balanced_tokens(input, &mut result)?;
222                return Ok(result);
223            }
224            _ => "a non-command, non-opening brace token",
225        };
226        input.return_token_buffer(result);
227        Err(input.fatal_error(
228            parse::Error::new(
229                "an opening brace or a variable of type token list",
230                Some(first_token),
231                "",
232            )
233            .with_got_override(format!("got {got}"))
234            .with_annotation_override(got),
235        ))
236    }
237}
238
239#[derive(Debug)]
240struct TokenStreamEndOfInputError;
241
242impl error::EndOfInputError for TokenStreamEndOfInputError {
243    fn doing(&self) -> String {
244        "parsing a token list".into()
245    }
246}
247
248/// Parses balanced tokens from the stream.
249///
250/// This function assumes the the initial opening brace has ready been consumed.
251/// It returns false if the input ends before balanced tokens completed.
252///
253/// This function is somewhat analogous to `scan_toks` in Knuth's TeX.
254/// For us the `xpand` parameter can be controlled by providing a different token stream.
255pub fn finish_parsing_balanced_tokens<S: vm::TokenStream>(
256    stream: &mut S,
257    result: &mut Vec<token::Token>,
258) -> txl::Result<()> {
259    let mut scope_depth = 0;
260    loop {
261        let token = stream.next_or_err(TokenStreamEndOfInputError {})?;
262        match token.value() {
263            token::Value::BeginGroup(_) => {
264                scope_depth += 1;
265            }
266            token::Value::EndGroup(_) => {
267                if scope_depth == 0 {
268                    return Ok(());
269                }
270                scope_depth -= 1;
271            }
272            _ => (),
273        }
274        result.push(token);
275    }
276}
277
278/// When parsed, this type consumes an arbitrary number of spaces from the input stream
279///
280/// TODO: we should audit all places Knuth uses this, and ensure we're using it too.
281///
282/// TeX.2021.406
283pub struct Spaces;
284
285impl Parsable for Spaces {
286    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
287        while let Some(token) = input.next()? {
288            match token.value() {
289                token::Value::Space(_) => {
290                    continue;
291                }
292                _ => {
293                    input.back(token);
294                    break;
295                }
296            }
297        }
298        Ok(Spaces {})
299    }
300}
301
302/// When parsed, this type consumes an arbitrary number of spaces from the unexpanded input stream
303pub struct SpacesUnexpanded;
304
305impl Parsable for SpacesUnexpanded {
306    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
307        let input = input.unexpanded();
308        while let Some(token) = input.next()? {
309            match token.value() {
310                token::Value::Space(_) => {
311                    continue;
312                }
313                _ => {
314                    input.back(token);
315                    break;
316                }
317            }
318        }
319        Ok(SpacesUnexpanded {})
320    }
321}
322
323impl Parsable for Option<char> {
324    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
325        let Some(token) = input.next()? else {
326            return Ok(None);
327        };
328        let c = match token.value() {
329            token::Value::BeginGroup(_)
330            | token::Value::EndGroup(_)
331            | token::Value::MathShift(_)
332            | token::Value::AlignmentTab(_)
333            | token::Value::Parameter(_)
334            | token::Value::Superscript(_)
335            | token::Value::Subscript(_)
336            | token::Value::Space(_) => {
337                input.back(token);
338                return Ok(None);
339            }
340            token::Value::Letter(c) => c,
341            token::Value::Other(c) => c,
342            token::Value::CommandRef(command_ref) => {
343                match input.commands_map().get_command(&command_ref) {
344                    Some(command::Command::Character(c)) => *c,
345                    _ => {
346                        input.back(token);
347                        return Ok(None);
348                    }
349                }
350            }
351        };
352        Ok(Some(c))
353    }
354}
355
356/// When parsed, this type consumes an optional space from the token stream.
357pub struct OptionalSpace;
358
359impl Parsable for OptionalSpace {
360    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
361        // TeX.2021.443
362        if let Some(next) = input.next()? {
363            if next.cat_code() != Some(CatCode::Space) {
364                input.back(next);
365            }
366        }
367        Ok(OptionalSpace {})
368    }
369}