texlang/parse/
mod.rs

1//! Logic for parsing elements of the TeX grammar from token streams.
2//!
3//! This parsing module is based around the [Parsable] trait, which is the most important type in the module.
4//! This trait is implemented by Rust types that correspond to elements of the TeX grammar.
5//! The trait implementation provides a way to parse grammar elements out of the input stream.
6//!
7//! The module contains implementations of [Parsable] for tuples where each element is parsable.
8//! This allows expressions like `<integer><relation><integer>` to be parsed by one invocation
9//!     of [Parsable::parse], in this case on the type `(i32, std::cmp::Ordering, i32)`.
10//!
//! The second most important thing is the collection of custom Rust types like [OptionalEquals] and
//!     [FileLocation] which correspond to TeX grammar elements.
13//!
//! Finally, this module contains some functions for special situations like parsing lists of tokens.
15
16#[macro_use]
17mod helpers;
18
19mod dimen;
20mod filelocation;
21mod glue;
22mod integer;
23mod keyword;
24mod relation;
25#[cfg(test)]
26mod testing;
27mod variable;
28
29pub use filelocation::FileLocation;
30pub use integer::Uint;
31pub use keyword::parse_keyword;
32pub use relation::Ordering;
33pub use variable::OptionalEquals;
34pub use variable::OptionalEqualsUnexpanded;
35
36use crate::prelude as txl;
37use crate::traits::*;
38use crate::types::CatCode;
39use crate::*;
40
41/// Implementations of this trait are elements of the TeX grammar than can be parsed from a stream of tokens.
42pub trait Parsable: Sized {
43    /// Parses a value from an input stream.
44    ///
45    /// This method just delegates to [Parsable::parse_impl].
46    #[inline]
47    fn parse<S: TexlangState, I>(input: &mut I) -> txl::Result<Self>
48    where
49        I: AsMut<vm::ExpandedStream<S>>,
50    {
51        Parsable::parse_impl(input.as_mut())
52    }
53
54    /// Parses a value from the [vm::ExpandedStream].
55    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self>;
56}
57
58#[derive(Debug)]
59pub struct Error {
60    pub expected: String,
61    pub got: Option<token::Token>,
62    pub got_override: String,
63    pub annotation_override: String,
64    pub guidance: String,
65    pub additional_notes: Vec<String>,
66}
67
68impl error::TexError for Error {
69    fn kind(&self) -> error::Kind {
70        match self.got {
71            None => error::Kind::EndOfInput,
72            Some(token) => error::Kind::Token(token),
73        }
74    }
75
76    fn title(&self) -> String {
77        let got = if self.got_override.is_empty() {
78            match self.got {
79                None => "the input ended".to_string(),
80                Some(token) => match token.value() {
81                    token::Value::Letter(c) => format!["found the letter {c}"],
82                    token::Value::Other(c) => format!["found a non-letter character {c}"],
83                    _ => match (token.char(), token.cat_code()) {
84                        (Some(c), Some(code)) => {
85                            format!["found a token with value {c} and category code {code}"]
86                        }
87                        _ => "found a control sequence".to_string(),
88                    },
89                },
90            }
91        } else {
92            self.got_override.clone()
93        };
94        format!["expected {}, instead {}", self.expected, got]
95    }
96
97    fn notes(&self) -> Vec<error::display::Note> {
98        vec![self.guidance.clone().into()]
99    }
100
101    fn source_annotation(&self) -> String {
102        if !self.annotation_override.is_empty() {
103            return self.annotation_override.clone();
104        }
105        error::TexError::default_source_annotation(self)
106    }
107}
108
109impl Error {
110    pub fn new<T: Into<String>, R: Into<String>>(
111        expected: T,
112        got: Option<token::Token>,
113        guidance: R,
114    ) -> Self {
115        Error {
116            expected: expected.into(),
117            got,
118            got_override: "".into(),
119            annotation_override: "".into(),
120            guidance: guidance.into(),
121            additional_notes: vec![],
122        }
123    }
124
125    pub fn with_got_override<T: Into<String>>(mut self, got_override: T) -> Self {
126        self.got_override = got_override.into();
127        self
128    }
129
130    pub fn with_annotation_override<T: Into<String>>(mut self, annotation_override: T) -> Self {
131        self.annotation_override = annotation_override.into();
132        self
133    }
134}
135
136macro_rules! generate_tuple_impls {
137    ( $first: ident ) => {};
138    ( $first: ident, $( $name: ident ),+ ) => {
139        generate_tuple_impls![ $( $name ),+];
140
141        impl<$first : Parsable, $( $name : Parsable ),+> Parsable for ($first, $( $name ),+) {
142            fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
143                Ok(($first::parse(input)?, $( $name::parse(input)? ),+))
144            }
145        }
146    };
147}
148
149generate_tuple_impls![T1, T2, T3, T4, T5];
150
151impl Parsable for Option<token::CommandRef> {
152    // TeX.2021.get_r_token
153    // TeX.2021.1215
154    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
155        // Implements get_r_token
156        while let Some(found_equals) = get_optional_element![
157            input.unexpanded(),
158            token::Value::Space(_) => true,
159        ] {
160            if found_equals {
161                break;
162            }
163        }
164        let ref_or = get_required_element![
165            input.unexpanded(),
166            "a control sequence or active character",
167            "a command must be a control sequence or an active character",
168            token::Value::CommandRef(command_ref) => command_ref,
169        ];
170        Ok(ref_or)
171    }
172}
173
174impl Parsable for Vec<token::Token> {
175    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
176        let mut result = input.checkout_token_buffer();
177        let first_token = input.next_or_err(TokenStreamEndOfInputError {})?;
178        let got = match first_token.value() {
179            token::Value::CommandRef(command_ref) => {
180                match input.commands_map().get_command(&command_ref) {
181                    Some(command::Command::Variable(cmd)) => {
182                        if let crate::variable::ValueRef::TokenList(token_list) =
183                            cmd.clone().value(first_token, input)?
184                        {
185                            result.extend(token_list.iter());
186                            return Ok(result);
187                        };
188                        "a variable command of the wrong type (wanted a token list)"
189                    }
190                    Some(_) => "a command that is not a variable command",
191                    None => "an undefined command",
192                }
193            }
194            token::Value::BeginGroup(_) => {
195                finish_parsing_balanced_tokens(input, &mut result)?;
196                return Ok(result);
197            }
198            _ => "a non-command, non-opening brace token",
199        };
200        input.return_token_buffer(result);
201        Err(input.fatal_error(
202            parse::Error::new(
203                "an opening brace or a variable of type token list",
204                Some(first_token),
205                "",
206            )
207            .with_got_override(format!("got {got}"))
208            .with_annotation_override(got),
209        ))
210    }
211}
212
213#[derive(Debug)]
214struct TokenStreamEndOfInputError;
215
216impl error::EndOfInputError for TokenStreamEndOfInputError {
217    fn doing(&self) -> String {
218        "parsing a token list".into()
219    }
220}
221
222/// Parses balanced tokens from the stream.
223///
224/// This function assumes the the initial opening brace has ready been consumed.
225/// It returns false if the input ends before balanced tokens completed.
226///
227/// This function is analogous to `scan_toks(true, true)` in Knuth's TeX.
228pub fn finish_parsing_balanced_tokens<S: vm::TokenStream>(
229    stream: &mut S,
230    result: &mut Vec<token::Token>,
231) -> txl::Result<()> {
232    let mut scope_depth = 0;
233    loop {
234        let token = stream.next_or_err(TokenStreamEndOfInputError {})?;
235        match token.value() {
236            token::Value::BeginGroup(_) => {
237                scope_depth += 1;
238            }
239            token::Value::EndGroup(_) => {
240                if scope_depth == 0 {
241                    return Ok(());
242                }
243                scope_depth -= 1;
244            }
245            _ => (),
246        }
247        result.push(token);
248    }
249}
250
251/// When parsed, this type consumes an arbitrary number of spaces from the input stream
252///
253/// TODO: we should audit all places Knuth uses this, and ensure we're using it too.
254///
255/// TeX.2021.406
256pub struct Spaces;
257
258impl Parsable for Spaces {
259    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
260        while let Some(token) = input.next()? {
261            match token.value() {
262                token::Value::Space(_) => {
263                    continue;
264                }
265                _ => {
266                    input.back(token);
267                    break;
268                }
269            }
270        }
271        Ok(Spaces {})
272    }
273}
274
275/// When parsed, this type consumes an arbitrary number of spaces from the unexpanded input stream
276pub struct SpacesUnexpanded;
277
278impl Parsable for SpacesUnexpanded {
279    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
280        let input = input.unexpanded();
281        while let Some(token) = input.next()? {
282            match token.value() {
283                token::Value::Space(_) => {
284                    continue;
285                }
286                _ => {
287                    input.back(token);
288                    break;
289                }
290            }
291        }
292        Ok(SpacesUnexpanded {})
293    }
294}
295
296impl Parsable for Option<char> {
297    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
298        let Some(token) = input.next()? else {
299            return Ok(None);
300        };
301        let c = match token.value() {
302            token::Value::BeginGroup(_)
303            | token::Value::EndGroup(_)
304            | token::Value::MathShift(_)
305            | token::Value::AlignmentTab(_)
306            | token::Value::Parameter(_)
307            | token::Value::Superscript(_)
308            | token::Value::Subscript(_)
309            | token::Value::Space(_) => {
310                input.back(token);
311                return Ok(None);
312            }
313            token::Value::Letter(c) => c,
314            token::Value::Other(c) => c,
315            token::Value::CommandRef(command_ref) => {
316                match input.commands_map().get_command(&command_ref) {
317                    Some(command::Command::Character(c)) => *c,
318                    _ => {
319                        input.back(token);
320                        return Ok(None);
321                    }
322                }
323            }
324        };
325        Ok(Some(c))
326    }
327}
328
329/// When parsed, this type consumes an optional space from the token stream.
330pub struct OptionalSpace;
331
332impl Parsable for OptionalSpace {
333    fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
334        // TeX.2021.443
335        if let Some(next) = input.next()? {
336            if next.cat_code() != Some(CatCode::Space) {
337                input.back(next);
338            }
339        }
340        Ok(OptionalSpace {})
341    }
342}