texlang/vm/
streams.rs

1use std::path::PathBuf;
2
3use super::TexlangState;
4use crate::prelude as txl;
5use crate::token::trace;
6use crate::token::Token;
7use crate::*;
8
9/// A stream of tokens generated on demand.
10///
11/// This trait describes a general stream of tokens where the front of the stream may
12/// retrieved using [TokenStream::next] or peeked at using [TokenStream::peek].
13/// In practice, all [TokenStreams](TokenStream) in Texlang
14/// are either [ExecutionInput], [ExpansionInput] or [UnexpandedStream].
15/// This trait exists to allow a generic function to accept any of these types.
16///
17/// # Note on lazy loading
18///
19/// The simplest example of a stream is a vector of tokens. However, streams are more general
20/// than this and can encompass situations in which the full contents cannot be determined in
21/// advance. This can be thought of as "lazy loading" for the tokens.
22/// The classic example of this kind of stream comes from the following LaTeX
23/// snippet:
24/// ```tex
25/// \makeatletter \do@
26/// ```
27/// Assuming the default TeX catcode map, if we were to parse this input all at once we would
28/// get three tokens: the control sequence `makeatletter`, the control sequence `do`, and a
29/// single character token with value `@` and catcode "other". This is not the correct result,
30/// though: the first control sequence changes the tokenization rules such that `@` is now
31/// an admissible character in the name of a control sequence. The correct input is thus
32/// the control sequence `makeatletter` followed by the control sequence `do@`.
33pub trait TokenStream {
34    /// The type of the custom state in the VM.
35    type S: TexlangState;
36
37    /// Gets the next token in the stream or error if the stream is exhausted.
38    ///
39    /// This method is almost the same
40    /// as the `next` method in Rust's iterator trait, except a stream can return an error.
41    fn next_or_err<E: error::EndOfInputError>(&mut self, err: E) -> txl::Result<Token> {
42        match self.next() {
43            Ok(None) => Err(self.fatal_error(error::EofError::new(err))),
44            Ok(Some(token)) => Ok(token),
45            Err(err) => Err(err),
46        }
47    }
48
49    /// Gets the next token in the stream or `Ok(None)` if the stream is exhausted.
50    fn next(&mut self) -> txl::Result<Option<Token>>;
51
52    fn peek(&mut self) -> txl::Result<Option<Token>> {
53        let token_or = self.next()?;
54        if let Some(token) = token_or {
55            self.back(token);
56        }
57        Ok(token_or)
58    }
59
60    /// Returns a token to the front of the token stream.
61    fn back(&mut self, token: Token);
62
63    /// Returns a reference to the VM.
64    fn vm(&self) -> &vm::VM<Self::S>;
65
66    /// Informs the VM that a fatal error has occurred.
67    ///
68    /// This fatal error causes the VM to shutdown.
69    ///
70    /// The returned shutdown signal must be propagated
71    ///     by returning `Err(shutdown_signal)` to the calling code.
72    /// If the shutdown signal is ignored, the VM will eventually panic.
73    /// See [`super::ShutdownSignal`] for more information.
74    #[must_use]
75    fn fatal_error<E: error::TexError>(&mut self, err: E) -> super::ShutdownSignal;
76
77    /// Informs the VM that a recoverable error has occurred.
78    ///
79    /// The VM responds either with `Ok(())`,
80    ///     indicating that the error should be recovered from,
81    /// or `Err(ShutdownSignal{})`,
82    ///     indicating that the error is fatal and the VM is shutting down.
83    ///
84    /// In the error case,
85    ///     the returned shutdown signal must be propagated using Rust's `?` operator.
86    /// If the shutdown signal is ignored, the VM will eventually panic.
87    /// See [`super::ShutdownSignal`] for more information.
88    fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()>;
89
90    /// Returns a reference to the commands map.
91    #[inline]
92    fn commands_map(&self) -> &command::Map<Self::S> {
93        &self.vm().commands_map
94    }
95
96    /// Returns a reference to the custom state.
97    #[inline]
98    fn state(&self) -> &Self::S {
99        &self.vm().state
100    }
101}
102
/// A [TokenStream] that performs expansion.
///
/// This is a transparent wrapper around an [UnexpandedStream].
/// The tokens can be read without expansion from the stream returned by the
/// [unexpanded](ExpandedStream::unexpanded) method.
#[repr(transparent)]
pub struct ExpandedStream<S>(UnexpandedStream<S>);
109
110impl<S> std::convert::AsMut<ExpandedStream<S>> for ExpandedStream<S> {
111    fn as_mut(&mut self) -> &mut ExpandedStream<S> {
112        self
113    }
114}
115
116impl<S: TexlangState> ExpandedStream<S> {
117    /// Returns the underlying unexpanded stream.
118    pub fn unexpanded(&mut self) -> &mut UnexpandedStream<S> {
119        &mut self.0
120    }
121
122    /// Expand the next token in the input.
123    ///
124    /// This method only expands a single token. If, after the expansion, the next token
125    /// is expandable it will not be expanded.
126    pub fn expand_once(&mut self) -> txl::Result<bool> {
127        stream::expand_once(self.vm_mut())
128    }
129
130    pub fn checkout_token_buffer(&mut self) -> Vec<Token> {
131        self.0 .0.internal.token_buffers.pop().unwrap_or_default().0
132    }
133
134    /// Return a token buffer, allowing it to be reused.
135    pub fn return_token_buffer(&mut self, mut token_buffer: Vec<Token>) {
136        token_buffer.clear();
137        self.0
138             .0
139            .internal
140            .token_buffers
141            .push(super::TokenBuffer(token_buffer))
142    }
143
144    pub(crate) fn vm_mut(&mut self) -> &mut vm::VM<S> {
145        &mut self.0 .0
146    }
147
148    /// Returns a mutable reference to the expanded tokens stack for the current input source.
149    ///
150    /// The tokens are a stack, so the next token is the last token in the vector.
151    #[inline]
152    pub fn expansions_mut(&mut self) -> &mut Vec<Token> {
153        self.0 .0.internal.expansions_mut()
154    }
155}
156
157impl<S: TexlangState> TokenStream for ExpandedStream<S> {
158    type S = S;
159
160    #[inline]
161    fn next(&mut self) -> txl::Result<Option<Token>> {
162        stream::next_expanded(&mut self.unexpanded().0)
163    }
164
165    #[inline]
166    fn vm(&self) -> &vm::VM<Self::S> {
167        &self.0 .0
168    }
169
170    #[inline]
171    fn back(&mut self, token: Token) {
172        self.0.back(token)
173    }
174    fn fatal_error<E: error::TexError>(&mut self, err: E) -> super::ShutdownSignal {
175        self.0 .0.fatal_error(err)
176    }
177    fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()> {
178        self.0 .0.error(err)
179    }
180}
181
/// Stream that returns input tokens without performing expansion.
///
/// The unexpanded stream is used when reading tokens without performing expansion;
/// e.g., when reading the replacement text for a macro defined using `\def`.
///
/// It can be obtained from an [ExpandedStream], an [ExecutionInput] or an [ExpansionInput]
/// by calling the `unexpanded` method each of those types provides.
///
/// This is a transparent wrapper around the [VM](vm::VM).
#[repr(transparent)]
pub struct UnexpandedStream<S>(vm::VM<S>);
191
192impl<S: TexlangState> TokenStream for UnexpandedStream<S> {
193    type S = S;
194
195    #[inline]
196    fn next(&mut self) -> txl::Result<Option<Token>> {
197        stream::next_unexpanded(&mut self.0)
198    }
199
200    #[inline]
201    fn vm(&self) -> &vm::VM<S> {
202        &self.0
203    }
204
205    #[inline]
206    fn back(&mut self, token: Token) {
207        self.0.internal.expansions_mut().push(token)
208    }
209    fn fatal_error<E: error::TexError>(&mut self, err: E) -> super::ShutdownSignal {
210        self.0.fatal_error(err)
211    }
212    fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()> {
213        self.0.error(err)
214    }
215}
216
/// Input type for expansion primitives.
///
/// This type provides:
///
/// - Access to the input stream (with or without expansion). Its implementation of the [TokenStream]
///   trait returns expanded tokens.
///   To read the input stream without performing expansion, use the
///   [unexpanded](ExpansionInput::unexpanded) method.
///
/// - Read only access to the VM.
///
/// - The ability to push source code or token expansions to the front of the input stream.
///   For source code use [ExpansionInput::push_source];
///   for tokens use [ExpansionInput::push_expansion] or [ExpansionInput::expansions_mut].
///
/// - Access to token buffers using the [ExpansionInput::checkout_token_buffer] and
///   [ExpansionInput::return_token_buffer] methods.
///
/// This type is also used in the parsing code for situations where either an
/// [ExpansionInput] or an [ExecutionInput] is accepted. We use this type because
/// it has only read access to the VM, and so casting does not escalate privileges.
#[repr(transparent)]
// TODO: should this be in the command module, not in the vm module?
pub struct ExpansionInput<S>(ExpandedStream<S>);
241
242impl<S> std::convert::AsMut<ExpandedStream<S>> for ExpansionInput<S> {
243    fn as_mut(&mut self) -> &mut ExpandedStream<S> {
244        &mut self.0
245    }
246}
247
248impl<S: TexlangState> TokenStream for ExpansionInput<S> {
249    type S = S;
250
251    fn next(&mut self) -> txl::Result<Option<Token>> {
252        self.0.next()
253    }
254
255    fn vm(&self) -> &vm::VM<Self::S> {
256        self.0.vm()
257    }
258
259    fn back(&mut self, token: Token) {
260        self.0.back(token);
261    }
262    fn fatal_error<E: error::TexError>(&mut self, err: E) -> super::ShutdownSignal {
263        self.0 .0 .0.fatal_error(err)
264    }
265    fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()> {
266        self.0 .0 .0.error(err)
267    }
268}
269
270impl<S> ExpansionInput<S> {
271    /// Creates a mutable reference to this type from the [VM](vm::VM) type.
272    #[inline]
273    pub fn new(vm: &mut vm::VM<S>) -> &mut ExpansionInput<S> {
274        unsafe { &mut *(vm as *mut vm::VM<S> as *mut ExpansionInput<S>) }
275    }
276}
277
278impl<S: TexlangState> ExpansionInput<S> {
279    /// Push source code to the front of the input stream.
280    #[inline]
281    pub fn push_source(
282        &mut self,
283        token: Token,
284        file_name: PathBuf,
285        source_code: String,
286    ) -> txl::Result<()> {
287        self.0
288             .0
289             .0
290            .internal
291            .push_source(Some(token), file_name, source_code)
292    }
293
294    /// End the current file.
295    ///
296    /// This method is used by `\endinput` primitive.
297    pub fn end_current_file(&mut self) {
298        self.0 .0 .0.internal.end_current_file()
299    }
300
301    pub fn push_string_tokens(&mut self, token: Token, s: &str) {
302        let trace_key = token.trace_key();
303        for c in s.chars().rev() {
304            let token = match c {
305                ' ' => token::Token::new_space(' ', trace_key),
306                _ => token::Token::new_letter(c, trace_key),
307            };
308            self.expansions_mut().push(token);
309        }
310    }
311}
312
313impl<S> ExpansionInput<S> {
314    #[inline]
315    pub fn unexpanded(&mut self) -> &mut UnexpandedStream<S> {
316        &mut self.0 .0
317    }
318
319    #[inline]
320    pub fn expanded(&mut self) -> &mut ExpandedStream<S> {
321        &mut self.0
322    }
323
324    /// Push tokens to the front of the input stream.
325    ///
326    /// The first token in the provided slice will be the next token read.
327    // TODO: destroy
328    #[inline]
329    pub fn push_expansion(&mut self, expansion: &[Token]) {
330        self.0 .0 .0.internal.push_expansion(expansion)
331    }
332
333    /// Returns a reference to the expanded tokens stack for the current input source.
334    ///
335    /// The tokens are a stack, so the next token is the last token in the vector.
336    ///
337    /// Adding tokens to the front of the input using this method can be more efficient
338    /// than using [ExpansionInput::push_expansion] because an allocation is avoided.
339    #[inline]
340    pub fn expansions(&self) -> &Vec<Token> {
341        self.0 .0 .0.internal.expansions()
342    }
343
344    /// Returns a mutable reference to the expanded tokens stack for the current input source.
345    ///
346    /// The tokens are a stack, so the next token is the last token in the vector.
347    ///
348    /// Adding tokens to the front of the input using this method can be more efficient
349    /// than using [ExpansionInput::push_expansion] because an allocation is avoided.
350    #[inline]
351    pub fn expansions_mut(&mut self) -> &mut Vec<Token> {
352        self.0 .0 .0.internal.expansions_mut()
353    }
354
355    #[inline]
356    pub fn state_and_expansions_mut(&mut self) -> (&S, &mut Vec<Token>) {
357        (&self.0 .0 .0.state, self.0 .0 .0.internal.expansions_mut())
358    }
359
360    /// Returns a vector than can be used as a token buffer, potentially without allocating memory.
361    ///
362    /// The returned vector is empty, but will generally have non-zero capacity from previous uses of the buffer.
363    /// Reusing the allocated memory results in fewer allocations overall.
364    /// This buffer mechanism was first introduced in a successful attempt to improve the performance of the
365    /// TeX macros implementation.
366    ///
367    /// When finished with the buffer, please return it using [return_token_buffer](ExpansionInput::return_token_buffer).
368    ///
369    /// This API may feel a bit awkward - it would seem nicer to return a mutable reference to a buffer instead.
370    /// Doing this while keeping the borrow checker happy is very difficult and (as is often the case) for good reason.
371    /// Token buffers are often used in macro expansion, and at any point in time multiple macros may be in
372    ///     the process of expansion.
373    /// This getting "the" token buffer to use for expansion would be incorrect, as the multiple expansions
374    /// would step on each other.
375    pub fn checkout_token_buffer(&mut self) -> Vec<Token> {
376        self.0
377             .0
378             .0
379            .internal
380            .token_buffers
381            .pop()
382            .unwrap_or_default()
383            .0
384    }
385
386    /// Return a token buffer, allowing it to be reused.
387    pub fn return_token_buffer(&mut self, mut token_buffer: Vec<Token>) {
388        token_buffer.clear();
389        self.0
390             .0
391             .0
392            .internal
393            .token_buffers
394            .push(super::TokenBuffer(token_buffer))
395    }
396}
397
/// Input type for execution primitives.
///
/// This type provides:
///
/// - Access to the input stream (with or without expansion). Its implementation of the [TokenStream]
///   trait returns expanded tokens.
///   To read the input stream without performing expansion, use the
///   [unexpanded](ExecutionInput::unexpanded) method.
///
/// - Mutable access to the state and the commands map through
///   the [ExecutionInput::state_mut]
///   and [ExecutionInput::commands_map_mut] methods.
#[repr(transparent)]
pub struct ExecutionInput<S>(ExpandedStream<S>);
412
413impl<S> std::convert::AsMut<ExpandedStream<S>> for ExecutionInput<S> {
414    fn as_mut(&mut self) -> &mut ExpandedStream<S> {
415        &mut self.0
416    }
417}
418
419impl<S: TexlangState> TokenStream for ExecutionInput<S> {
420    type S = S;
421
422    fn next(&mut self) -> txl::Result<Option<Token>> {
423        self.0.next()
424    }
425
426    fn vm(&self) -> &vm::VM<Self::S> {
427        self.0.vm()
428    }
429
430    fn back(&mut self, token: Token) {
431        self.0.back(token);
432    }
433    fn fatal_error<E: error::TexError>(&mut self, err: E) -> super::ShutdownSignal {
434        self.0 .0 .0.fatal_error(err)
435    }
436    fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()> {
437        self.0 .0 .0.error(err)
438    }
439}
440
441impl<S: TexlangState> ExecutionInput<S> {
442    /// Shutdown the VM.
443    pub fn shutdown(&mut self) -> super::ShutdownSignal {
444        self.0 .0 .0.shutdown()
445    }
446}
447
448impl<S> ExecutionInput<S> {
449    /// Creates a mutable reference to this type from the [VM](vm::VM) type.
450    #[inline]
451    pub fn new(vm: &mut vm::VM<S>) -> &mut ExecutionInput<S> {
452        unsafe { &mut *(vm as *mut vm::VM<S> as *mut ExecutionInput<S>) }
453    }
454
455    #[inline]
456    pub fn unexpanded(&mut self) -> &mut UnexpandedStream<S> {
457        &mut self.0 .0
458    }
459
460    #[inline]
461    pub fn commands_map_mut(&mut self) -> &mut command::Map<S> {
462        &mut self.0 .0 .0.commands_map
463    }
464
465    /// Returns a mutable reference to the state.
466    #[inline]
467    pub fn state_mut(&mut self) -> &mut S {
468        &mut self.0 .0 .0.state
469    }
470    /// Returns a mutable reference to the tracer.
471    pub fn tracer_mut(&mut self) -> &mut trace::Tracer {
472        &mut self.0 .0 .0.internal.tracer
473    }
474    /// Returns a [vm::Parts] struct contains mutable references to different parts of the VM.
475    #[inline]
476    pub fn vm_parts(&mut self) -> vm::Parts<'_, S> {
477        let vm = &mut self.0 .0 .0;
478        vm::Parts {
479            state: &mut vm.state,
480            cs_name_interner: &mut vm.internal.cs_name_interner,
481            tracer: &mut vm.internal.tracer,
482        }
483    }
484
485    // TODO: pass in the token and keep it as a reference
486    pub fn begin_group(&mut self) {
487        self.0 .0 .0.begin_group()
488    }
489
490    #[inline]
491    pub(crate) fn groups(&mut self) -> &mut [variable::SaveStackElement<S>] {
492        &mut self.0 .0 .0.internal.save_stack
493    }
494
495    pub(crate) fn current_group_mut(&mut self) -> Option<(&mut variable::SaveStackElement<S>, &S)> {
496        match self.0 .0 .0.internal.save_stack.last_mut() {
497            None => None,
498            Some(g) => Some((g, &self.0 .0 .0.state)),
499        }
500    }
501    pub(crate) fn vm_mut(&mut self) -> &mut vm::VM<S> {
502        &mut self.0 .0 .0
503    }
504
505    /// Return a token buffer, allowing it to be reused.
506    pub fn return_token_buffer(&mut self, mut token_buffer: Vec<Token>) {
507        token_buffer.clear();
508        self.0
509             .0
510             .0
511            .internal
512            .token_buffers
513            .push(super::TokenBuffer(token_buffer))
514    }
515}
516
517impl<S: TexlangState> ExecutionInput<S> {
518    pub fn end_group(&mut self, token: Token) -> txl::Result<()> {
519        self.0 .0 .0.end_group(token)
520    }
521}
522
523mod stream {
524    use super::*;
525    use crate::token::lexer;
526    use crate::token::lexer::Config;
527
528    impl<T: TexlangState> Config for T {
529        #[inline]
530        fn cat_code(&self, c: char) -> crate::types::CatCode {
531            self.cat_code(c)
532        }
533        // TODO: implement \endlinechar
534        #[inline]
535        fn end_line_char(&self) -> Option<char> {
536            self.end_line_char()
537        }
538    }
539
540    #[inline]
541    pub fn next_unexpanded<S: TexlangState>(vm: &mut vm::VM<S>) -> txl::Result<Option<Token>> {
542        if let Some(token) = vm.internal.current_source.expansions.pop() {
543            return Ok(Some(token));
544        }
545        match vm.internal.current_source.root.next(
546            &vm.state,
547            &mut vm.internal.cs_name_interner,
548            false,
549        ) {
550            lexer::Result::Token(token) => {
551                return Ok(Some(token));
552            }
553            lexer::Result::InvalidCharacter(c, trace_key) => {
554                return Err(build_invalid_character_error(vm, c, trace_key));
555            }
556            // The EndOfLine case is never returned from the lexer but we silently handle it.
557            lexer::Result::EndOfLine | lexer::Result::EndOfInput => {}
558        }
559        if !vm.internal.pop_source() {
560            return Ok(None);
561        }
562        next_unexpanded(vm)
563    }
564
565    fn build_invalid_character_error<S: TexlangState>(
566        vm: &mut vm::VM<S>,
567        c: char,
568        trace_key: trace::Key,
569    ) -> vm::ShutdownSignal {
570        vm.fatal_error(lexer::InvalidCharacterError::new(vm, c, trace_key))
571    }
572
573    pub fn next_expanded<S: TexlangState>(vm: &mut vm::VM<S>) -> txl::Result<Option<Token>> {
574        let (token, command) = match next_unexpanded(vm)? {
575            None => return Ok(None),
576            Some(token) => match token.value() {
577                token::Value::CommandRef(command_ref) => {
578                    (token, vm.commands_map.get_command(&command_ref))
579                }
580                _ => return Ok(Some(token)),
581            },
582        };
583        match command {
584            Some(command::Command::Expansion(command, tag)) => {
585                let command = *command;
586                let tag = *tag;
587                match S::expansion_override_hook(token, ExpansionInput::new(vm), tag) {
588                    Ok(None) => (),
589                    Ok(Some(override_expansion)) => {
590                        return Ok(Some(override_expansion));
591                    }
592                    Err(err) => return Err(err),
593                };
594                vm.stack_push(token, error::OperationKind::Expansion);
595                let err_or = command(token, ExpansionInput::new(vm));
596                vm.stack_pop();
597                err_or?;
598                next_expanded(vm)
599            }
600            Some(command::Command::Macro(command)) => {
601                let command = command.clone();
602                command.call(token, ExpansionInput::new(vm))?;
603                next_expanded(vm)
604            }
605            _ => Ok(Some(token)),
606        }
607    }
608
609    pub fn expand_once<S: TexlangState>(vm: &mut vm::VM<S>) -> txl::Result<bool> {
610        let (token, command) = match next_unexpanded(vm)? {
611            None => return Ok(false),
612            Some(token) => match token.value() {
613                token::Value::CommandRef(command_ref) => {
614                    (token, vm.commands_map.get_command(&command_ref))
615                }
616                _ => {
617                    vm.internal.expansions_mut().push(token);
618                    return Ok(false);
619                }
620            },
621        };
622        match command {
623            Some(command::Command::Expansion(command, tag)) => {
624                let command = *command;
625                let tag = *tag;
626                match S::expansion_override_hook(token, ExpansionInput::new(vm), tag) {
627                    Ok(None) => (),
628                    Ok(Some(override_expansion)) => {
629                        vm.internal.expansions_mut().push(override_expansion);
630                        return Ok(true);
631                    }
632                    Err(err) => return Err(err),
633                };
634                vm.stack_push(token, error::OperationKind::Expansion);
635                let err_or = command(token, ExpansionInput::new(vm));
636                vm.stack_pop();
637                err_or?;
638                Ok(true)
639            }
640            Some(command::Command::Macro(command)) => {
641                let command = command.clone();
642                command.call(token, ExpansionInput::new(vm))?;
643                Ok(true)
644            }
645            _ => {
646                vm.internal.expansions_mut().push(token);
647                Ok(false)
648            }
649        }
650    }
651}