texlang/vm/
mod.rs

1//! The Texlang virtual machine (VM).
2//!
3//! This module contains the definition of the runtime VM,
4//!     various input streams that wrap the VM
5//!     and the main function that is used to run Texlang.
6//! See the VM documentation in the Texlang book for full documentation.
7
8use super::token::CsName;
9use crate::command;
10use crate::command::BuiltIn;
11use crate::command::Command;
12use crate::error;
13use crate::prelude as txl;
14use crate::texmacro;
15use crate::token;
16use crate::token::lexer;
17use crate::token::trace;
18use crate::token::CsNameInterner;
19use crate::token::Token;
20use crate::token::Value;
21use crate::types;
22use crate::variable;
23use std::collections::HashMap;
24use std::path::PathBuf;
25use texcraft_stdext::collections::groupingmap;
26
27#[cfg(feature = "serde")]
28pub mod serde;
29mod streams;
30pub use streams::*;
31
32/// Implementations of this trait determine how the VM handles non-execution-command tokens.
33///
34/// The main loop of the VM reads the next expanded token and performs
35///     some action based on the token.
36/// Many cases are handled automatically based on the semantics of the TeX language:
37///
38/// | token type | example | action |
39/// | -- | -- | -- |
40/// | execution command | `\def` | run the command |
41/// | variable command | `\count` | assign a value to the corresponding variable |
42/// | token alias | `\a` after `\let\a=a` | run the main VM loop for the token that is aliased |
43/// | begin group character | `{` | begin a group
44/// | end group character | `}` | end the current group
45///
46/// Note that the first three rows can arise from both control sequences and active character tokens.
47///
48/// The remaining cases are not specified by the TeX language but instead by
49///     the business logic of the TeX engine being built.
50/// The behavior in these cases is specified by implementing the associated handler.
51/// These cases and handlers are:
52///
53/// | token type | example | handler | default |
54/// | --- | --- | --- | --- |
55/// | character token | `b` | [character_handler](Handlers::character_handler) | do nothing
56/// | undefined command | `\b` where `\b` was never defined | [undefined_command_handler](Handlers::undefined_command_handler) | return an undefined control sequence error
57/// | unexpanded expansion command | `\the` in `\noexpand\the` | [unexpanded_expansion_command](Handlers::unexpanded_expansion_command) | do nothing
58///
59/// Each of the handlers has the same function signature as an execution command.
60pub trait Handlers<S: TexlangState> {
61    /// Handler to invoke for character tokens.
62    ///
63    /// This token is _not_ invoked for tokens whose category code is begin group (1), end group (2) or active character (13).
64    /// These cases are handled automatically by the VM based on the semantics of the TeX language.
65    ///
66    /// The default implementation is a no-op.
67    fn character_handler(
68        input: &mut ExecutionInput<S>,
69        token: token::Token,
70        character: char,
71    ) -> txl::Result<()> {
72        _ = (input, token, character);
73        Ok(())
74    }
75
76    /// Handler to invoke for math character tokens.
77    ///
78    /// The default implementation throws an error because math character tokens are
79    /// only valid in math mode which is implemented outside of the main VM loop.
80    fn math_character_handler(
81        input: &mut ExecutionInput<S>,
82        token: token::Token,
83        math_character: types::MathCode,
84    ) -> txl::Result<()> {
85        _ = math_character;
86        Err(input.fatal_error(error::SimpleTokenError::new(
87            token,
88            "math characters can only appear in math mode",
89        )))
90    }
91
92    /// Handler to invoke for a control sequence or active character for which no command is defined.
93    ///
94    /// The default implementation throws an undefined command error.
95    fn undefined_command_handler(
96        input: &mut ExecutionInput<S>,
97        token: token::Token,
98    ) -> txl::Result<()> {
99        Err(input.fatal_error(error::UndefinedCommandError::new(input.vm(), token)))
100    }
101
102    /// Handler to invoke for expansion commands that were not expanded.
103    ///
104    /// For example, in the TeX snippet `\noexpand\the`, this handler handles
105    /// the unexpanded `\the` token.
106    ///
107    /// The default implementation is a no-op.
108    fn unexpanded_expansion_command(
109        input: &mut ExecutionInput<S>,
110        token: token::Token,
111    ) -> txl::Result<()> {
112        _ = (token, input);
113        Ok(())
114    }
115
116    /// Handler to invoke when the input ends.
117    ///
118    /// In TeX the user is prompted to add additional input and if no
119    ///     input is provided a fatal error is thrown.
120    /// To end the VM without an error the user has to write `\end`
121    ///     or `\dump`.
122    ///
123    /// In this handler, if `Ok(())` is returned, the VM starts running again
124    ///     under the assumption that additional TeX source has been added to the VM.
125    /// Otherwise the shutdown signal causes the VM to stop.
126    ///
127    /// The default implementation shuts down the VM with no error.
128    fn end_of_input_handler(input: &mut ExecutionInput<S>) -> txl::Result<()> {
129        Err(input.shutdown())
130    }
131}
132
133#[derive(Default)]
134pub struct DefaultHandlers;
135
136impl<S: TexlangState> Handlers<S> for DefaultHandlers {}
137
138impl<S: TexlangState> VM<S> {
139    /// Run the VM.
140    ///
141    /// It is assumed that the VM has been preloaded with TeX source code using the
142    /// [VM::push_source] method.
143    pub fn run<H: Handlers<S>>(&mut self) -> Result<(), Box<error::TracedTexError>> {
144        self.run_impl::<H>();
145        match self.internal.shutdown_status.take() {
146            ShutdownStatus::None => unreachable!(),
147            ShutdownStatus::Normal => Ok(()),
148            ShutdownStatus::Error(traced_error) => Err(Box::new(traced_error)),
149        }
150    }
151    fn run_impl<H: Handlers<S>>(&mut self) -> ShutdownSignal {
152        let input = ExecutionInput::new(self);
153
154        loop {
155            let token = match input.next() {
156                Ok(None) => match H::end_of_input_handler(input) {
157                    Ok(_) => continue,
158                    Err(signal) => return signal,
159                },
160                Ok(Some(token)) => token,
161                Err(signal) => return signal,
162            };
163            let r = match token.value() {
164                Value::CommandRef(command_ref) => {
165                    match input.commands_map().get_command(&command_ref) {
166                        Some(Command::Execution(cmd, _)) => {
167                            let cmd = *cmd;
168                            input
169                                .vm_mut()
170                                .stack_push(token, error::OperationKind::Execution);
171                            let err_or = cmd(token, input);
172                            input.vm_mut().stack_pop();
173                            err_or
174                        }
175                        Some(Command::Variable(cmd)) => {
176                            let cmd = cmd.clone();
177                            let scope = S::variable_assignment_scope_hook(input.state_mut());
178                            cmd.set_value_using_input(token, input, scope)
179                        }
180                        Some(Command::CharacterTokenAlias(token_value)) => {
181                            // TODO: should add tests for when this is begin group and end group.
182                            input.back(Token::new_from_value(*token_value, token.trace_key()));
183                            Ok(())
184                        }
185                        Some(Command::Expansion(_, _)) | Some(Command::Macro(_)) => {
186                            H::unexpanded_expansion_command(input, token)
187                        }
188                        Some(Command::Character(c)) => {
189                            let token = Token::new_other(*c, token.trace_key()); // Remove
190                            H::character_handler(input, token, *c)
191                        }
192                        Some(Command::MathCharacter(c)) => {
193                            H::math_character_handler(input, token, *c)
194                        }
195                        Some(Command::Font(font)) => {
196                            let font = *font;
197                            let scope =
198                                TexlangState::variable_assignment_scope_hook(input.state_mut());
199                            let internal = &mut input.vm_mut().internal;
200                            match scope {
201                                groupingmap::Scope::Local => {
202                                    // If this is the first font assignment in this group,
203                                    // save the current value to the top of the stack. It will
204                                    // be restored from here when the group ends.
205                                    let current_font = internal.current_font;
206                                    if let Some(top) = internal.fonts_save_stack.last_mut() {
207                                        if top.is_none() {
208                                            *top = Some(current_font);
209                                        }
210                                    }
211                                }
212                                groupingmap::Scope::Global => {
213                                    // If this is a global font assignment, clear the stack
214                                    // entirely so that no font will be restored when groups end.
215                                    for font_or in &mut internal.fonts_save_stack {
216                                        *font_or = None;
217                                    }
218                                }
219                            }
220                            internal.current_font = font;
221                            input.state_mut().enable_font_hook(font);
222                            Ok(())
223                        }
224                        None => H::undefined_command_handler(input, token),
225                    }
226                }
227                Value::BeginGroup(_) => {
228                    input.begin_group();
229                    Ok(())
230                }
231                Value::EndGroup(_) => input.end_group(token),
232                Value::MathShift(c)
233                | Value::AlignmentTab(c)
234                | Value::Parameter(c)
235                | Value::Superscript(c)
236                | Value::Subscript(c)
237                | Value::Space(c)
238                | Value::Letter(c)
239                | Value::Other(c) => H::character_handler(input, token, c),
240            };
241            if let Err(signal) = r {
242                return signal;
243            }
244        }
245    }
246
247    pub(crate) fn shutdown(&mut self) -> ShutdownSignal {
248        self.internal.shutdown_status.transition_to_normal();
249        ShutdownSignal {}
250    }
251    pub(crate) fn fatal_error<E: error::TexError>(&mut self, err: E) -> ShutdownSignal {
252        let err: Box<dyn error::TexError> = Box::new(err);
253        let traced = error::TracedTexError::new(
254            err,
255            &self.internal.tracer,
256            &self.internal.cs_name_interner,
257            self.generate_stack_trace(),
258        );
259        self.internal.shutdown_status.transition_to_error(traced);
260        ShutdownSignal {}
261    }
262    pub(crate) fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()> {
263        let err: Box<dyn error::TexError> = Box::new(err);
264        let traced = error::TracedTexError::new(
265            err,
266            &self.internal.tracer,
267            &self.internal.cs_name_interner,
268            self.generate_stack_trace(),
269        );
270        match self.state.recoverable_error_hook(traced) {
271            Ok(_) => Ok(()),
272            Err(err) => {
273                let traced = error::TracedTexError::new(
274                    err,
275                    &self.internal.tracer,
276                    &self.internal.cs_name_interner,
277                    self.generate_stack_trace(),
278                );
279                self.internal.shutdown_status.transition_to_error(traced);
280                Err(ShutdownSignal {})
281            }
282        }
283    }
284}
285
286#[derive(Debug)]
287struct EndOfGroupError {
288    trace: token::Token,
289}
290
291impl error::TexError for EndOfGroupError {
292    fn kind(&self) -> error::Kind {
293        error::Kind::Token(self.trace)
294    }
295
296    fn title(&self) -> String {
297        "there is no group to end".into()
298    }
299}
300
301/// The Texlang virtual machine.
302pub struct VM<S> {
303    /// The state
304    pub state: S,
305
306    /// The commands map
307    pub commands_map: command::Map<S>,
308
309    /// The working directory which is used as the root for relative file paths
310    ///
311    /// This is [None] if the working directory could not be determined.
312    pub working_directory: Option<std::path::PathBuf>,
313
314    internal: Internal<S>,
315}
316
317/// Mutable references to different parts of the VM.
318pub struct Parts<'a, S> {
319    pub state: &'a mut S,
320    pub cs_name_interner: &'a mut token::CsNameInterner,
321    pub tracer: &'a mut trace::Tracer,
322}
323
324/// Implementations of this trait may be used as the state in a Texlang VM.
325///
326/// The most important thing to know about this trait is that it has no required methods.
327/// For any type it can be implemented trivially:
328/// ```
329/// # use texlang::traits::TexlangState;
330/// struct SomeNewType;
331///
332/// impl TexlangState for SomeNewType {}
333/// ```
334///
335/// Methods of the trait are invoked at certain points when the VM is running,
336///     and in general offer a way of customizing the behavior of the VM.
337/// The trait methods are all dispatched statically, which is important for performance.
338pub trait TexlangState: Sized {
339    /// Get the cat code for the provided character.
340    ///
341    /// The default implementation returns the cat code used in plain TeX.
342    fn cat_code(&self, c: char) -> types::CatCode {
343        types::CatCode::PLAIN_TEX_DEFAULTS
344            .get(c as usize)
345            .copied()
346            .unwrap_or_default()
347    }
348
349    /// Get current end line char, or [None] if it's undefined.
350    ///
351    /// The default implementation returns `Some(\r)`.
352    fn end_line_char(&self) -> Option<char> {
353        Some('\r')
354    }
355
356    /// Get the em width for the current font.
357    ///
358    /// The default implementation returns `12pt`.
359    fn em_width(&self) -> common::Scaled {
360        common::Scaled::ONE * 12
361    }
362
363    /// Get the ex height for the current font.
364    ///
365    /// The default implementation returns `12pt`.
366    fn ex_height(&self) -> common::Scaled {
367        common::Scaled::ONE * 12
368    }
369
370    /// Get the current magnification ratio (e.g. value of \mag).
371    ///
372    /// The default implementation returns `1000`, which corresponds to
373    /// no magnification.
374    fn magnification_ratio(&self) -> i32 {
375        1000
376    }
377
378    /// Hook that is invoked after a TeX macro is expanded.
379    ///
380    /// This hook is designed to support the `\tracingmacros` primitive.
381    fn post_macro_expansion_hook(
382        token: Token,
383        input: &ExpansionInput<Self>,
384        tex_macro: &texmacro::Macro,
385        arguments: &[&[Token]],
386        reversed_expansion: &[Token],
387    ) {
388        _ = (token, input, tex_macro, arguments, reversed_expansion);
389    }
390
391    /// Hook that potentially overrides the expansion of a command.
392    ///
393    /// This hook is invoked before an expandable token is expanded.
394    /// If the result of the hook is a non-empty, that result is considered the expansion of
395    ///   the token
396    /// The result of the hook is not expanded before being returned.
397    ///
398    /// This hook is designed to support the `\noexpand` primitive.
399    fn expansion_override_hook(
400        token: token::Token,
401        input: &mut ExpansionInput<Self>,
402        tag: Option<command::Tag>,
403    ) -> txl::Result<Option<Token>> {
404        _ = (token, input, tag);
405        Ok(None)
406    }
407
408    /// Hook that determines the scope of a variable assignment.
409    ///
410    /// This hook is designed to support the \global and \globaldefs commands.
411    fn variable_assignment_scope_hook(state: &mut Self) -> groupingmap::Scope {
412        _ = state;
413        groupingmap::Scope::Local
414    }
415
416    /// Hook that determines what to do when a recoverable error occurs.
417    ///
418    /// If the hook returns `Ok(())` then the recovery process should run.
419    /// If the hook returns an error, then that error should be returned from the enclosing
420    ///     function and propagated through the VM.
421    ///
422    /// Note that there is no requirement that an error returned from this hook
423    ///     is the same as the error provided to the hook.
424    /// For example, when Knuth's TeX is running in batch mode errors are
425    ///      logged but otherwise ignored.
426    /// However if 100 such errors occur, the interpreter fails.
427    /// To implement this in Texlang, the result of this function would be `Ok(())`
428    ///     for the first 99 errors,
429    ///     but after the 100th error a "too many errors" error would be returned from the hook.
430    /// Note that the returned error in this case is not the 100th error itself.
431    fn recoverable_error_hook(
432        &self,
433        error: error::TracedTexError,
434    ) -> Result<(), Box<dyn error::TexError>> {
435        _ = self;
436        Err(error.error)
437    }
438
439    /// Hook that is invoked when a font is enabled.
440    ///
441    /// For example, after the TeX snippet `\the \textfont 1`, this hook
442    /// is invoked for the font stored in `\textfont 1`.
443    /// The hook is also called if a font needs to be reenabled after
444    /// a group ends.
445    ///
446    /// The default implementation is a no-op.
447    fn enable_font_hook(&mut self, font: types::Font) {
448        _ = font
449    }
450
451    /// Returns whether the command corresponding to the provided tag references
452    /// the currnet font when provided as an argument to a variable.
453    ///
454    /// This is used to implement the `\font` primitive.
455    fn is_current_font_command(&self, tag: command::Tag) -> bool {
456        _ = tag;
457        false
458    }
459}
460
461impl TexlangState for () {}
462
463impl<S: Default> VM<S> {
464    /// Create a new VM with the provided built-in commands.
465    ///
466    /// If the state type satisfies the [`HasDefaultBuiltInCommands`] trait,
467    ///     and you are using the default built-ins,
468    ///     use the [`VM::new`] method instead.
469    pub fn new_with_built_in_commands(built_in_commands: HashMap<&str, BuiltIn<S>>) -> VM<S> {
470        let mut internal = Internal::new(Default::default());
471        let built_in_commands = built_in_commands
472            .into_iter()
473            .map(|(key, value)| (internal.cs_name_interner.get_or_intern(key), value))
474            .collect();
475        VM {
476            state: Default::default(),
477            commands_map: command::Map::new(built_in_commands),
478            internal,
479            working_directory: match std::env::current_dir() {
480                Ok(path_buf) => Some(path_buf),
481                Err(err) => {
482                    println!("failed to determine the working directory: {err}");
483                    None
484                }
485            },
486        }
487    }
488}
489
490impl<S: Default + HasDefaultBuiltInCommands> VM<S> {
491    /// Create a new VM.
492    pub fn new() -> VM<S> {
493        VM::<S>::new_with_built_in_commands(S::default_built_in_commands())
494    }
495}
496
497impl<S: Default + HasDefaultBuiltInCommands> Default for VM<S> {
498    fn default() -> Self {
499        Self::new()
500    }
501}
502
/// Deserialize a Texlang VM using the provided built-in commands.
///
/// If the state type satisfies the [`HasDefaultBuiltInCommands`] trait,
///     and you are deserializing using the default built-ins,
///     you don't need to use this function.
/// You can use the serde deserialize trait directly.
/// See the [`serde` submodule](serde) for more information on deserialization.
#[cfg(feature = "serde")]
impl<'de, S: ::serde::Deserialize<'de>> VM<S> {
    /// Deserialize a VM from `deserializer`, using `built_in_commands` as the built-ins.
    pub fn deserialize_with_built_in_commands<D>(
        deserializer: D,
        built_in_commands: HashMap<&str, BuiltIn<S>>,
    ) -> Result<Self, D::Error>
    where
        D: ::serde::Deserializer<'de>,
    {
        self::serde::deserialize(deserializer, built_in_commands)
    }
}
519
/// States that implement this trait have a default set of built-in commands associated to them.
///
/// In general in Texlang, the same state type can be used with different sets of built-in
///     commands.
/// However in many situations the state type has a specific set of built-ins
///     associated to it.
/// For example, the state type corresponding to pdfTeX is associated with the set of built-ins
///     provided by pdfTeX.
///
/// This trait is used to specify this association.
/// The benefit is that creating new VMs and deserializing VMs is a bit easier
///     because the built-in commands don't need to be provided explicitly.
/// Moreover, if a state implements this trait the associated VM implements serde's deserialize trait.
pub trait HasDefaultBuiltInCommands: TexlangState {
    /// Returns the default built-in commands for this state type,
    /// keyed by control sequence name.
    fn default_built_in_commands() -> HashMap<&'static str, BuiltIn<Self>>;
}
536
// Deserialization for state types that come with a default set of built-in commands.
#[cfg(feature = "serde")]
impl<'de, S: ::serde::Deserialize<'de> + HasDefaultBuiltInCommands> ::serde::Deserialize<'de>
    for VM<S>
{
    /// Deserialize a VM using the default built-in commands of the state type.
    fn deserialize<D: ::serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let default_built_ins = S::default_built_in_commands();
        self::serde::deserialize(deserializer, default_built_ins)
    }
}
549
550impl<S: TexlangState> VM<S> {
551    /// Add new source code to the VM.
552    ///
553    /// TeX input source code is organized as a stack.
554    /// Pushing source code onto the stack will mean it is executed first.
555    pub fn push_source<T1: Into<PathBuf>, T2: Into<String>>(
556        &mut self,
557        file_name: T1,
558        source_code: T2,
559    ) -> txl::Result<()> {
560        self.internal
561            .push_source(None, file_name.into(), source_code.into())
562    }
563}
564
565impl<S> VM<S> {
566    /// Clear all source code from the VM.
567    pub fn clear_sources(&mut self) {
568        self.internal.clear_sources()
569    }
570
571    /// Return a regular hash map with all the commands as they are currently defined.
572    ///
573    /// This function is extremely slow and is only intended to be invoked on error paths.
574    pub fn get_commands_as_map_slow(&self) -> HashMap<&str, BuiltIn<S>> {
575        let map_1: HashMap<CsName, BuiltIn<S>> = self.commands_map.to_hash_map_slow();
576        let mut map = HashMap::new();
577        for (cs_name, cmd) in map_1 {
578            let cs_name_str = match self.internal.cs_name_interner.resolve(cs_name) {
579                None => continue,
580                Some(cs_name_str) => cs_name_str,
581            };
582            map.insert(cs_name_str, cmd);
583        }
584        map
585    }
586
587    /// Return a reference to the control sequence name string interner.
588    ///
589    /// This interner can be used to resolve [CsName] types into regular strings.
590    #[inline]
591    pub fn cs_name_interner(&self) -> &CsNameInterner {
592        &self.internal.cs_name_interner
593    }
594    #[inline]
595    /// TODO: just put the CS name interner in the VM?
596    pub fn cs_name_interner_mut(&mut self) -> &mut CsNameInterner {
597        &mut self.internal.cs_name_interner
598    }
599
600    fn begin_group(&mut self) {
601        self.commands_map.begin_group();
602        self.internal.save_stack.push(Default::default());
603        self.internal.fonts_save_stack.push(None);
604    }
605
606    pub fn trace(&self, token: Token) -> trace::SourceCodeTrace {
607        self.internal
608            .tracer
609            .trace(token, &self.internal.cs_name_interner)
610    }
611
612    pub fn trace_end_of_input(&self) -> trace::SourceCodeTrace {
613        self.internal.tracer.trace_end_of_input()
614    }
615
616    /// Returns the number of current sources on the source stack
617    pub fn num_current_sources(&self) -> usize {
618        self.internal.sources.len() + 1
619    }
620
621    pub fn generate_stack_trace(&self) -> Vec<error::StackTraceElement> {
622        self.internal
623            .execution_stack
624            .iter()
625            .map(|(op_kind, token)| error::StackTraceElement {
626                context: *op_kind,
627                token: *token,
628                trace: self
629                    .internal
630                    .tracer
631                    .trace(*token, &self.internal.cs_name_interner),
632            })
633            .collect()
634    }
635    pub(crate) fn stack_push(&mut self, token: Token, op_kind: error::OperationKind) {
636        self.internal.execution_stack.push((op_kind, token));
637    }
638    pub(crate) fn stack_pop(&mut self) {
639        self.internal.execution_stack.pop();
640    }
641    pub fn current_font(&self) -> types::Font {
642        self.internal.current_font
643    }
644}
645
646impl<S: TexlangState> VM<S> {
647    fn end_group(&mut self, token: token::Token) -> txl::Result<()> {
648        // Restore commands
649        match self.commands_map.end_group() {
650            Ok(()) => (),
651            Err(_) => return Err(self.fatal_error(EndOfGroupError { trace: token })),
652        }
653        // Restore variable values
654        let group = self.internal.save_stack.pop().unwrap();
655        group.restore(ExecutionInput::new(self));
656        // Restore fonts
657        if let Some(font) = self.internal.fonts_save_stack.pop().unwrap() {
658            self.internal.current_font = font;
659            self.state.enable_font_hook(font);
660        }
661        Ok(())
662    }
663}
664
/// Parts of the VM that are private.
// We have serde(bound="") because otherwise serde tries to put a `Default` bound on S.
#[cfg_attr(
    feature = "serde",
    derive(::serde::Serialize, ::serde::Deserialize),
    serde(bound = "")
)]
struct Internal<S> {
    // The sources form a stack. We store the top element directly on the VM
    // for performance reasons.
    current_source: Source,
    sources: Vec<Source>,

    // Interner for control sequence names.
    cs_name_interner: CsNameInterner,

    // Tracer used to produce source code traces for tokens.
    tracer: trace::Tracer,

    // Token buffers are thrown away in serialization - there's nothing we need to keep.
    #[cfg_attr(feature = "serde", serde(skip))]
    token_buffers: std::collections::BinaryHeap<TokenBuffer>,

    // The save stack is handled manually in (de)serialization.
    // We need to use special logic in combination with the command map in order to serialize the
    // variable pointers that are in the stack.
    #[cfg_attr(feature = "serde", serde(skip))]
    save_stack: Vec<variable::SaveStackElement<S>>,

    // The font that is currently enabled.
    current_font: types::Font,
    // One entry per open group. An entry is `Some(font)` if `font` must be restored
    // when the group ends, and `None` if there is nothing to restore.
    fonts_save_stack: Vec<Option<types::Font>>,
    // Operations currently being run; used to generate stack traces.
    execution_stack: Vec<(error::OperationKind, Token)>,

    // We assume the VM is never saved during shutdown.
    #[cfg_attr(feature = "serde", serde(skip))]
    shutdown_status: ShutdownStatus,
}
700
701impl<S> Internal<S> {
702    fn new(cs_name_interner: CsNameInterner) -> Self {
703        Internal {
704            current_source: Default::default(),
705            sources: Default::default(),
706            cs_name_interner,
707            tracer: Default::default(),
708            token_buffers: Default::default(),
709            save_stack: Default::default(),
710            current_font: types::Font::NULL_FONT,
711            fonts_save_stack: Default::default(),
712            execution_stack: Default::default(),
713            shutdown_status: Default::default(),
714        }
715    }
716}
717impl<S: TexlangState> Internal<S> {
718    fn push_source(
719        &mut self,
720        token: Option<Token>,
721        file_name: PathBuf,
722        source_code: String,
723    ) -> txl::Result<()> {
724        let trace_key_range =
725            self.tracer
726                .register_source_code(token, trace::Origin::File(file_name), &source_code);
727        let mut new_source = Source::new(source_code, trace_key_range);
728        std::mem::swap(&mut new_source, &mut self.current_source);
729        // TODO: if the current top source is empty, we should skip this.
730        // Check this is working by looking at the JSON serialization.
731        self.sources.push(new_source);
732        Ok(())
733    }
734
735    fn end_current_file(&mut self) {
736        self.current_source.root.end()
737    }
738}
739impl<S> Internal<S> {
740    fn clear_sources(&mut self) {
741        self.current_source = Default::default();
742        self.sources.clear();
743    }
744
745    #[inline]
746    fn push_expansion(&mut self, expansion: &[Token]) {
747        self.current_source
748            .expansions
749            .extend(expansion.iter().rev());
750    }
751
752    #[inline]
753    fn expansions(&self) -> &Vec<Token> {
754        &self.current_source.expansions
755    }
756
757    #[inline]
758    fn expansions_mut(&mut self) -> &mut Vec<Token> {
759        &mut self.current_source.expansions
760    }
761
762    fn pop_source(&mut self) -> bool {
763        // We should set the current_source to be Default::default() if there is no additional source.
764        // Check this is working by looking at the JSON serialization.
765        match self.sources.pop() {
766            None => false,
767            Some(source) => {
768                self.current_source = source;
769                true
770            }
771        }
772    }
773}
774
775#[cfg_attr(feature = "serde", derive(::serde::Serialize, ::serde::Deserialize))]
776struct Source {
777    expansions: Vec<Token>,
778    root: lexer::Lexer,
779}
780
781impl Source {
782    pub fn new(source_code: String, trace_key_range: trace::KeyRange) -> Source {
783        Source {
784            expansions: Vec::with_capacity(32),
785            root: lexer::Lexer::new(source_code, trace_key_range),
786        }
787    }
788}
789
790impl Default for Source {
791    fn default() -> Self {
792        Source::new("".into(), trace::KeyRange::empty())
793    }
794}
795
796#[derive(Default)]
797struct TokenBuffer(Vec<Token>);
798
799impl PartialEq for TokenBuffer {
800    fn eq(&self, other: &Self) -> bool {
801        self.0.capacity() == other.0.capacity()
802    }
803}
804
805impl Eq for TokenBuffer {}
806
807impl PartialOrd for TokenBuffer {
808    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
809        Some(self.cmp(other))
810    }
811}
812
813impl Ord for TokenBuffer {
814    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
815        self.0.capacity().cmp(&other.0.capacity())
816    }
817}
818
/// A signal that the VM is shutting down.
///
/// Values of this type travel in the error payload of
///     the [`Result`](crate::prelude::Result) returned by Texlang commands and
///     by basically all other Texlang functions.
/// The only thing to do with the signal is to pass it up the
///     Rust call stack using Rust's `?` operator.
/// Eventually the signal reaches the main VM loop, and the VM stops.
///
/// The signal must _not_ be ignored or otherwise "handled".
/// For example, this code is incorrect:
///
/// ```
/// # use texlang::token;
/// # use texlang::vm;
/// # use texlang::traits::*;
/// # use texlang::prelude as txl;
/// fn execution_primitive_fn<S: TexlangState>(
///     token: token::Token,
///     input: &mut vm::ExecutionInput<S>,
/// ) -> txl::Result<()> {
///     let i = match i32::parse(input) {
///         Ok(i) => i,
///         Err(_shutdown_signal) => {
///             // This is incorrect - the shutdown signal must be propagated!
///             0
///         }
///     };
///     println!["Parsed integer {i}"];
///     Ok(())
/// }
/// ```
///
/// In this case the VM will eventually panic when it realizes that the shutdown was ignored.
/// The correct code is this:
///
/// ```
/// # use texlang::token;
/// # use texlang::vm;
/// # use texlang::traits::*;
/// # use texlang::prelude as txl;
/// fn execution_primitive_fn<S: TexlangState>(
///     token: token::Token,
///     input: &mut vm::ExecutionInput<S>,
/// ) -> txl::Result<()> {
///     let i = i32::parse(input)?;
///     println!["Parsed integer {i}"];
///     Ok(())
/// }
/// ```
///
/// ## Generating the shutdown signal
///
/// The signal originates either from a fatal error,
///     or from a TeX control
///     sequence that wants to stop execution (e.g. the `\end` primitive).
#[derive(Debug)]
pub struct ShutdownSignal {}
875
876#[derive(Debug, Default)]
877enum ShutdownStatus {
878    /// The VM is not shutting down.
879    #[default]
880    None,
881    /// The VM is shuting down for an expected reason.
882    Normal,
883    /// The VM is shuting down because of a fatal error.
884    Error(error::TracedTexError),
885}
886
887impl ShutdownStatus {
888    fn transition_to_normal(&mut self) {
889        if !matches!(self, ShutdownStatus::None) {
890            panic!("shutdown signal ignored")
891        }
892        *self = ShutdownStatus::Normal;
893    }
894    fn transition_to_error(&mut self, err: error::TracedTexError) {
895        if !matches!(self, ShutdownStatus::None) {
896            panic!("shutdown signal ignored")
897        }
898        *self = ShutdownStatus::Error(err);
899    }
900    fn take(&mut self) -> ShutdownStatus {
901        let mut s = ShutdownStatus::None;
902        std::mem::swap(self, &mut s);
903        s
904    }
905}
906
907/// Helper trait for implementing the component pattern in Texlang.
908///
909/// The component pattern is a ubiquitous design pattern in Texlang.
910/// It is used when implementing TeX commands that require state.
911/// An example of a stateful TeX command is `\year`, which needs to store the current year somewhere.
912///
913/// When the component pattern is used, a stateful TeX command
914///     can have a single implementation that
915///     is used by multiple TeX engines built with Texlang.
916/// Additionally, a specific TeX engine can compose many different
917///     stateful TeX commands together without worrying about conflicts between their state.
918/// The component pattern is Texlang's main solution to the problem of
919///     global mutable state that is pervasive in the original implementation of TeX.
920///
921/// In the component pattern, the state
922///     needed by a specific command like `\year` is isolated in a _component_, which is a concrete
923///     Rust type like a struct.
924/// This Rust type is the generic type `C` in the trait.
925/// The stateful command (e.g. `\year`) is defined in the same Rust module as the component.
926/// The internals of the component are made private to the module it is defined in.
927/// This means the state can only be mutated by the command (or commands) implemented in the module.
928///
929/// In order to function, the command needs to have access to an instance of the component in which
930///     the command will maintain its state.
931/// The `HasComponent` trait enforces this.
932/// Any VM state type that contains the component can implement the trait.
933/// The Rust code defining the
934///     command specifies the trait in its trait bounds, and uses the trait to access the component.
935///
936/// The pattern enables Texlang code to be composed as follows.
937/// Different VM states can include the same component and thus reuse the same commands.
938/// Combining multiple commands into one state just involves having the
939///     VM state include all of the relevant components.
940///
941/// Notes:
942///
943/// - In general state is shared by multiple commands. Such commands must be defined in the
944///   same Rust module to support this.
945///   For example, `\countdef` shares state with `\count`,
946///   and they are implemented together.
947///
948/// - Commands don't necessarily have state: for example, `\def`, `\advance` and `\the`.
949///   These commands
950///   are defined without trait bounds on the state, and work automatically with any TeX
951///   software built with Texlang.
952///
953/// - The easiest way to include a component in the state is to make it a direct field
954///   of the state.
955///   In this case the [implement_has_component] macro can be used to easily implement the
956///   trait.
957///   The Texlang standard library uses this approach.
958///
959/// ## The [TexlangState] requirement
960///
961/// This trait requires that the type also implements [TexlangState].
962/// This is only to reduce the number of trait bounds that need to be explicitly
963///     specified when implementing TeX commands.
964/// In general every command needs to have a bound of the form `S: TexlangState`.
965/// Commands that have a `HasComponent` bound don't need to include this other bound explicitly.
966pub trait HasComponent<C>: TexlangState {
967    /// Return a immutable reference to the component.
968    fn component(&self) -> &C;
969
970    /// Return a mutable reference to the component.
971    fn component_mut(&mut self) -> &mut C;
972}
973
/// This macro is for implementing the [HasComponent] trait in the special (but common)
///     case when the state is a struct and the component is a direct field of the struct.
///
/// For every `field: ComponentType` pair the macro emits one
///     `impl HasComponent<ComponentType> for State` whose accessors
///     return `&self.field` and `&mut self.field`.
/// At least one pair is required; a trailing comma is optional.
///
/// The generated code names the trait through `$crate`, so the macro keeps working
///     when the crate is imported under a different name and when it is invoked
///     from inside the crate that defines it.
///
/// ## Examples
///
/// Implementing a single component:
///
/// ```
/// # mod library_1{
/// #   pub struct Component;
/// # }
/// # use texlang::vm::implement_has_component;
/// # use texlang::traits::*;
/// #
/// struct MyState {
///     component: library_1::Component,
/// }
///
/// impl TexlangState for MyState {}
///
/// implement_has_component![MyState{
///     component: library_1::Component,
/// }];
/// ```
///
/// Implementing multiple components:
///
/// ```
/// # mod library_1{
/// #   pub struct Component;
/// # }
/// # mod library_2{
/// #   pub struct Component;
/// # }
/// # use texlang::vm::implement_has_component;
/// # use texlang::traits::*;
/// #
/// struct MyState {
///     component_1: library_1::Component,
///     component_2: library_2::Component,
/// }
///
/// impl TexlangState for MyState {}
///
/// implement_has_component![MyState{
///     component_1: library_1::Component,
///     component_2: library_2::Component,
/// }];
/// ```
#[macro_export]
macro_rules! implement_has_component {
    ($type: path {
        $( $field: ident: $component: path ),+ $(,)?
    }) => {
        $(
            // `$crate` resolves to the crate defining this macro regardless of the
            // name under which downstream code imports it.
            impl $crate::vm::HasComponent<$component> for $type {
                #[inline]
                fn component(&self) -> &$component {
                    &self.$field
                }
                #[inline]
                fn component_mut(&mut self) -> &mut $component {
                    &mut self.$field
                }
            }
        )+
    };
}
1042
1043pub use implement_has_component;