texlang/vm/mod.rs
1//! The Texlang virtual machine (VM).
2//!
3//! This module contains the definition of the runtime VM,
4//! various input streams that wrap the VM
5//! and the main function that is used to run Texlang.
6//! See the VM documentation in the Texlang book for full documentation.
7
8use super::token::CsName;
9use crate::command;
10use crate::command::BuiltIn;
11use crate::command::Command;
12use crate::error;
13use crate::prelude as txl;
14use crate::texmacro;
15use crate::token;
16use crate::token::lexer;
17use crate::token::trace;
18use crate::token::CsNameInterner;
19use crate::token::Token;
20use crate::token::Value;
21use crate::types;
22use crate::variable;
23use std::collections::HashMap;
24use std::path::PathBuf;
25use texcraft_stdext::collections::groupingmap;
26
27#[cfg(feature = "serde")]
28pub mod serde;
29mod streams;
30pub use streams::*;
31
32/// Implementations of this trait determine how the VM handles non-execution-command tokens.
33///
34/// The main loop of the VM reads the next expanded token and performs
35/// some action based on the token.
36/// Many cases are handled automatically based on the semantics of the TeX language:
37///
38/// | token type | example | action |
39/// | -- | -- | -- |
40/// | execution command | `\def` | run the command |
41/// | variable command | `\count` | assign a value to the corresponding variable |
42/// | token alias | `\a` after `\let\a=a` | run the main VM loop for the token that is aliased |
43/// | begin group character | `{` | begin a group
44/// | end group character | `}` | end the current group
45///
46/// Note that the first three rows can arise from both control sequences and active character tokens.
47///
48/// The remaining cases are not specified by the TeX language but instead by
49/// the business logic of the TeX engine being built.
50/// The behavior in these cases is specified by implementing the associated handler.
51/// These cases and handlers are:
52///
53/// | token type | example | handler | default |
54/// | --- | --- | --- | --- |
55/// | character token | `b` | [character_handler](Handlers::character_handler) | do nothing
56/// | undefined command | `\b` where `\b` was never defined | [undefined_command_handler](Handlers::undefined_command_handler) | return an undefined control sequence error
57/// | unexpanded expansion command | `\the` in `\noexpand\the` | [unexpanded_expansion_command](Handlers::unexpanded_expansion_command) | do nothing
58///
59/// Each of the handlers has the same function signature as an execution command.
60pub trait Handlers<S: TexlangState> {
61 /// Handler to invoke for character tokens.
62 ///
63 /// This token is _not_ invoked for tokens whose category code is begin group (1), end group (2) or active character (13).
64 /// These cases are handled automatically by the VM based on the semantics of the TeX language.
65 ///
66 /// The default implementation is a no-op.
67 fn character_handler(
68 input: &mut ExecutionInput<S>,
69 token: token::Token,
70 character: char,
71 ) -> txl::Result<()> {
72 _ = (input, token, character);
73 Ok(())
74 }
75
76 /// Handler to invoke for math character tokens.
77 ///
78 /// The default implementation throws an error because math character tokens are
79 /// only valid in math mode which is implemented outside of the main VM loop.
80 fn math_character_handler(
81 input: &mut ExecutionInput<S>,
82 token: token::Token,
83 math_character: types::MathCode,
84 ) -> txl::Result<()> {
85 _ = math_character;
86 Err(input.fatal_error(error::SimpleTokenError::new(
87 token,
88 "math characters can only appear in math mode",
89 )))
90 }
91
92 /// Handler to invoke for a control sequence or active character for which no command is defined.
93 ///
94 /// The default implementation throws an undefined command error.
95 fn undefined_command_handler(
96 input: &mut ExecutionInput<S>,
97 token: token::Token,
98 ) -> txl::Result<()> {
99 Err(input.fatal_error(error::UndefinedCommandError::new(input.vm(), token)))
100 }
101
102 /// Handler to invoke for expansion commands that were not expanded.
103 ///
104 /// For example, in the TeX snippet `\noexpand\the`, this handler handles
105 /// the unexpanded `\the` token.
106 ///
107 /// The default implementation is a no-op.
108 fn unexpanded_expansion_command(
109 input: &mut ExecutionInput<S>,
110 token: token::Token,
111 ) -> txl::Result<()> {
112 _ = (token, input);
113 Ok(())
114 }
115
116 /// Handler to invoke when the input ends.
117 ///
118 /// In TeX the user is prompted to add additional input and if no
119 /// input is provided a fatal error is thrown.
120 /// To end the VM without an error the user has to write `\end`
121 /// or `\dump`.
122 ///
123 /// In this handler, if `Ok(())` is returned, the VM starts running again
124 /// under the assumption that additional TeX source has been added to the VM.
125 /// Otherwise the shutdown signal causes the VM to stop.
126 ///
127 /// The default implementation shuts down the VM with no error.
128 fn end_of_input_handler(input: &mut ExecutionInput<S>) -> txl::Result<()> {
129 Err(input.shutdown())
130 }
131}
132
133#[derive(Default)]
134pub struct DefaultHandlers;
135
136impl<S: TexlangState> Handlers<S> for DefaultHandlers {}
137
138impl<S: TexlangState> VM<S> {
139 /// Run the VM.
140 ///
141 /// It is assumed that the VM has been preloaded with TeX source code using the
142 /// [VM::push_source] method.
143 pub fn run<H: Handlers<S>>(&mut self) -> Result<(), Box<error::TracedTexError>> {
144 self.run_impl::<H>();
145 match self.internal.shutdown_status.take() {
146 ShutdownStatus::None => unreachable!(),
147 ShutdownStatus::Normal => Ok(()),
148 ShutdownStatus::Error(traced_error) => Err(Box::new(traced_error)),
149 }
150 }
151 fn run_impl<H: Handlers<S>>(&mut self) -> ShutdownSignal {
152 let input = ExecutionInput::new(self);
153
154 loop {
155 let token = match input.next() {
156 Ok(None) => match H::end_of_input_handler(input) {
157 Ok(_) => continue,
158 Err(signal) => return signal,
159 },
160 Ok(Some(token)) => token,
161 Err(signal) => return signal,
162 };
163 let r = match token.value() {
164 Value::CommandRef(command_ref) => {
165 match input.commands_map().get_command(&command_ref) {
166 Some(Command::Execution(cmd, _)) => {
167 let cmd = *cmd;
168 input
169 .vm_mut()
170 .stack_push(token, error::OperationKind::Execution);
171 let err_or = cmd(token, input);
172 input.vm_mut().stack_pop();
173 err_or
174 }
175 Some(Command::Variable(cmd)) => {
176 let cmd = cmd.clone();
177 let scope = S::variable_assignment_scope_hook(input.state_mut());
178 cmd.set_value_using_input(token, input, scope)
179 }
180 Some(Command::CharacterTokenAlias(token_value)) => {
181 // TODO: should add tests for when this is begin group and end group.
182 input.back(Token::new_from_value(*token_value, token.trace_key()));
183 Ok(())
184 }
185 Some(Command::Expansion(_, _)) | Some(Command::Macro(_)) => {
186 H::unexpanded_expansion_command(input, token)
187 }
188 Some(Command::Character(c)) => {
189 let token = Token::new_other(*c, token.trace_key()); // Remove
190 H::character_handler(input, token, *c)
191 }
192 Some(Command::MathCharacter(c)) => {
193 H::math_character_handler(input, token, *c)
194 }
195 Some(Command::Font(font)) => {
196 let font = *font;
197 let scope =
198 TexlangState::variable_assignment_scope_hook(input.state_mut());
199 let internal = &mut input.vm_mut().internal;
200 match scope {
201 groupingmap::Scope::Local => {
202 // If this is the first font assignment in this group,
203 // save the current value to the top of the stack. It will
204 // be restored from here when the group ends.
205 let current_font = internal.current_font;
206 if let Some(top) = internal.fonts_save_stack.last_mut() {
207 if top.is_none() {
208 *top = Some(current_font);
209 }
210 }
211 }
212 groupingmap::Scope::Global => {
213 // If this is a global font assignment, clear the stack
214 // entirely so that no font will be restored when groups end.
215 for font_or in &mut internal.fonts_save_stack {
216 *font_or = None;
217 }
218 }
219 }
220 internal.current_font = font;
221 input.state_mut().enable_font_hook(font);
222 Ok(())
223 }
224 None => H::undefined_command_handler(input, token),
225 }
226 }
227 Value::BeginGroup(_) => {
228 input.begin_group();
229 Ok(())
230 }
231 Value::EndGroup(_) => input.end_group(token),
232 Value::MathShift(c)
233 | Value::AlignmentTab(c)
234 | Value::Parameter(c)
235 | Value::Superscript(c)
236 | Value::Subscript(c)
237 | Value::Space(c)
238 | Value::Letter(c)
239 | Value::Other(c) => H::character_handler(input, token, c),
240 };
241 if let Err(signal) = r {
242 return signal;
243 }
244 }
245 }
246
247 pub(crate) fn shutdown(&mut self) -> ShutdownSignal {
248 self.internal.shutdown_status.transition_to_normal();
249 ShutdownSignal {}
250 }
251 pub(crate) fn fatal_error<E: error::TexError>(&mut self, err: E) -> ShutdownSignal {
252 let err: Box<dyn error::TexError> = Box::new(err);
253 let traced = error::TracedTexError::new(
254 err,
255 &self.internal.tracer,
256 &self.internal.cs_name_interner,
257 self.generate_stack_trace(),
258 );
259 self.internal.shutdown_status.transition_to_error(traced);
260 ShutdownSignal {}
261 }
262 pub(crate) fn error<E: error::TexError>(&mut self, err: E) -> txl::Result<()> {
263 let err: Box<dyn error::TexError> = Box::new(err);
264 let traced = error::TracedTexError::new(
265 err,
266 &self.internal.tracer,
267 &self.internal.cs_name_interner,
268 self.generate_stack_trace(),
269 );
270 match self.state.recoverable_error_hook(traced) {
271 Ok(_) => Ok(()),
272 Err(err) => {
273 let traced = error::TracedTexError::new(
274 err,
275 &self.internal.tracer,
276 &self.internal.cs_name_interner,
277 self.generate_stack_trace(),
278 );
279 self.internal.shutdown_status.transition_to_error(traced);
280 Err(ShutdownSignal {})
281 }
282 }
283 }
284}
285
286#[derive(Debug)]
287struct EndOfGroupError {
288 trace: token::Token,
289}
290
291impl error::TexError for EndOfGroupError {
292 fn kind(&self) -> error::Kind {
293 error::Kind::Token(self.trace)
294 }
295
296 fn title(&self) -> String {
297 "there is no group to end".into()
298 }
299}
300
/// The Texlang virtual machine.
///
/// The type parameter `S` is the type of the state stored in the VM.
pub struct VM<S> {
    /// The state
    pub state: S,

    /// The commands map
    pub commands_map: command::Map<S>,

    /// The working directory which is used as the root for relative file paths
    ///
    /// This is [None] if the working directory could not be determined.
    pub working_directory: Option<std::path::PathBuf>,

    // Private parts of the VM, such as the input sources, the control sequence
    // name interner, the tracer and the save stacks used for groups.
    internal: Internal<S>,
}
316
/// Mutable references to different parts of the VM.
pub struct Parts<'a, S> {
    /// The state.
    pub state: &'a mut S,
    /// The control sequence name interner.
    pub cs_name_interner: &'a mut token::CsNameInterner,
    /// The tracer.
    pub tracer: &'a mut trace::Tracer,
}
323
324/// Implementations of this trait may be used as the state in a Texlang VM.
325///
326/// The most important thing to know about this trait is that it has no required methods.
327/// For any type it can be implemented trivially:
328/// ```
329/// # use texlang::traits::TexlangState;
330/// struct SomeNewType;
331///
332/// impl TexlangState for SomeNewType {}
333/// ```
334///
335/// Methods of the trait are invoked at certain points when the VM is running,
336/// and in general offer a way of customizing the behavior of the VM.
337/// The trait methods are all dispatched statically, which is important for performance.
338pub trait TexlangState: Sized {
339 /// Get the cat code for the provided character.
340 ///
341 /// The default implementation returns the cat code used in plain TeX.
342 fn cat_code(&self, c: char) -> types::CatCode {
343 types::CatCode::PLAIN_TEX_DEFAULTS
344 .get(c as usize)
345 .copied()
346 .unwrap_or_default()
347 }
348
349 /// Get current end line char, or [None] if it's undefined.
350 ///
351 /// The default implementation returns `Some(\r)`.
352 fn end_line_char(&self) -> Option<char> {
353 Some('\r')
354 }
355
356 /// Get the em width for the current font.
357 ///
358 /// The default implementation returns `12pt`.
359 fn em_width(&self) -> common::Scaled {
360 common::Scaled::ONE * 12
361 }
362
363 /// Get the ex height for the current font.
364 ///
365 /// The default implementation returns `12pt`.
366 fn ex_height(&self) -> common::Scaled {
367 common::Scaled::ONE * 12
368 }
369
370 /// Get the current magnification ratio (e.g. value of \mag).
371 ///
372 /// The default implementation returns `1000`, which corresponds to
373 /// no magnification.
374 fn magnification_ratio(&self) -> i32 {
375 1000
376 }
377
378 /// Hook that is invoked after a TeX macro is expanded.
379 ///
380 /// This hook is designed to support the `\tracingmacros` primitive.
381 fn post_macro_expansion_hook(
382 token: Token,
383 input: &ExpansionInput<Self>,
384 tex_macro: &texmacro::Macro,
385 arguments: &[&[Token]],
386 reversed_expansion: &[Token],
387 ) {
388 _ = (token, input, tex_macro, arguments, reversed_expansion);
389 }
390
391 /// Hook that potentially overrides the expansion of a command.
392 ///
393 /// This hook is invoked before an expandable token is expanded.
394 /// If the result of the hook is a non-empty, that result is considered the expansion of
395 /// the token
396 /// The result of the hook is not expanded before being returned.
397 ///
398 /// This hook is designed to support the `\noexpand` primitive.
399 fn expansion_override_hook(
400 token: token::Token,
401 input: &mut ExpansionInput<Self>,
402 tag: Option<command::Tag>,
403 ) -> txl::Result<Option<Token>> {
404 _ = (token, input, tag);
405 Ok(None)
406 }
407
408 /// Hook that determines the scope of a variable assignment.
409 ///
410 /// This hook is designed to support the \global and \globaldefs commands.
411 fn variable_assignment_scope_hook(state: &mut Self) -> groupingmap::Scope {
412 _ = state;
413 groupingmap::Scope::Local
414 }
415
416 /// Hook that determines what to do when a recoverable error occurs.
417 ///
418 /// If the hook returns `Ok(())` then the recovery process should run.
419 /// If the hook returns an error, then that error should be returned from the enclosing
420 /// function and propagated through the VM.
421 ///
422 /// Note that there is no requirement that an error returned from this hook
423 /// is the same as the error provided to the hook.
424 /// For example, when Knuth's TeX is running in batch mode errors are
425 /// logged but otherwise ignored.
426 /// However if 100 such errors occur, the interpreter fails.
427 /// To implement this in Texlang, the result of this function would be `Ok(())`
428 /// for the first 99 errors,
429 /// but after the 100th error a "too many errors" error would be returned from the hook.
430 /// Note that the returned error in this case is not the 100th error itself.
431 fn recoverable_error_hook(
432 &self,
433 error: error::TracedTexError,
434 ) -> Result<(), Box<dyn error::TexError>> {
435 _ = self;
436 Err(error.error)
437 }
438
439 /// Hook that is invoked when a font is enabled.
440 ///
441 /// For example, after the TeX snippet `\the \textfont 1`, this hook
442 /// is invoked for the font stored in `\textfont 1`.
443 /// The hook is also called if a font needs to be reenabled after
444 /// a group ends.
445 ///
446 /// The default implementation is a no-op.
447 fn enable_font_hook(&mut self, font: types::Font) {
448 _ = font
449 }
450
451 /// Returns whether the command corresponding to the provided tag references
452 /// the currnet font when provided as an argument to a variable.
453 ///
454 /// This is used to implement the `\font` primitive.
455 fn is_current_font_command(&self, tag: command::Tag) -> bool {
456 _ = tag;
457 false
458 }
459}
460
461impl TexlangState for () {}
462
463impl<S: Default> VM<S> {
464 /// Create a new VM with the provided built-in commands.
465 ///
466 /// If the state type satisfies the [`HasDefaultBuiltInCommands`] trait,
467 /// and you are using the default built-ins,
468 /// use the [`VM::new`] method instead.
469 pub fn new_with_built_in_commands(built_in_commands: HashMap<&str, BuiltIn<S>>) -> VM<S> {
470 let mut internal = Internal::new(Default::default());
471 let built_in_commands = built_in_commands
472 .into_iter()
473 .map(|(key, value)| (internal.cs_name_interner.get_or_intern(key), value))
474 .collect();
475 VM {
476 state: Default::default(),
477 commands_map: command::Map::new(built_in_commands),
478 internal,
479 working_directory: match std::env::current_dir() {
480 Ok(path_buf) => Some(path_buf),
481 Err(err) => {
482 println!("failed to determine the working directory: {err}");
483 None
484 }
485 },
486 }
487 }
488}
489
490impl<S: Default + HasDefaultBuiltInCommands> VM<S> {
491 /// Create a new VM.
492 pub fn new() -> VM<S> {
493 VM::<S>::new_with_built_in_commands(S::default_built_in_commands())
494 }
495}
496
497impl<S: Default + HasDefaultBuiltInCommands> Default for VM<S> {
498 fn default() -> Self {
499 Self::new()
500 }
501}
502
#[cfg(feature = "serde")]
impl<'de, S> VM<S>
where
    S: ::serde::Deserialize<'de>,
{
    /// Deserialize a Texlang VM using the provided built-in commands.
    ///
    /// If the state type satisfies the [`HasDefaultBuiltInCommands`] trait,
    /// and you are deserializing using the default built-ins,
    /// you don't need to use this function.
    /// You can use the serde deserialize trait directly.
    /// See the [`serde` submodule](serde) for more information on deserialization.
    pub fn deserialize_with_built_in_commands<D>(
        deserializer: D,
        built_in_commands: HashMap<&str, BuiltIn<S>>,
    ) -> Result<Self, D::Error>
    where
        D: ::serde::Deserializer<'de>,
    {
        serde::deserialize(deserializer, built_in_commands)
    }
}
519
520/// States that implement this trait have a default set of built-in commands associated to them.
521///
522/// In general in Texlang, the same state type can be used with different sets of built-in
523/// commands.
524/// However in many situations the state type has a specific set of built-ins
525/// associated to it.
526/// For example, the state type corresponding to pdfTeX is associated with the set of built-ins
527/// provided by pdfTeX.
528///
529/// This trait is used to specify this association.
530/// The benefit is that creating new VMs and deserializing VMs is a bit easier
531/// because the built-in commands don't need to be provided explicitly.
532/// Moreover, if a state implements this trait the associated VM implements serde's deserialize trait.
pub trait HasDefaultBuiltInCommands: TexlangState {
    /// Returns the default built-in commands for this state type,
    /// keyed by control sequence name.
    fn default_built_in_commands() -> HashMap<&'static str, BuiltIn<Self>>;
}
536
#[cfg(feature = "serde")]
impl<'de, S> ::serde::Deserialize<'de> for VM<S>
where
    S: ::serde::Deserialize<'de> + HasDefaultBuiltInCommands,
{
    /// Deserialize a VM using the default built-in commands of the state type.
    fn deserialize<D: ::serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        serde::deserialize(deserializer, S::default_built_in_commands())
    }
}
549
550impl<S: TexlangState> VM<S> {
551 /// Add new source code to the VM.
552 ///
553 /// TeX input source code is organized as a stack.
554 /// Pushing source code onto the stack will mean it is executed first.
555 pub fn push_source<T1: Into<PathBuf>, T2: Into<String>>(
556 &mut self,
557 file_name: T1,
558 source_code: T2,
559 ) -> txl::Result<()> {
560 self.internal
561 .push_source(None, file_name.into(), source_code.into())
562 }
563}
564
565impl<S> VM<S> {
566 /// Clear all source code from the VM.
567 pub fn clear_sources(&mut self) {
568 self.internal.clear_sources()
569 }
570
571 /// Return a regular hash map with all the commands as they are currently defined.
572 ///
573 /// This function is extremely slow and is only intended to be invoked on error paths.
574 pub fn get_commands_as_map_slow(&self) -> HashMap<&str, BuiltIn<S>> {
575 let map_1: HashMap<CsName, BuiltIn<S>> = self.commands_map.to_hash_map_slow();
576 let mut map = HashMap::new();
577 for (cs_name, cmd) in map_1 {
578 let cs_name_str = match self.internal.cs_name_interner.resolve(cs_name) {
579 None => continue,
580 Some(cs_name_str) => cs_name_str,
581 };
582 map.insert(cs_name_str, cmd);
583 }
584 map
585 }
586
587 /// Return a reference to the control sequence name string interner.
588 ///
589 /// This interner can be used to resolve [CsName] types into regular strings.
590 #[inline]
591 pub fn cs_name_interner(&self) -> &CsNameInterner {
592 &self.internal.cs_name_interner
593 }
594 #[inline]
595 /// TODO: just put the CS name interner in the VM?
596 pub fn cs_name_interner_mut(&mut self) -> &mut CsNameInterner {
597 &mut self.internal.cs_name_interner
598 }
599
600 fn begin_group(&mut self) {
601 self.commands_map.begin_group();
602 self.internal.save_stack.push(Default::default());
603 self.internal.fonts_save_stack.push(None);
604 }
605
606 pub fn trace(&self, token: Token) -> trace::SourceCodeTrace {
607 self.internal
608 .tracer
609 .trace(token, &self.internal.cs_name_interner)
610 }
611
612 pub fn trace_end_of_input(&self) -> trace::SourceCodeTrace {
613 self.internal.tracer.trace_end_of_input()
614 }
615
616 /// Returns the number of current sources on the source stack
617 pub fn num_current_sources(&self) -> usize {
618 self.internal.sources.len() + 1
619 }
620
621 pub fn generate_stack_trace(&self) -> Vec<error::StackTraceElement> {
622 self.internal
623 .execution_stack
624 .iter()
625 .map(|(op_kind, token)| error::StackTraceElement {
626 context: *op_kind,
627 token: *token,
628 trace: self
629 .internal
630 .tracer
631 .trace(*token, &self.internal.cs_name_interner),
632 })
633 .collect()
634 }
635 pub(crate) fn stack_push(&mut self, token: Token, op_kind: error::OperationKind) {
636 self.internal.execution_stack.push((op_kind, token));
637 }
638 pub(crate) fn stack_pop(&mut self) {
639 self.internal.execution_stack.pop();
640 }
641 pub fn current_font(&self) -> types::Font {
642 self.internal.current_font
643 }
644}
645
646impl<S: TexlangState> VM<S> {
647 fn end_group(&mut self, token: token::Token) -> txl::Result<()> {
648 // Restore commands
649 match self.commands_map.end_group() {
650 Ok(()) => (),
651 Err(_) => return Err(self.fatal_error(EndOfGroupError { trace: token })),
652 }
653 // Restore variable values
654 let group = self.internal.save_stack.pop().unwrap();
655 group.restore(ExecutionInput::new(self));
656 // Restore fonts
657 if let Some(font) = self.internal.fonts_save_stack.pop().unwrap() {
658 self.internal.current_font = font;
659 self.state.enable_font_hook(font);
660 }
661 Ok(())
662 }
663}
664
/// Parts of the VM that are private.
// We have serde(bound="") because otherwise serde tries to put a `Default` bound on S.
#[cfg_attr(
    feature = "serde",
    derive(::serde::Serialize, ::serde::Deserialize),
    serde(bound = "")
)]
struct Internal<S> {
    // The sources form a stack. We store the top element directly on the VM
    // for performance reasons.
    current_source: Source,
    // The rest of the source stack. The last element becomes the current source
    // when a source is popped.
    sources: Vec<Source>,

    // Interner for control sequence names.
    cs_name_interner: CsNameInterner,

    // Tracer used to generate source code traces for tokens.
    tracer: trace::Tracer,

    // Token buffers are thrown away in serialization - there's nothing we need to keep.
    // The heap orders the buffers by their capacity.
    #[cfg_attr(feature = "serde", serde(skip))]
    token_buffers: std::collections::BinaryHeap<TokenBuffer>,

    // The save stack is handled manually in (de)serialization.
    // We need to use special logic in combination with the command map in order to serialize the
    // variable pointers that are in the stack.
    #[cfg_attr(feature = "serde", serde(skip))]
    save_stack: Vec<variable::SaveStackElement<S>>,

    // The font that is currently enabled.
    current_font: types::Font,
    // One entry per open group. `Some(font)` is the font to restore when the group ends;
    // `None` means no font needs to be restored.
    fonts_save_stack: Vec<Option<types::Font>>,
    // Operations currently in progress, used to generate stack traces.
    execution_stack: Vec<(error::OperationKind, Token)>,

    // We assume the VM is never saved during shutdown.
    #[cfg_attr(feature = "serde", serde(skip))]
    shutdown_status: ShutdownStatus,
}
700
701impl<S> Internal<S> {
702 fn new(cs_name_interner: CsNameInterner) -> Self {
703 Internal {
704 current_source: Default::default(),
705 sources: Default::default(),
706 cs_name_interner,
707 tracer: Default::default(),
708 token_buffers: Default::default(),
709 save_stack: Default::default(),
710 current_font: types::Font::NULL_FONT,
711 fonts_save_stack: Default::default(),
712 execution_stack: Default::default(),
713 shutdown_status: Default::default(),
714 }
715 }
716}
717impl<S: TexlangState> Internal<S> {
718 fn push_source(
719 &mut self,
720 token: Option<Token>,
721 file_name: PathBuf,
722 source_code: String,
723 ) -> txl::Result<()> {
724 let trace_key_range =
725 self.tracer
726 .register_source_code(token, trace::Origin::File(file_name), &source_code);
727 let mut new_source = Source::new(source_code, trace_key_range);
728 std::mem::swap(&mut new_source, &mut self.current_source);
729 // TODO: if the current top source is empty, we should skip this.
730 // Check this is working by looking at the JSON serialization.
731 self.sources.push(new_source);
732 Ok(())
733 }
734
735 fn end_current_file(&mut self) {
736 self.current_source.root.end()
737 }
738}
739impl<S> Internal<S> {
740 fn clear_sources(&mut self) {
741 self.current_source = Default::default();
742 self.sources.clear();
743 }
744
745 #[inline]
746 fn push_expansion(&mut self, expansion: &[Token]) {
747 self.current_source
748 .expansions
749 .extend(expansion.iter().rev());
750 }
751
752 #[inline]
753 fn expansions(&self) -> &Vec<Token> {
754 &self.current_source.expansions
755 }
756
757 #[inline]
758 fn expansions_mut(&mut self) -> &mut Vec<Token> {
759 &mut self.current_source.expansions
760 }
761
762 fn pop_source(&mut self) -> bool {
763 // We should set the current_source to be Default::default() if there is no additional source.
764 // Check this is working by looking at the JSON serialization.
765 match self.sources.pop() {
766 None => false,
767 Some(source) => {
768 self.current_source = source;
769 true
770 }
771 }
772 }
773}
774
775#[cfg_attr(feature = "serde", derive(::serde::Serialize, ::serde::Deserialize))]
776struct Source {
777 expansions: Vec<Token>,
778 root: lexer::Lexer,
779}
780
781impl Source {
782 pub fn new(source_code: String, trace_key_range: trace::KeyRange) -> Source {
783 Source {
784 expansions: Vec::with_capacity(32),
785 root: lexer::Lexer::new(source_code, trace_key_range),
786 }
787 }
788}
789
790impl Default for Source {
791 fn default() -> Self {
792 Source::new("".into(), trace::KeyRange::empty())
793 }
794}
795
796#[derive(Default)]
797struct TokenBuffer(Vec<Token>);
798
799impl PartialEq for TokenBuffer {
800 fn eq(&self, other: &Self) -> bool {
801 self.0.capacity() == other.0.capacity()
802 }
803}
804
805impl Eq for TokenBuffer {}
806
807impl PartialOrd for TokenBuffer {
808 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
809 Some(self.cmp(other))
810 }
811}
812
813impl Ord for TokenBuffer {
814 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
815 self.0.capacity().cmp(&other.0.capacity())
816 }
817}
818
/// A signal that the VM is shutting down.
///
/// A value of this type is returned in the error payload of
/// the [`Result`](crate::prelude::Result) of Texlang commands and basically all other Texlang functions.
/// The only thing to do with the signal is to propagate it up the
/// Rust call stack using Rust's `?` operator.
/// Eventually the signal will reach the main VM loop, and the VM will stop.
///
/// The stop signal should _not_ be ignored or otherwise "handled".
/// For example, this code is incorrect:
///
/// ```
/// # use texlang::token;
/// # use texlang::vm;
/// # use texlang::traits::*;
/// # use texlang::prelude as txl;
/// fn execution_primitive_fn<S: TexlangState>(
///     token: token::Token,
///     input: &mut vm::ExecutionInput<S>,
/// ) -> txl::Result<()> {
///     let i = match i32::parse(input) {
///         Ok(i) => i,
///         Err(_shutdown_signal) => {
///             // This is incorrect - the shutdown signal must be propagated!
///             0
///         }
///     };
///     println!["Parsed integer {i}"];
///     Ok(())
/// }
/// ```
///
/// In this case the VM will eventually panic when it realizes that the shutdown was ignored.
/// The correct code is this:
///
/// ```
/// # use texlang::token;
/// # use texlang::vm;
/// # use texlang::traits::*;
/// # use texlang::prelude as txl;
/// fn execution_primitive_fn<S: TexlangState>(
///     token: token::Token,
///     input: &mut vm::ExecutionInput<S>,
/// ) -> txl::Result<()> {
///     let i = i32::parse(input)?;
///     println!["Parsed integer {i}"];
///     Ok(())
/// }
/// ```
///
/// ## Generating the shutdown signal
///
/// The signal can originate either with a fatal error,
/// or from a TeX control
/// sequence that wants to stop execution (e.g. the `\end` primitive).
#[derive(Debug)]
pub struct ShutdownSignal {}
875
876#[derive(Debug, Default)]
877enum ShutdownStatus {
878 /// The VM is not shutting down.
879 #[default]
880 None,
881 /// The VM is shuting down for an expected reason.
882 Normal,
883 /// The VM is shuting down because of a fatal error.
884 Error(error::TracedTexError),
885}
886
887impl ShutdownStatus {
888 fn transition_to_normal(&mut self) {
889 if !matches!(self, ShutdownStatus::None) {
890 panic!("shutdown signal ignored")
891 }
892 *self = ShutdownStatus::Normal;
893 }
894 fn transition_to_error(&mut self, err: error::TracedTexError) {
895 if !matches!(self, ShutdownStatus::None) {
896 panic!("shutdown signal ignored")
897 }
898 *self = ShutdownStatus::Error(err);
899 }
900 fn take(&mut self) -> ShutdownStatus {
901 let mut s = ShutdownStatus::None;
902 std::mem::swap(self, &mut s);
903 s
904 }
905}
906
907/// Helper trait for implementing the component pattern in Texlang.
908///
909/// The component pattern is a ubiquitous design pattern in Texlang.
910/// It is used when implementing TeX commands that require state.
911/// An example of a stateful TeX command is `\year`, which needs to store the current year somewhere.
912///
913/// When the component pattern is used, a stateful TeX command
914/// can have a single implementation that
915/// is used by multiple TeX engines built with Texlang.
916/// Additionally, a specific TeX engine can compose many different
917/// stateful TeX commands together without worrying about conflicts between their state.
918/// The component pattern is Texlang's main solution to the problem of
919/// global mutable state that is pervasive in the original implementation of TeX.
920///
921/// In the component pattern, the state
922/// needed by a specific command like `\year` is isolated in a _component_, which is a concrete
923/// Rust type like a struct.
924/// This Rust type is the generic type `C` in the trait.
925/// The stateful command (e.g. `\year`) is defined in the same Rust module as the component.
926/// The internals of the component are made private to the module it is defined in.
927/// This means the state can only be mutated by the command (or commands) implemented in the module.
928///
929/// In order to function, the command needs to have access to an instance of the component in which
930/// the command will maintain its state.
931/// The `HasComponent` trait enforces this.
932/// Any VM state type that contains the component can implement the trait.
933/// The Rust code defining the
934/// command specifies the trait in its trait bounds, and uses the trait to access the component.
935///
936/// The pattern enables Texlang code to be composed as follows.
937/// Different VM states can include the same component and thus reuse the same commands.
938/// Combining multiple commands into one state just involves having the
939/// VM state include all of the relevant components.
940///
941/// Notes:
942///
943/// - In general state is shared by multiple commands. Such commands must be defined in the
944/// same Rust module to support this.
945/// For example, `\countdef` shares state with `\count`,
946/// and they are implemented together.
947///
948/// - Commands don't necessarily have state: for example, `\def`, `\advance` and `\the`.
949/// These commands
950/// are defined without trait bounds on the state, and work automatically with any TeX
951/// software built with Texlang.
952///
953/// - The easiest way to include a component in the state is to make it a direct field
954/// of the state.
955/// In this case the [implement_has_component] macro can be used to easily implement the
956/// trait.
957/// The Texlang standard library uses this approach.
958///
959/// ## The [TexlangState] requirement
960///
961/// This trait requires that the type also implements [TexlangState].
962/// This is only to reduce the number of trait bounds that need to be explicitly
963/// specified when implementing TeX commands.
964/// In general every command needs to have a bound of the form `S: TexlangState`.
965/// Commands that have a `HasComponent` bound don't need to include this other bound explicitly.
pub trait HasComponent<C>: TexlangState {
    /// Return an immutable reference to the component.
    fn component(&self) -> &C;

    /// Return a mutable reference to the component.
    fn component_mut(&mut self) -> &mut C;
}
973
974/// This macro is for implementing the [HasComponent] trait in the special (but common)
975/// case when the state is a struct and the component is a direct field of the struct.
976///
977/// ## Examples
978///
979/// Implementing a single component:
980///
981/// ```
982/// # mod library_1{
983/// # pub struct Component;
984/// # }
985/// # use texlang::vm::implement_has_component;
986/// # use texlang::traits::*;
987/// #
988/// struct MyState {
989/// component: library_1::Component,
990/// }
991///
992/// impl TexlangState for MyState {}
993///
994/// implement_has_component![MyState{
995/// component: library_1::Component,
996/// }];
997/// ```
998///
999/// Implementing multiple components:
1000///
1001/// ```
1002/// # mod library_1{
1003/// # pub struct Component;
1004/// # }
1005/// # mod library_2{
1006/// # pub struct Component;
1007/// # }
1008/// # use texlang::vm::implement_has_component;
1009/// # use texlang::traits::*;
1010/// #
1011/// struct MyState {
1012/// component_1: library_1::Component,
1013/// component_2: library_2::Component,
1014/// }
1015///
1016/// impl TexlangState for MyState {}
1017///
1018/// implement_has_component![MyState{
1019/// component_1: library_1::Component,
1020/// component_2: library_2::Component,
1021/// }];
1022/// ```
#[macro_export]
macro_rules! implement_has_component {
    // Input: the state type followed by a braced, comma separated list of
    // `field: ComponentType` pairs. A trailing comma is allowed.
    ($type: path {
        $( $field: ident: $component: path ),+ $(,)?
    }) => {
        // One `HasComponent` implementation is generated per pair.
        $(
            // `$crate` resolves to the crate that defines this macro, so the expansion
            // keeps working when a downstream crate renames its dependency on this crate.
            impl $crate::vm::HasComponent<$component> for $type {
                #[inline]
                fn component(&self) -> &$component {
                    &self.$field
                }
                #[inline]
                fn component_mut(&mut self) -> &mut $component {
                    &mut self.$field
                }
            }
        )+
    };
}
1042
1043pub use implement_has_component;