texlang/command/
mod.rs

1//! Texlang commands API
2//!
3//! # Texcraft commands API
4//!
5//! One of the most important parts of any TeX engine is the primitives that it provides.
6//!  This documentation describes the *Texcraft commands API*,
7//! which is the mechanism by which TeX engines add new primitives.
8//!
9//! A note on terminology: *commands* can be categorized into primitives,
10//! which are implemented in the TeX engine, and user defined macros,
11//!  which are created in specific TeX documents using primitives like `\def`.
12//! We often use the word command and primitive interchangeably here because in the context
13//! of implementing TeX engines they’re basically synonymous.
14//! A TeX engine could theoretically provide a native user defined macro...but it’s unlikely.
15//!
16//! ## Expansion vs execution
17//!
18//! Expansion and execution commands seem similar because they both optionally
19//! read input tokens and then make changes to the VM.
20//! However the differences are pretty significant in practice:
21//!
//! |                                            | Expansion | Execution |
//! |--------------------------------------------|-----------|-----------|
//! | Can read tokens from the input stream?     | Yes       | Yes       |
//! | Can add tokens to the input stream?        | Yes       | It’s possible, but the API discourages it.[^futurelet] |
//! | Can make changes to the state?             | No        | Yes       |
//! | Is evaluated when tokens are only being expanded, like in `\edef`? | Yes | No |
28//!
29//!
30//! [^futurelet]: `\futurelet` is an example of an execution command that does this.
31//!
32
33use crate::prelude as txl;
34use crate::texmacro;
35use crate::token;
36use crate::types;
37use crate::variable;
38use crate::vm;
39use std::num;
40use std::rc;
41use std::sync;
42
43pub(crate) mod map;
44
45pub use map::Map;
46
/// The Rust type of expansion primitive functions.
///
/// This is a plain function pointer (not a closure type).
/// It receives a `token` and mutable access to a `vm::ExpansionInput<S>`,
/// and reports success or failure through `txl::Result<()>`.
///
/// NOTE(review): `token` is presumably the command token that triggered the call,
/// and `S` the VM state type; confirm against the `vm` module.
pub type ExpansionFn<S> =
    fn(token: token::Token, input: &mut vm::ExpansionInput<S>) -> txl::Result<()>;
50
/// The Rust type of execution primitive functions.
///
/// This is a plain function pointer (not a closure type).
/// It receives a `token` and mutable access to a `vm::ExecutionInput<S>`,
/// and reports success or failure through `txl::Result<()>`.
///
/// NOTE(review): `token` is presumably the command token that triggered the call,
/// and `S` the VM state type; confirm against the `vm` module.
pub type ExecutionFn<S> =
    fn(token: token::Token, input: &mut vm::ExecutionInput<S>) -> txl::Result<()>;
54
/// A TeX command.
///
/// Each variant is one kind of command.
/// Only the [`Command::Expansion`] and [`Command::Execution`] variants can carry a [Tag].
pub enum Command<S> {
    /// An expansion primitive that is implemented in the engine.
    ///
    /// Holds the function pointer implementing the primitive and its optional tag.
    ///
    /// Examples: `\the`, `\ifnum`.
    Expansion(ExpansionFn<S>, Option<Tag>),

    /// A user defined macro.
    ///
    /// Examples: `\newcommand` and `\include` in LaTeX.
    Macro(rc::Rc<texmacro::Macro>),

    /// A non-expansion primitive that performs operations on the state.
    ///
    /// Holds the function pointer implementing the primitive and its optional tag.
    ///
    /// Examples: `\def`, `\par`.
    Execution(ExecutionFn<S>, Option<Tag>),

    /// A command that is used to reference a variable, like a parameter or a register.
    ///
    /// Such a command is *resolved* to get the variable using the function pointer it holds.
    ///
    /// Examples: `\count`, `\year`.
    Variable(rc::Rc<variable::Command<S>>),

    /// A command that aliases a character token.
    ///
    /// Depending on the context in which this command appears it may behave like a
    /// character (when typesetting) or like an unexpandable command (when parsing integers).
    /// Created using `\let\cmd=<character>`.
    CharacterTokenAlias(token::Value),

    /// A command that references a character.
    ///
    /// These commands are generally created using `\chardef`.
    /// In the main inner loop they result in a character being typeset.
    /// In other contexts they are interpreted as numbers.
    /// In Plain TeX, `\chardef\cmd=255` is used as a more efficient version of `\def\cmd{255 }`.
    Character(char),

    /// A command that references a math character.
    ///
    /// These commands are generally created using `\mathchardef`.
    MathCharacter(types::MathCode),

    /// A command that enables a font.
    Font(types::Font),
}
102
103impl<S> std::fmt::Display for Command<S> {
104    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105        match self {
106            Command::Expansion(_, _) => write![f, "an expansion command"],
107            Command::Macro(_) => write![f, "a user-defined macro"],
108            Command::Execution(_, _) => write![f, "an execution command"],
109            Command::Variable(_) => write![f, "a variable command"],
110            Command::CharacterTokenAlias(_) => write![f, "a character token alias"],
111            Command::Character(_) => write![f, "a character command"],
112            Command::MathCharacter(_) => write![f, "a math character command"],
113            Command::Font(_) => write![f, "a font command"],
114        }
115    }
116}
117
118impl<S> Command<S> {
119    /// Gets the tag associated to this command, or [None] if the command has no tag.
120    pub fn tag(&self) -> Option<Tag> {
121        match self {
122            Command::Expansion(_, tag) => *tag,
123            Command::Execution(_, tag) => *tag,
124            Command::Macro(_)
125            | Command::Variable(_)
126            | Command::CharacterTokenAlias(_)
127            | Command::Character(_)
128            | Command::MathCharacter(_)
129            | Command::Font(_) => None,
130        }
131    }
132}
133
134/// A built-in command. This is a command provided at VM initialization.
135///
136/// This struct is simply a combination of a [Command] and a documentation string for the command.
137/// It is used when providing the built-in commands for a VM.
138pub struct BuiltIn<S> {
139    cmd: Command<S>,
140    doc: Option<&'static str>,
141}
142
143impl<S> BuiltIn<S> {
144    /// Create a new expansion built-in command.
145    pub fn new_expansion(t: ExpansionFn<S>) -> BuiltIn<S> {
146        t.into()
147    }
148
149    /// Create a new expansion built-in command.
150    pub fn new_execution(t: ExecutionFn<S>) -> BuiltIn<S> {
151        t.into()
152    }
153
154    /// Create a new variable built-in command.
155    pub fn new_variable(cmd: variable::Command<S>) -> BuiltIn<S> {
156        Command::Variable(rc::Rc::new(cmd)).into()
157    }
158
159    /// Create a new font built-in command.
160    pub fn new_font(font: types::Font) -> BuiltIn<S> {
161        Command::Font(font).into()
162    }
163
164    /// Set the tag for this built-in command.
165    pub fn with_tag(mut self, tag: Tag) -> BuiltIn<S> {
166        match &mut self.cmd {
167            Command::Expansion(_, t) => *t = Some(tag),
168            Command::Execution(_, t) => *t = Some(tag),
169            Command::Macro(_)
170            | Command::Variable(_)
171            | Command::CharacterTokenAlias(_)
172            | Command::Character(_)
173            | Command::MathCharacter(_)
174            | Command::Font(_) => {
175                panic!("cannot add a tag to this type of command")
176            }
177        }
178        self
179    }
180
181    // Set the doc for this built-in command.
182    pub fn with_doc(mut self, doc: &'static str) -> BuiltIn<S> {
183        self.doc = Some(doc);
184        self
185    }
186
187    pub fn cmd(&self) -> &Command<S> {
188        &self.cmd
189    }
190
191    pub fn doc(&self) -> Option<&'static str> {
192        self.doc
193    }
194}
195
196// We need to implement Clone manually as the derived implementation requires S to be Clone.
197impl<S> Clone for Command<S> {
198    fn clone(&self) -> Self {
199        match self {
200            Command::Expansion(e, t) => Command::Expansion::<S>(*e, *t),
201            Command::Macro(m) => Command::Macro(m.clone()),
202            Command::Execution(e, t) => Command::Execution(*e, *t),
203            Command::Variable(v) => Command::Variable(v.clone()),
204            Command::CharacterTokenAlias(tv) => Command::CharacterTokenAlias(*tv),
205            Command::Character(c) => Command::Character(*c),
206            Command::MathCharacter(c) => Command::MathCharacter(*c),
207            Command::Font(font) => Command::Font(*font),
208        }
209    }
210}
211
212// We need to implement Clone manually as the derived implementation requires S to be Clone.
213impl<S> Clone for BuiltIn<S> {
214    fn clone(&self) -> Self {
215        Self {
216            cmd: self.cmd.clone(),
217            doc: self.doc,
218        }
219    }
220}
221
222impl<S> From<ExpansionFn<S>> for BuiltIn<S> {
223    fn from(cmd: ExpansionFn<S>) -> Self {
224        Command::Expansion(cmd, None).into()
225    }
226}
227
228impl<S> From<rc::Rc<texmacro::Macro>> for BuiltIn<S> {
229    fn from(cmd: rc::Rc<texmacro::Macro>) -> Self {
230        Command::Macro(cmd).into()
231    }
232}
233
234impl<S> From<ExecutionFn<S>> for BuiltIn<S> {
235    fn from(cmd: ExecutionFn<S>) -> Self {
236        Command::Execution(cmd, None).into()
237    }
238}
239
240impl<S> From<variable::Command<S>> for BuiltIn<S> {
241    fn from(cmd: variable::Command<S>) -> Self {
242        Command::Variable(rc::Rc::new(cmd)).into()
243    }
244}
245
246impl<S> From<Command<S>> for BuiltIn<S> {
247    fn from(cmd: Command<S>) -> Self {
248        BuiltIn { cmd, doc: None }
249    }
250}
251
/// A tag is a piece of metadata that is optionally attached to a command.
///
/// Tags are used to implement certain TeX language semantics.
/// An example is TeX conditionals.
/// When a TeX conditional statement evaluates to false, the `\if` command must scan
///     the input stream until it finds either an `\else` or `\fi` command.
/// (The tokens scanned in this process are in the true branch of the conditional,
///     and must thus be discarded.)
/// Tags are the mechanism by which the scanning algorithm can
///     determine if a token corresponds to an `\else` or `\fi` command.
/// Concretely, both the `\else` and `\fi` commands have unique tags associated to them.
/// When scanning the stream,
///     if a token is a command token then the tag for the associated command is
///     compared to the known tags for `\else` and `\fi`.
/// If the tags match, the true branch is finished.
///
/// In general, TeX commands interface with the VM in two ways.
/// The first and most common way is when the main VM loop or expansion loop encounters a command.
/// The loop invokes the command's associated Rust function.
/// One can think of the Rust function as providing the behavior of the command in this context.
///
/// The second way is when a different command, like a conditional command, performs some operation
///     that is dependent on the commands it reads out of the input stream.
/// In this context the commands in the input stream provide behavior using tags.
/// The `\else` command having the specific else tag results in the conditional branch processing completing.
///
/// Note that the same tag can be used for multiple commands,
/// but each command can only have one tag.
///
/// ## Implementation details
///
/// Tags are non-zero 32 bit integers.
/// The first tag created has value 1, the second tag has value 2, and so on.
/// A global atomic counter is used to store the next tag value.
/// Tags have the property that `Option<Tag>` takes up 4 bytes in memory.
#[derive(PartialEq, Eq, Clone, Copy, Debug, PartialOrd, Ord, Hash)]
pub struct Tag(num::NonZeroU32);

/// The value that the next call to [`Tag::new`] will hand out.
///
/// Starts at 1 so that every value is a valid `NonZeroU32`.
static NEXT_TAG_VALUE: sync::atomic::AtomicU32 = sync::atomic::AtomicU32::new(1);

impl Tag {
    /// Creates a new unique tag.
    ///
    /// ```
    /// # use texlang::command::Tag;
    /// let tag_1 = Tag::new();
    /// let tag_2 = Tag::new();
    /// assert_ne!(tag_1, tag_2);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if the 32 bit tag space has been exhausted.
    // We suppress the clippy warning because creating a new tag is a global operation and
    // shouldn't be done without explicit intention.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Tag {
        // Relaxed ordering is sufficient: only the uniqueness of the value handed out matters,
        // and atomic read-modify-write operations on a single variable are totally ordered.
        // `checked_add` makes the update fail (instead of wrapping to 0) once the counter
        // reaches `u32::MAX`, so a value is never handed out twice.
        let value = NEXT_TAG_VALUE
            .fetch_update(
                sync::atomic::Ordering::Relaxed,
                sync::atomic::Ordering::Relaxed,
                |current| current.checked_add(1),
            )
            .expect("exhausted all 32 bit tag values");
        Tag(num::NonZeroU32::new(value).expect("the tag counter starts at 1 and never wraps"))
    }
}
311
312/// A static tag enables creating a tag in a static variable.
313///
314/// ```
315/// # use texlang::command::StaticTag;
316/// static TAG: StaticTag = StaticTag::new();
317///
318/// let first_get = TAG.get();
319/// let second_get = TAG.get();
320/// assert_eq!(first_get, second_get);
321/// ```
322pub struct StaticTag(std::sync::OnceLock<Tag>);
323
324impl Default for StaticTag {
325    fn default() -> Self {
326        StaticTag::new()
327    }
328}
329
330impl StaticTag {
331    /// Create a new static tag.
332    pub const fn new() -> StaticTag {
333        StaticTag(std::sync::OnceLock::new())
334    }
335
336    /// Get the actual [Tag] out of this [StaticTag].
337    /// Repeated calls to this function return the same tag.
338    ///
339    /// This is not a trivial getter.
340    /// The [Tag] is lazily constructed so even subsequent calls to this getter must do some work to check if the [Tag]
341    ///     exists or not.
342    /// For very hot code paths it is advised to cache the return value somewhere, for example in a relevant command's state.
343    pub fn get(&self) -> Tag {
344        *self.0.get_or_init(Tag::new)
345    }
346}
347
348/// A primitive key uniquely identifies a primitive.
349///
350/// If two commands have the same key, they are the same primitive (expansion, execution, or variable primitive)
351/// The function returns [None] if the command is not a primitive (a macro or a token alias).
352#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
353pub(crate) enum PrimitiveKey {
354    Execution(usize, Option<Tag>),
355    Expansion(usize, Option<Tag>),
356    Variable(variable::CommandKey),
357}
358
359impl PrimitiveKey {
360    pub(crate) fn new<S>(command: &Command<S>) -> Option<Self> {
361        match command {
362            Command::Expansion(f, tag) => Some(PrimitiveKey::Expansion(*f as usize, *tag)),
363            Command::Execution(f, tag) => Some(PrimitiveKey::Execution(*f as usize, *tag)),
364            Command::Variable(v) => Some(PrimitiveKey::Variable(v.key())),
365            Command::Macro(_)
366            | Command::CharacterTokenAlias(_)
367            | Command::Character(_)
368            | Command::MathCharacter(_)
369            | Command::Font(_) => None,
370        }
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem::size_of;

    /// Pins the in-memory size of a command at 16 bytes.
    #[test]
    fn func_size() {
        assert_eq!(size_of::<Command<()>>(), 16);
    }

    static STATIC_TAG_1: StaticTag = StaticTag::new();
    static STATIC_TAG_2: StaticTag = StaticTag::new();

    /// A static tag is stable across reads, while distinct static tags and
    /// freshly created tags all differ from one another.
    #[test]
    fn tag() {
        // Read order matters: fresh tags are created between the reads of the static tags.
        let static_1_first = STATIC_TAG_1.get();
        let static_2_first = STATIC_TAG_2.get();
        let fresh_1 = Tag::new();
        let static_1_second = STATIC_TAG_1.get();
        let static_2_second = STATIC_TAG_2.get();
        let fresh_2 = Tag::new();

        assert_eq!(static_1_first, static_1_second);
        assert_eq!(static_2_first, static_2_second);

        assert_ne!(static_1_first, static_2_second);
        assert_ne!(static_1_first, fresh_1);
        assert_ne!(static_1_first, fresh_2);
    }

    /// The non-zero representation of a tag makes `Option<Tag>` free.
    #[test]
    fn tag_size() {
        assert_eq!(size_of::<Option<Tag>>(), 4);
    }
}