texlang/command/mod.rs
1//! Texlang commands API
2//!
3//! # Texcraft commands API
4//!
5//! One of the most important parts of any TeX engine is the primitives that it provides.
6//! This documentation describes the *Texcraft commands API*,
7//! which is the mechanism by which TeX engines add new primitives.
8//!
9//! A note on terminology: *commands* can be categorized into primitives,
10//! which are implemented in the TeX engine, and user defined macros,
11//! which are created in specific TeX documents using primitives like `\def`.
//! We often use the words *command* and *primitive* interchangeably here because in the context
//! of implementing TeX engines they’re basically synonymous.
//! A TeX engine could theoretically provide a native user defined macro...but it’s unlikely.
15//!
16//! ## Expansion vs execution
17//!
18//! Expansion and execution commands seem similar because they both optionally
19//! read input tokens and then make changes to the VM.
20//! However the differences are pretty significant in practice:
21//!
//! | | Expansion | Execution
//! |------------------------------------------|-----------|-----------
//! | Can read tokens from the input stream? | Yes | Yes
//! | Can add tokens to the input stream? | Yes | It’s possible, but the API discourages it.[^futurelet]
//! | Can make changes to the state? | No | Yes
//! | Is evaluated when tokens are only being expanded, like in `\edef`? | Yes | No
28//!
29//!
30//! [^futurelet]: `\futurelet` is an example of an execution command that does this.
31//!
32
33use crate::prelude as txl;
34use crate::texmacro;
35use crate::token;
36use crate::types;
37use crate::variable;
38use crate::vm;
39use std::num;
40use std::rc;
41use std::sync;
42
43pub(crate) mod map;
44
45pub use map::Map;
46
47/// The Rust type of expansion primitive functions.
48pub type ExpansionFn<S> =
49 fn(token: token::Token, input: &mut vm::ExpansionInput<S>) -> txl::Result<()>;
50
51/// The Rust type of execution primitive functions.
52pub type ExecutionFn<S> =
53 fn(token: token::Token, input: &mut vm::ExecutionInput<S>) -> txl::Result<()>;
54
55/// A TeX command.
56pub enum Command<S> {
57 /// An expansion primitive that is implemented in the engine.
58 ///
59 /// Examples: `\the`, `\ifnum`.
60 Expansion(ExpansionFn<S>, Option<Tag>),
61
62 /// A user defined macro.
63 ///
64 /// Examples: `\newcommand` and `\include` in LaTeX.
65 Macro(rc::Rc<texmacro::Macro>),
66
67 /// A non-expansion primitive that performs operations on the state.
68 ///
69 /// Examples: `\def`, `\par`.
70 Execution(ExecutionFn<S>, Option<Tag>),
71
72 /// A command that is used to reference a variable, like a parameter or a register.
73 ///
74 /// Such a command is *resolved* to get the variable using the function pointer it holds.
75 ///
76 /// Examples: `\count`, `\year`.
77 Variable(rc::Rc<variable::Command<S>>),
78
79 /// A command that aliases a character token.
80 ///
81 /// Depending on the context in which this command appears it may behave like a
82 /// character (when typesetting) or like an unexpandable command (when parsing integers).
83 /// Created using `\let\cmd=<character>`.
84 CharacterTokenAlias(token::Value),
85
86 /// A command that references a character.
87 ///
88 /// These commands are generally created using `\countdef`.
89 /// In the main inner loop they result in a character being typeset.
90 /// In other contexts they are interpreted as numbers.
91 /// In Plain TeX, `\countdef 255` is used as a more efficient version of `\def{255 }`.
92 Character(char),
93
94 /// A command that references a math character.
95 ///
96 /// These commands are generally created using `\mathchardef`.
97 MathCharacter(types::MathCode),
98
99 /// A command that enables a font.
100 Font(types::Font),
101}
102
103impl<S> std::fmt::Display for Command<S> {
104 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105 match self {
106 Command::Expansion(_, _) => write![f, "an expansion command"],
107 Command::Macro(_) => write![f, "a user-defined macro"],
108 Command::Execution(_, _) => write![f, "an execution command"],
109 Command::Variable(_) => write![f, "a variable command"],
110 Command::CharacterTokenAlias(_) => write![f, "a character token alias"],
111 Command::Character(_) => write![f, "a character command"],
112 Command::MathCharacter(_) => write![f, "a math character command"],
113 Command::Font(_) => write![f, "a font command"],
114 }
115 }
116}
117
118impl<S> Command<S> {
119 /// Gets the tag associated to this command, or [None] if the command has no tag.
120 pub fn tag(&self) -> Option<Tag> {
121 match self {
122 Command::Expansion(_, tag) => *tag,
123 Command::Execution(_, tag) => *tag,
124 Command::Macro(_)
125 | Command::Variable(_)
126 | Command::CharacterTokenAlias(_)
127 | Command::Character(_)
128 | Command::MathCharacter(_)
129 | Command::Font(_) => None,
130 }
131 }
132}
133
134/// A built-in command. This is a command provided at VM initialization.
135///
136/// This struct is simply a combination of a [Command] and a documentation string for the command.
137/// It is used when providing the built-in commands for a VM.
138pub struct BuiltIn<S> {
139 cmd: Command<S>,
140 doc: Option<&'static str>,
141}
142
143impl<S> BuiltIn<S> {
144 /// Create a new expansion built-in command.
145 pub fn new_expansion(t: ExpansionFn<S>) -> BuiltIn<S> {
146 t.into()
147 }
148
149 /// Create a new expansion built-in command.
150 pub fn new_execution(t: ExecutionFn<S>) -> BuiltIn<S> {
151 t.into()
152 }
153
154 /// Create a new variable built-in command.
155 pub fn new_variable(cmd: variable::Command<S>) -> BuiltIn<S> {
156 Command::Variable(rc::Rc::new(cmd)).into()
157 }
158
159 /// Create a new font built-in command.
160 pub fn new_font(font: types::Font) -> BuiltIn<S> {
161 Command::Font(font).into()
162 }
163
164 /// Set the tag for this built-in command.
165 pub fn with_tag(mut self, tag: Tag) -> BuiltIn<S> {
166 match &mut self.cmd {
167 Command::Expansion(_, t) => *t = Some(tag),
168 Command::Execution(_, t) => *t = Some(tag),
169 Command::Macro(_)
170 | Command::Variable(_)
171 | Command::CharacterTokenAlias(_)
172 | Command::Character(_)
173 | Command::MathCharacter(_)
174 | Command::Font(_) => {
175 panic!("cannot add a tag to this type of command")
176 }
177 }
178 self
179 }
180
181 // Set the doc for this built-in command.
182 pub fn with_doc(mut self, doc: &'static str) -> BuiltIn<S> {
183 self.doc = Some(doc);
184 self
185 }
186
187 pub fn cmd(&self) -> &Command<S> {
188 &self.cmd
189 }
190
191 pub fn doc(&self) -> Option<&'static str> {
192 self.doc
193 }
194}
195
196// We need to implement Clone manually as the derived implementation requires S to be Clone.
197impl<S> Clone for Command<S> {
198 fn clone(&self) -> Self {
199 match self {
200 Command::Expansion(e, t) => Command::Expansion::<S>(*e, *t),
201 Command::Macro(m) => Command::Macro(m.clone()),
202 Command::Execution(e, t) => Command::Execution(*e, *t),
203 Command::Variable(v) => Command::Variable(v.clone()),
204 Command::CharacterTokenAlias(tv) => Command::CharacterTokenAlias(*tv),
205 Command::Character(c) => Command::Character(*c),
206 Command::MathCharacter(c) => Command::MathCharacter(*c),
207 Command::Font(font) => Command::Font(*font),
208 }
209 }
210}
211
212// We need to implement Clone manually as the derived implementation requires S to be Clone.
213impl<S> Clone for BuiltIn<S> {
214 fn clone(&self) -> Self {
215 Self {
216 cmd: self.cmd.clone(),
217 doc: self.doc,
218 }
219 }
220}
221
222impl<S> From<ExpansionFn<S>> for BuiltIn<S> {
223 fn from(cmd: ExpansionFn<S>) -> Self {
224 Command::Expansion(cmd, None).into()
225 }
226}
227
228impl<S> From<rc::Rc<texmacro::Macro>> for BuiltIn<S> {
229 fn from(cmd: rc::Rc<texmacro::Macro>) -> Self {
230 Command::Macro(cmd).into()
231 }
232}
233
234impl<S> From<ExecutionFn<S>> for BuiltIn<S> {
235 fn from(cmd: ExecutionFn<S>) -> Self {
236 Command::Execution(cmd, None).into()
237 }
238}
239
240impl<S> From<variable::Command<S>> for BuiltIn<S> {
241 fn from(cmd: variable::Command<S>) -> Self {
242 Command::Variable(rc::Rc::new(cmd)).into()
243 }
244}
245
246impl<S> From<Command<S>> for BuiltIn<S> {
247 fn from(cmd: Command<S>) -> Self {
248 BuiltIn { cmd, doc: None }
249 }
250}
251
/// A tag is a piece of metadata that is optionally attached to a command.
///
/// Tags are used to implement certain TeX language semantics.
/// An example is TeX conditionals.
/// When a TeX conditional statement evaluates to false, the `\if` command must scan
/// the input stream until it finds either an `\else` or `\fi` command.
/// (The tokens scanned in this process are in the true branch of the conditional,
/// and must thus be discarded.)
/// Tags are the mechanism by which the scanning algorithm can
/// determine if a token corresponds to an `\else` or `\fi` command.
/// Concretely, both the `\else` and `\fi` commands have unique tags associated to them.
/// When scanning the stream,
/// if a token is a command token then the tag for the associated command is
/// compared to the known tags for `\else` and `\fi`.
/// If the tags match, the true branch is finished.
///
/// In general, TeX commands interface with the VM in two ways.
/// The first, most common way is when the main VM loop or expansion loop encounters a command.
/// The loop invokes the command's associated Rust function.
/// One can think of the Rust function as providing the behavior of the command in this context.
///
/// The second way is when a different command, like a conditional command, performs some operation
/// that is dependent on the commands it reads out of the input stream.
/// In this context the commands in the input stream provide behavior using tags.
/// The `\else` command having the specific else tag results in the conditional branch processing completing.
///
/// Note that the same tag can be used for multiple commands,
/// but each command can only have one tag.
///
/// ## Implementation details
///
/// Tags are non-zero 32 bit integers.
/// The first tag created has value 1, the second tag has value 2, and so on.
/// A global mutex is used to store the next tag value.
/// Tags have the property that `Option<Tag>` takes up 4 bytes in memory.
#[derive(PartialEq, Eq, Clone, Copy, Debug, PartialOrd, Ord, Hash)]
pub struct Tag(num::NonZeroU32);

// The value that the next call to `Tag::new` will hand out.
static NEXT_TAG_VALUE: sync::Mutex<u32> = sync::Mutex::new(1);

impl Tag {
    /// Creates a new unique tag.
    ///
    /// ```
    /// # use texlang::command::Tag;
    /// let tag_1 = Tag::new();
    /// let tag_2 = Tag::new();
    /// assert_ne!(tag_1, tag_2);
    /// ```
    ///
    /// # Panics
    ///
    /// Panics if the global 32 bit tag counter overflows,
    /// or if the mutex guarding the counter has been poisoned by an earlier panic.
    // We suppress the clippy warning because creating a new tag is a global operation and
    // shouldn't be done without explicit intention.
    #[allow(clippy::new_without_default)]
    pub fn new() -> Tag {
        let mut next = NEXT_TAG_VALUE
            .lock()
            .expect("the tag counter mutex is poisoned");
        // The counter starts at 1 and only ever increases, so it is never zero.
        let value = num::NonZeroU32::new(*next).expect("the tag counter is never zero");
        // Advance the counter while still holding the lock so no value is handed out twice.
        *next = next
            .checked_add(1)
            .expect("ran out of 32 bit tag values");
        Tag(value)
    }
}
311
312/// A static tag enables creating a tag in a static variable.
313///
314/// ```
315/// # use texlang::command::StaticTag;
316/// static TAG: StaticTag = StaticTag::new();
317///
318/// let first_get = TAG.get();
319/// let second_get = TAG.get();
320/// assert_eq!(first_get, second_get);
321/// ```
322pub struct StaticTag(std::sync::OnceLock<Tag>);
323
324impl Default for StaticTag {
325 fn default() -> Self {
326 StaticTag::new()
327 }
328}
329
330impl StaticTag {
331 /// Create a new static tag.
332 pub const fn new() -> StaticTag {
333 StaticTag(std::sync::OnceLock::new())
334 }
335
336 /// Get the actual [Tag] out of this [StaticTag].
337 /// Repeated calls to this function return the same tag.
338 ///
339 /// This is not a trivial getter.
340 /// The [Tag] is lazily constructed so even subsequent calls to this getter must do some work to check if the [Tag]
341 /// exists or not.
342 /// For very hot code paths it is advised to cache the return value somewhere, for example in a relevant command's state.
343 pub fn get(&self) -> Tag {
344 *self.0.get_or_init(Tag::new)
345 }
346}
347
348/// A primitive key uniquely identifies a primitive.
349///
350/// If two commands have the same key, they are the same primitive (expansion, execution, or variable primitive)
351/// The function returns [None] if the command is not a primitive (a macro or a token alias).
352#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
353pub(crate) enum PrimitiveKey {
354 Execution(usize, Option<Tag>),
355 Expansion(usize, Option<Tag>),
356 Variable(variable::CommandKey),
357}
358
359impl PrimitiveKey {
360 pub(crate) fn new<S>(command: &Command<S>) -> Option<Self> {
361 match command {
362 Command::Expansion(f, tag) => Some(PrimitiveKey::Expansion(*f as usize, *tag)),
363 Command::Execution(f, tag) => Some(PrimitiveKey::Execution(*f as usize, *tag)),
364 Command::Variable(v) => Some(PrimitiveKey::Variable(v.key())),
365 Command::Macro(_)
366 | Command::CharacterTokenAlias(_)
367 | Command::Character(_)
368 | Command::MathCharacter(_)
369 | Command::Font(_) => None,
370 }
371 }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    // Guards against accidental growth of the `Command` enum.
    #[test]
    fn func_size() {
        assert_eq!(std::mem::size_of::<Command<()>>(), 16);
    }

    static STATIC_TAG_1: StaticTag = StaticTag::new();
    static STATIC_TAG_2: StaticTag = StaticTag::new();

    // Static tags must be stable across calls, and every tag must be distinct
    // from every other tag, whether it was created statically or directly.
    #[test]
    fn tag() {
        let tag_1_val_1 = STATIC_TAG_1.get();
        let tag_2_val_1 = STATIC_TAG_2.get();
        let other_tag_1 = Tag::new();
        let tag_1_val_2 = STATIC_TAG_1.get();
        let tag_2_val_2 = STATIC_TAG_2.get();
        let other_tag_2 = Tag::new();

        assert_eq!(tag_1_val_1, tag_1_val_2);
        assert_eq!(tag_2_val_1, tag_2_val_2);

        // Check every pair, not just the pairs involving the first static tag.
        let distinct = [tag_1_val_1, tag_2_val_1, other_tag_1, other_tag_2];
        for (i, left) in distinct.iter().enumerate() {
            for right in &distinct[i + 1..] {
                assert_ne!(left, right);
            }
        }
    }

    // `Tag` wraps a non-zero integer so that `Option<Tag>` needs no extra space.
    #[test]
    fn tag_size() {
        assert_eq!(std::mem::size_of::<Option<Tag>>(), 4);
    }
}