texlang_stdlib/
expansion.rs

1//! Commands that alter the expansion process
2
3use texlang::prelude as txl;
4use texlang::traits::*;
5use texlang::*;
6
7/// Get the `\noexpand` command.
8pub fn get_noexpand<S>() -> command::BuiltIn<S> {
9    command::BuiltIn::new_expansion(noexpand_fn).with_tag(NO_EXPAND_TAG.get())
10}
11
12static NO_EXPAND_TAG: command::StaticTag = command::StaticTag::new();
13
14fn noexpand_fn<S>(_: token::Token, _: &mut vm::ExpansionInput<S>) -> txl::Result<()> {
15    panic!(
16        "The \\noexpand expansion function is never invoked directly. \
17         Instead, the primitive operates through the \\noexpand hook, \
18         which is a method on the `TexlangState` trait. \
19         Ensure your Texcraft VM is configured to use this hook."
20    )
21}
22
23#[inline]
24pub fn noexpand_hook<S: TexlangState>(
25    token: token::Token,
26    input: &mut vm::ExpansionInput<S>,
27    tag: Option<command::Tag>,
28) -> txl::Result<Option<token::Token>> {
29    // Fast path: this is not the \noexpand command.
30    // We want this check to be inlined into the VM functions that perform expansion.
31    if tag != Some(NO_EXPAND_TAG.get()) {
32        return Ok(None);
33    }
34    // Slow path: this is the \noexpand command.
35    // We don't want this check to be inlined into the VM because it will take up space in the instruction cache.
36    noexpand_hook_finish(token, input)
37}
38
39#[cold]
40fn noexpand_hook_finish<S: TexlangState>(
41    _token: token::Token,
42    input: &mut vm::ExpansionInput<S>,
43) -> txl::Result<Option<token::Token>> {
44    Ok(Some(
45        input.unexpanded().next_or_err(NoExpandEndOfInputError {})?,
46    ))
47}
48
49#[derive(Debug)]
50struct NoExpandEndOfInputError;
51
52impl error::EndOfInputError for NoExpandEndOfInputError {
53    fn doing(&self) -> String {
54        r"determining which token to suppress expansion for".into()
55    }
56}
57
58/// Get the simple `\expandafter` command.
59///
60/// This is the simplest implementation of the command, and the
61///     same implementation as in the original TeX.
62pub fn get_expandafter_simple<S: TexlangState>() -> command::BuiltIn<S> {
63    command::BuiltIn::new_expansion(expandafter_simple_fn)
64}
65
66/// Get the optimized `\expandafter` command.
67///
68/// This is a more complex implementation of `\expandafter` that is optimized for handling
69/// repeated `\expandafter` tokens.
70/// It contains two optimizations, as described below.
71/// These optimizations where developed "theoretically" and with no benchmarking, so it
72/// remains to be seen if they actually make expansion faster.
73/// For this reason both the optimized and simple `\expandafter` implementations are maintained.
74///
75/// We now describe the two optimizations.
76///  
77/// **First**, `\expandafter` control sequences are often linked together in the following format:
78///
79/// ```tex
80/// \expandafter<token 1>\expandafter<token 2>...\expandafter<token n><token n+1>
81/// ```
82///
83/// Here, to expand the first `\expandafter` we just need to expand `<token n+1>`.
84/// In the original implementation of TeX this works via recursion: the ith `\expandafter`
85/// requests expansion of the second-to-next token, which is the (i+1)th `\expandafter`.
86/// After n recursions, the last token is finally expanded.
87/// In the optimized implementation here, the token stream is scanned ahead for as long as the pattern
88/// `\expandafter<token i>` repeats.
89/// The token `<token n+1>` is expanded and the intermediate `\expandafter` tokens are dropped from the input.
90/// This is still an O(n) operation, but results in only 1 Rust function stack being used, rather than n.
91///
92/// **Second**, `\expandafter` commands are often grouped together like this:
93///
94/// ```tex
95/// \expandafter\expandafter\expandafter\A\expandafter\B\C
96/// ```
97///
98/// This TeX code causes `\C` to be expanded first, then `\B\` and finally `\A`.
99/// When the leading `\expandafter` is expanded, the first optimization kicks in and `\C` will be expanded, leaving:
100///
101/// ```tex
102/// \expandafter\A\B\Cexpanded
103/// ```
104///
105/// The second optimization is that the leading `\expandafter` that is left over will also be expanded
106///     without yielding control to the main expansion loop.
107/// If, after this pass, the leading token is again an `\expandafter` token, it will be expanded too.
108/// This process continues repeatedly until no `\expandafter` tokens are left at the start of the token stream.
109pub fn get_expandafter_optimized<S: TexlangState>() -> command::BuiltIn<S> {
110    command::BuiltIn::new_expansion(expandafter_optimized_fn)
111}
112
113fn expandafter_simple_fn<S: TexlangState>(
114    _expandafter_token: token::Token,
115    input: &mut vm::ExpansionInput<S>,
116) -> txl::Result<()> {
117    let first = input
118        .unexpanded()
119        .next_or_err(EndOfInputError { first: true })?;
120    let second = input
121        .unexpanded()
122        .next_or_err(EndOfInputError { first: false })?;
123    input.back(second);
124    input.expanded().expand_once()?;
125    input.back(first);
126    Ok(())
127}
128
129fn expandafter_optimized_fn<S: TexlangState>(
130    expandafter_token: token::Token,
131    input: &mut vm::ExpansionInput<S>,
132) -> txl::Result<()> {
133    let mut buffer: Vec<token::Token> = input.checkout_token_buffer();
134    // The optimization implemented here is that if we have the following input:
135    // \xa <token 1> \xa <token 2> ... \xa <token N> <token N+1>
136    // we expand <token N+1> once.
137    loop {
138        let first = input
139            .unexpanded()
140            .next_or_err(EndOfInputError { first: true })?;
141        buffer.push(first);
142        let second = input
143            .unexpanded()
144            .next_or_err(EndOfInputError { first: false })?;
145        if second.value() != expandafter_token.value() {
146            input.back(second);
147            break;
148        }
149    }
150    input.expanded().expand_once()?;
151    input.expansions_mut().extend(buffer.iter().rev());
152    input.return_token_buffer(buffer);
153    Ok(())
154}
155
156#[derive(Debug)]
157struct EndOfInputError {
158    first: bool,
159}
160impl error::EndOfInputError for EndOfInputError {
161    fn doing(&self) -> String {
162        format![
163            r"reading the {} token after \expandafter",
164            if self.first { "first" } else { "second" }
165        ]
166    }
167}
168
169/// Get the `\relax` command.
170pub fn get_relax<S>() -> command::BuiltIn<S> {
171    command::BuiltIn::new_execution(|_, _| Ok(()))
172}
173
174#[cfg(test)]
175mod test {
176    use super::*;
177    use crate::prefix;
178    use std::collections::HashMap;
179    use texlang_testing::*;
180
181    #[derive(Default)]
182    pub struct State {
183        prefix: prefix::Component,
184        testing: TestingComponent,
185    }
186
187    impl TexlangState for State {
188        fn expansion_override_hook(
189            token: token::Token,
190            input: &mut vm::ExpansionInput<Self>,
191            tag: Option<command::Tag>,
192        ) -> txl::Result<Option<token::Token>> {
193            noexpand_hook(token, input, tag)
194        }
195    }
196
197    implement_has_component![State {
198        prefix: prefix::Component,
199        testing: TestingComponent,
200    }];
201
202    fn built_in_commands(optimized: bool) -> HashMap<&'static str, command::BuiltIn<State>> {
203        HashMap::from([
204            ("def", crate::def::get_def()),
205            ("let", crate::alias::get_let()),
206            ("noexpand", get_noexpand()),
207            ("integer", TestingComponent::get_integer()),
208            (
209                "xa",
210                match optimized {
211                    true => get_expandafter_optimized(),
212                    false => get_expandafter_simple(),
213                },
214            ),
215        ])
216    }
217
218    test_suite![
219        @option(TestOption::BuiltInCommandsDyn(Box::new(|| { built_in_commands(true) }))),
220        @option(TestOption::AllowUndefinedCommands(true)),
221        expansion_equality_tests(
222            (simple_case, r"\def\a{Hello}\noexpand\a", r"\a"),
223            (
224                expandafter_and_noexpand_1,
225                r"\def\a#1\b{Hello '#1'}\def\b{World}\a\b",
226                "Hello ''"
227            ),
228            (
229                expandafter_and_noexpand_2,
230                r"\def\a#1\b{Hello '#1'}\def\b{World}\a\b\b",
231                "Hello ''World"
232            ),
233            (
234                expandafter_and_noexpand_3,
235                r"\def\a#1\b{Hello '#1'}\def\b{World}\xa\a\b\b",
236                "Hello 'World'"
237            ),
238            (
239                expandafter_and_noexpand_4,
240                r"\def\a#1\b{Hello '#1'}\def\b{World}\xa\a\noexpand\b\b",
241                "Hello ''World"
242            ),
243            (
244                only_expands_once,
245                r"\def\A{\B}\def\B{Hello}\xa\noexpand\A",
246                r"\B",
247            ),
248            (
249                peek_consumes_noexpand_1,
250                r"\def\A{\B}\def\B{Hello}\integer = 1 \noexpand\A",
251                r"\A",
252            ),
253            (
254                peek_consumes_noexpand_2,
255                r"\def\A{\B}\def\B{Hello}\integer = 1\noexpand\A",
256                r"Hello",
257            ),
258            // peek
259        ),
260        fatal_error_tests((end_of_input, r"\noexpand"),),
261    ];
262
263    static PREFIX: &str = r"\def\mk#1#2{\def#1##1\notes##2\end{##1\notes##2#2\end}}\mk\a a\mk\b b\mk\c c\mk\d d\def\notes#1\end{#1}";
264    static POSTFIX: &str = r"\notes\end";
265
266    #[macro_export]
267    macro_rules! expandafter_test {
268        ( $( ( $name: ident, $lhs: expr, $rhs: expr ) ),* $(,)? ) => {
269            mod expandafter_simple {
270                use super::*;
271                test_suite![
272                    @option(TestOption::BuiltInCommandsDyn(Box::new(|| { built_in_commands(false) }))),
273                    expansion_equality_tests(
274                        $(
275                            ( $name, format!("{}{}{}", PREFIX, $lhs, POSTFIX), $rhs ),
276                        )*
277                    ),
278                ];
279            }
280            mod expandafter_optimized {
281                use super::*;
282                test_suite![
283                    @option(TestOption::BuiltInCommandsDyn(Box::new(|| { built_in_commands(true) }))),
284                    expansion_equality_tests(
285                        $(
286                            ( $name, format!("{}{}{}", PREFIX, $lhs, POSTFIX), $rhs ),
287                        )*
288                    ),
289                ];
290            }
291        };
292    }
293
294    expandafter_test![
295        // In the following test we alias \xa to \other so that the \expandafter optimizations don't kick in.
296        // This makes \other behave like the unoptimized \expandafter.
297        // It then triggers the case when the optimized \expandafter is only supposed to expand the input
298        // once, rather the repeatedly expanding to remove the \expandafter's at the front.
299        (
300            expandafter_only_once,
301            r"\let\other=\xa \other\noexpand\xa\xa\xa\a\b",
302            r"\noexpand\xa ba"
303        ),
304        (texbook_p374_3, r"\xa\a\b", r"ba"),
305        (texbook_p374_4, r"\xa\xa\xa\a\xa\b\c", "cba"),
306        (
307            texbook_p374_5,
308            r"\xa\xa\xa\xa\xa\xa\xa\a\xa\xa\xa\b\xa\c\d",
309            "dcba"
310        ),
311        /*
312        All of the following permutation cases were generated by hand, but there's actually
313        an algorithmic way to generate them. For each macro, count the number of \expandafter=\xa
314        tokens that come before. Then calculate the shift:
315        - if #\xa is 0, the shift is 0.
316        - if #\xa is 1, the shift is 1.
317        - if #\xa is 3, the shift is 2.
318        - if #\xa is 7, the shift is 3.
319        (In general if #\xa is 2^n-1, the shift is n-1.)
320        Now work backwards through the tokens `\a\b\c\d`. For each token, move it right by the associated
321        shift. You then get the result of the expansion.
322
323        For example: `\xa\xa\xa\a\xa\b\c\d`.
324        The #\xa numbers are: 3 1 0 0
325        The shifts are: 2 1 0 0
326        Then
327        - start:         `\a\b\c\d`
328        - shift \d by 0: `\a\b\c\d`
329        - shift \c by 0: `\a\b\c\d`
330        - shift \b by 1: `\a\c\b\d`
331        - shift \a by 2: `\c\b\a\d` <- this is the expansion
332        */
333        (permutation_abcd, r"\a\b\c\d", "abcd"),
334        (permutation_abdc, r"\a\b\xa\c\d", "abdc"),
335        (permutation_acbd, r"\a\xa\b\c\d", "acbd"),
336        (permutation_acdb, r"\a\xa\xa\xa\b\c\d", "acdb"),
337        (permutation_adbc, r"\a\xa\b\xa\c\d", "adbc"),
338        (permutation_adcb, r"\a\xa\xa\xa\b\xa\c\d", "adcb"),
339        (permutation_bacd, r"\xa\a\b\c\d", "bacd"),
340        (permutation_badc, r"\xa\a\b\xa\c\d", "badc"),
341        (permutation_bcad, r"\xa\xa\xa\a\b\c\d", "bcad"),
342        (permutation_bcda, r"\xa\xa\xa\xa\xa\xa\xa\a\b\c\d", "bcda"),
343        (permutation_bdac, r"\xa\xa\xa\a\b\xa\c\d", "bdac"),
344        (
345            permutation_bdca,
346            r"\xa\xa\xa\xa\xa\xa\xa\a\b\xa\c\d",
347            "bdca"
348        ),
349        (permutation_cabd, r"\xa\a\xa\b\c\d", "cabd"),
350        (permutation_cadb, r"\xa\a\xa\xa\xa\b\c\d", "cadb"),
351        (permutation_cbad, r"\xa\xa\xa\a\xa\b\c\d", "cbad"),
352        (
353            permutation_cbda,
354            r"\xa\xa\xa\xa\xa\xa\xa\a\xa\xa\xa\b\c\d",
355            "cdba"
356        ),
357        (permutation_cdab, r"\xa\xa\xa\a\xa\xa\xa\b\c\d", "cdab"),
358        (
359            permutation_cdba,
360            r"\xa\xa\xa\xa\xa\xa\xa\a\xa\xa\xa\b\c\d",
361            "cdba"
362        ),
363        (permutation_dabc, r"\xa\a\xa\b\xa\c\d", "dabc"),
364        (permutation_dacb, r"\xa\a\xa\xa\xa\b\xa\c\d", "dacb"),
365        (permutation_dbac, r"\xa\xa\xa\a\xa\b\xa\c\d", "dbac"),
366        (
367            permutation_dbca,
368            r"\xa\xa\xa\xa\xa\xa\xa\a\xa\b\xa\c\d",
369            "dbca"
370        ),
371        (permutation_dcab, r"\xa\xa\xa\a\xa\xa\xa\b\xa\c\d", "dcab"),
372        (
373            permutation_dcba,
374            r"\xa\xa\xa\xa\xa\xa\xa\a\xa\xa\xa\b\xa\c\d",
375            "dcba"
376        ),
377        (
378            expandafter_last_after_first_pass,
379            r"\xa\xa\xa\a\xa\xa\b\c\d",
380            "bdac"
381        ),
382    ];
383
384    fn run_expandafter_failure_test(input: &str, optimized: bool) {
385        let options = vec![TestOption::BuiltInCommandsDyn(Box::new(|| {
386            built_in_commands(optimized)
387        }))];
388        run_fatal_error_test(&input, &options, false);
389    }
390
391    #[macro_export]
392    macro_rules! expandafter_fatal_error_test {
393        ($( ( $name: ident, $input: expr), )+) => {
394            $(
395            mod $name {
396                #[test]
397                fn simple() {
398                    super::run_expandafter_failure_test($input, false);
399                }
400                #[test]
401                fn optimized() {
402                    super::run_expandafter_failure_test($input, true);
403                }
404            }
405            )+
406        };
407    }
408
409    expandafter_fatal_error_test![
410        (expandafter_missing_1st_token, r"\xa"),
411        (expandafter_missing_2nd_token, r"\xa\a"),
412        (expandafter_missing_1st_token_nested, r"\xa\xa\xa\a\xa\xa\b"),
413        (
414            expandafter_missing_2nd_token_nested,
415            r"\def\A{}\xa\xa\xa\A\A"
416        ),
417    ];
418}