texlang_stdlib/
def.rs

1//! User-defined macros (`\def` and friends)
2
3use texcraft_stdext::algorithms::substringsearch::Matcher;
4use texcraft_stdext::collections::groupingmap;
5use texcraft_stdext::collections::nevec::Nevec;
6use texcraft_stdext::nevec;
7use texlang::prelude as txl;
8use texlang::traits::*;
9use texlang::*;
10
/// Short documentation string for the `\def` command.
pub const DEF_DOC: &str = "Define a custom macro";
12
13/// Get the `\def` command.
14pub fn get_def<S: TexlangState>() -> command::BuiltIn<S> {
15    command::BuiltIn::new_execution(def_primitive_fn).with_tag(def_tag())
16}
17
18/// Get the `\gdef` command.
19pub fn get_gdef<S: TexlangState>() -> command::BuiltIn<S> {
20    command::BuiltIn::new_execution(gdef_primitive_fn).with_tag(def_tag())
21}
22
/// Static tag attached to both the `\def` and the `\gdef` built-in commands.
static DEF_TAG: command::StaticTag = command::StaticTag::new();

/// Returns the tag carried by the commands built by [`get_def`] and
/// [`get_gdef`].
pub fn def_tag() -> command::Tag {
    DEF_TAG.get()
}
28
29fn def_primitive_fn<S: TexlangState>(
30    def_token: token::Token,
31    input: &mut vm::ExecutionInput<S>,
32) -> txl::Result<()> {
33    parse_and_set_macro(def_token, input, false)
34}
35
36fn gdef_primitive_fn<S: TexlangState>(
37    def_token: token::Token,
38    input: &mut vm::ExecutionInput<S>,
39) -> txl::Result<()> {
40    parse_and_set_macro(def_token, input, true)
41}
42
43fn parse_and_set_macro<S: TexlangState>(
44    _: token::Token,
45    input: &mut vm::ExecutionInput<S>,
46    set_globally_override: bool,
47) -> txl::Result<()> {
48    let mut scope = TexlangState::variable_assignment_scope_hook(input.state_mut());
49    if set_globally_override {
50        scope = groupingmap::Scope::Global;
51    }
52    let cmd_ref_or = Option::<token::CommandRef>::parse(input)?;
53    let PrefixAndParameters {
54        prefix,
55        raw_parameters,
56        replacement_end_token,
57        skip_replacement_scan,
58    } = parse_prefix_and_parameters(input.unexpanded())?;
59    let parameters: Vec<texmacro::Parameter> = raw_parameters
60        .into_iter()
61        .map(|a| match a {
62            RawParameter::Undelimited => texmacro::Parameter::Undelimited,
63            RawParameter::Delimited(vec) => texmacro::Parameter::Delimited(Matcher::new(vec)),
64        })
65        .collect();
66    let replacement = if skip_replacement_scan {
67        vec![]
68    } else {
69        let mut rs =
70            parse_replacement_text(input.unexpanded(), replacement_end_token, parameters.len())?;
71        for r in rs.iter_mut() {
72            if let texmacro::Replacement::Tokens(tokens) = r {
73                tokens.reverse();
74            }
75        }
76        rs
77    };
78    let user_defined_macro = texmacro::Macro::new(prefix, parameters, replacement);
79    if let Some(cmd_ref) = cmd_ref_or {
80        input
81            .commands_map_mut()
82            .insert_macro(cmd_ref, user_defined_macro, scope);
83    }
84    Ok(())
85}
86
87enum RawParameter {
88    Undelimited,
89    Delimited(Nevec<token::Value>),
90}
91
92impl RawParameter {
93    fn push(&mut self, t: token::Token) {
94        match self {
95            RawParameter::Undelimited => {
96                *self = RawParameter::Delimited(nevec![t.value()]);
97            }
98            RawParameter::Delimited(vec) => {
99                vec.push(t.value());
100            }
101        }
102    }
103}
104
/// Converts the digit written after `#` into a zero-based parameter index.
///
/// The ASCII digits `'1'` through `'9'` map to `Some(0)` through `Some(8)`;
/// every other character, including `'0'`, yields `None`.
fn char_to_parameter_index(c: char) -> Option<usize> {
    match c {
        // The digits are contiguous, so the offset from '1' is the index.
        '1'..='9' => Some(c as usize - '1' as usize),
        _ => None,
    }
}
119
/// Everything collected while scanning the parameter text of a macro
/// definition.
struct PrefixAndParameters {
    /// Tokens that appear before the first parameter.
    prefix: Vec<token::Token>,
    /// One entry per declared parameter, together with the delimiter tokens
    /// that follow it.
    raw_parameters: Vec<RawParameter>,
    /// For the weird `#{` edge case: the begin-group token that ended the
    /// parameter text. When set, it is also appended to the end of the
    /// replacement text.
    replacement_end_token: Option<token::Token>,
    /// Set when a stray end-group token ended the parameter text; the
    /// replacement text is then not scanned at all.
    skip_replacement_scan: bool,
}
127
128/// TeX.2021.474
129fn parse_prefix_and_parameters<S: TexlangState>(
130    input: &mut vm::UnexpandedStream<S>,
131) -> txl::Result<PrefixAndParameters> {
132    let mut prefix = Vec::new();
133    let mut raw_parameters: Vec<RawParameter> = Vec::new();
134    let mut replacement_end_token = None;
135    let mut skip_replacement_scan = false;
136    loop {
137        let token = input.next_or_err(ParameterPartEndOfInputError {})?;
138        match token.value() {
139            token::Value::BeginGroup(_) => {
140                break;
141            }
142            token::Value::EndGroup(_) => {
143                input.error(error::SimpleTokenError::new(
144                    token,
145                    "unexpected end group token while parsing the parameter of a macro definition",
146                ))?;
147                skip_replacement_scan = true;
148                break;
149            }
150            token::Value::Parameter(_) => {
151                // TeX.2021.476
152                let parameter_token = input.next_or_err(ParameterPartEndOfInputError {})?;
153                // "parsing a parameter")?;
154                match parameter_token.value() {
155                    token::Value::BeginGroup(_) => {
156                        // In this case we end the group according to the special #{ rule
157                        replacement_end_token = Some(parameter_token);
158                        match raw_parameters.last_mut() {
159                            None => {
160                                prefix.push(parameter_token);
161                            }
162                            Some(spec) => {
163                                spec.push(parameter_token);
164                            }
165                        }
166                        break;
167                    }
168                    _ => {
169                        if raw_parameters.len() == 9 {
170                            input.error(error::SimpleTokenError::new(
171                                token,
172                                "Too many parameters; you already have 9",
173                            ))?;
174                            continue;
175                        }
176                        let parameter_index_correct = match parameter_token.char() {
177                            // control sequence
178                            None => false,
179                            // character token
180                            Some(c) => {
181                                match char_to_parameter_index(c) {
182                                    // non-numeric character token
183                                    None => false,
184                                    // numeric character token
185                                    Some(n) => n == raw_parameters.len(),
186                                }
187                            }
188                        };
189                        if !parameter_index_correct {
190                            input.error(InvalidParameterNumberError {
191                                parameter_number_token: parameter_token,
192                                parameters_so_far: raw_parameters.len(),
193                            })?;
194                            input.back(parameter_token);
195                        }
196                        raw_parameters.push(RawParameter::Undelimited);
197                    }
198                }
199            }
200            _ => match raw_parameters.last_mut() {
201                None => {
202                    prefix.push(token);
203                }
204                Some(parameter) => {
205                    parameter.push(token);
206                }
207            },
208        }
209    }
210    // We may end up here because the input ended in which case we should error.
211    // However this case will be handled when we try to scan the replacement
212    // text.
213    Ok(PrefixAndParameters {
214        prefix,
215        raw_parameters,
216        replacement_end_token,
217        skip_replacement_scan,
218    })
219}
220
221#[derive(Debug)]
222struct ParameterPartEndOfInputError;
223
224impl error::EndOfInputError for ParameterPartEndOfInputError {
225    fn doing(&self) -> String {
226        r"parsing the parameter part of a macro being defined by \def".into()
227    }
228}
229
230#[derive(Debug)]
231struct ReplacementPartEndOfInputError;
232
233impl error::EndOfInputError for ReplacementPartEndOfInputError {
234    fn doing(&self) -> String {
235        r"parsing the replacement part of a macro being defined by \def".into()
236    }
237}
238
239fn parse_replacement_text<S: TexlangState>(
240    input: &mut vm::UnexpandedStream<S>,
241    opt_final_token: Option<token::Token>,
242    num_parameters: usize,
243) -> txl::Result<Vec<texmacro::Replacement>> {
244    // TODO: could we use a pool of vectors to avoid some of the allocations here?
245    let mut result = vec![];
246    let mut scope_depth = 0;
247    let push = |result: &mut Vec<texmacro::Replacement>, token| match result.last_mut() {
248        Some(texmacro::Replacement::Tokens(tokens)) => {
249            tokens.push(token);
250        }
251        _ => {
252            result.push(texmacro::Replacement::Tokens(vec![token]));
253        }
254    };
255
256    loop {
257        let token = input.next_or_err(ReplacementPartEndOfInputError {})?;
258        // "parsing the replacement text of a macro")?;
259        match token.value() {
260            token::Value::BeginGroup(_) => {
261                scope_depth += 1;
262            }
263            token::Value::EndGroup(_) => {
264                if scope_depth == 0 {
265                    if let Some(final_token) = opt_final_token {
266                        push(&mut result, final_token);
267                    }
268                    return Ok(result);
269                }
270                scope_depth -= 1;
271            }
272            token::Value::Parameter(_) => {
273                let parameter_token = input.next_or_err(ReplacementPartEndOfInputError {})?;
274                let c = match parameter_token.value() {
275                    token::Value::Parameter(_) => {
276                        // ## case
277                        push(&mut result, parameter_token);
278                        continue;
279                    }
280                    _ => parameter_token.char(),
281                };
282                let valid_index_or = match c {
283                    // control sequence
284                    None => None,
285                    // character
286                    Some(c) => char_to_parameter_index(c).filter(|&n| n < num_parameters),
287                };
288                match valid_index_or {
289                    None => {
290                        // TeX.2021.479
291                        input.error(error::SimpleTokenError::new(
292                            parameter_token,
293                            "illegal parameter number",
294                        ))?;
295                        // Fallback to the ## case
296                        input.back(parameter_token);
297                        push(&mut result, token);
298                    }
299                    Some(valid_index) => {
300                        result.push(texmacro::Replacement::Parameter(valid_index));
301                    }
302                }
303                continue;
304            }
305            _ => {}
306        }
307
308        push(&mut result, token);
309    }
310}
311
312#[derive(Debug)]
313struct InvalidParameterNumberError {
314    parameter_number_token: token::Token,
315    parameters_so_far: usize,
316}
317
318impl error::TexError for InvalidParameterNumberError {
319    fn kind(&self) -> error::Kind {
320        error::Kind::Token(self.parameter_number_token)
321    }
322
323    fn title(&self) -> String {
324        "unexpected parameter".to_string()
325    }
326
327    fn notes(&self) -> Vec<error::display::Note> {
328        vec![format![
329            "this macro has {} parameter(s) so far, so parameter number #{} was expected.",
330            self.parameters_so_far,
331            self.parameters_so_far + 1
332        ]
333        .into()]
334    }
335}
336
#[cfg(test)]
mod test {
    //! Tests for `\def` and `\gdef`, written with the `texlang_testing` test
    //! suite macro.

    use std::collections::HashMap;

    use super::*;
    use crate::prefix;

    /// VM state used by the tests: the prefix component (for `\global`) plus
    /// the testing component.
    #[derive(Default)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    struct State {
        prefix: prefix::Component,
        testing: texlang_testing::TestingComponent,
    }

    implement_has_component![State {
        prefix: prefix::Component,
        testing: texlang_testing::TestingComponent,
    }];

    impl TexlangState for State {
        // Delegates to the prefix component so that `\global\def` is honored.
        fn variable_assignment_scope_hook(
            state: &mut Self,
        ) -> texcraft_stdext::collections::groupingmap::Scope {
            prefix::variable_assignment_scope_hook(state)
        }
        // Delegates to the testing component's handling of recoverable errors.
        fn recoverable_error_hook(
            &self,
            recoverable_error: error::TracedTexError,
        ) -> Result<(), Box<dyn error::TexError>> {
            texlang_testing::TestingComponent::recoverable_error_hook(self, recoverable_error)
        }
    }

    /// The commands available to the TeX snippets in the tests below.
    fn built_in_commands() -> HashMap<&'static str, command::BuiltIn<State>> {
        HashMap::from([
            ("def", get_def()),
            ("gdef", get_gdef()),
            ("global", prefix::get_global()),
            ("assertGlobalIsFalse", prefix::get_assert_global_is_false()),
        ])
    }

    texlang_testing::test_suite![
        @option(texlang_testing::TestOption::BuiltInCommands(built_in_commands)),
        @option(texlang_testing::TestOption::AllowUndefinedCommands(true)),
        expansion_equality_tests(
            (def_parsed_successfully, r"\def\A{abc}", ""),
            (output_is_correct, r"\def\A{abc}\A", "abc"),
            (active_char, r"\def~{abc}~", "abc"),
            (output_twice, "\\def\\A{abc}\\A\\A", "abcabc"),
            (parse_one_parameter, "\\def\\A#1{a-#1-b}", ""),
            (one_undelimited_parameter, "\\def\\A#1{a-#1-b}\\A1", "a-1-b"),
            (
                one_undelimited_parameter_multiple_times,
                "\\def\\A#1{#1 #1 #1}\\A1",
                "1 1 1"
            ),
            (
                one_undelimited_parameter_multiple_tokens,
                "\\def\\A#1{a-#1-b}\\A{123}",
                "a-123-b"
            ),
            (
                two_undelimited_parameters,
                "\\def\\A#1#2{#2-#1}\\A56",
                "6-5"
            ),
            (
                two_undelimited_parameters_multiple_token_inputs,
                "\\def\\A#1#2{#2-#1}\\A{abc}{xyz}",
                "xyz-abc"
            ),
            (
                consume_prefix_correctly,
                "\\def\\A fgh{567}\\A fghi",
                "567i"
            ),
            (
                one_undelimited_parameter_with_prefix,
                "\\def\\A abc#1{y#1z}\\A abcdefg",
                "ydzefg"
            ),
            (
                // The argument is a braced group, so the parameter receives
                // several tokens at once.
                one_undelimited_parameter_with_prefix_multiple_tokens,
                "\\def\\A abc#1{y#1z}\\A abc{def}g",
                "ydefzg"
            ),
            (
                one_delimited_parameter,
                "\\def\\A #1xxx{y#1z}\\A abcxxx",
                "yabcz"
            ),
            (
                one_delimited_parameter_empty,
                "\\def\\A #1xxx{y#1z}\\A xxx",
                "yz"
            ),
            (
                one_delimited_parameter_with_scope,
                "\\def\\A #1xxx{#1}\\A abc{123xxx}xxx",
                "abc{123xxx}"
            ),
            (
                one_delimited_parameter_with_prefix,
                "\\def\\A a#1c{x#1y}\\A abcdef",
                "xbydef"
            ),
            (
                two_delimited_parameters_with_prefix,
                r"\def\A a#1c#2e{x#2y#1z}\A abcdef",
                "xdybzf"
            ),
            (
                one_delimited_parameter_grouped_value,
                r"\def\A #1c{x#1y}\A {Hello}c",
                "xHelloy"
            ),
            (
                parameter_brace_special_case,
                r"\def\A #{Mint says }\A{hello}",
                "Mint says {hello}"
            ),
            (
                grouping,
                r"\def\A{Hello}\A{\def\A{World}\A}\A",
                r"HelloWorldHello"
            ),
            (
                grouping_global,
                r"\def\A{Hello}\A{\global\def\A{World}\A}\A",
                r"HelloWorldWorld"
            ),
            (
                gdef,
                r"\def\A{Hello}\A{\gdef\A{World}\A}\A",
                r"HelloWorldWorld"
            ),
            (
                gdef_global,
                r"\def\A{Hello}\A{\global\gdef\A{World}\A}\A",
                r"HelloWorldWorld"
            ),
            (
                def_takes_global,
                r"\global\def\A{Hello}\assertGlobalIsFalse",
                r""
            ),
            (
                gdef_takes_global,
                r"\global\gdef\A{Hello}\assertGlobalIsFalse",
                r""
            ),
            (
                texbook_exercise_20_1,
                r"\def\mustnt{I must not talk in class.}%
          \def\five{\mustnt\mustnt\mustnt\mustnt\mustnt}%
          \def\twenty{\five\five\five\five}%
          \def\punishment{\twenty\twenty\twenty\twenty\twenty}%
          \punishment",
                "I must not talk in class.".repeat(100)
            ),
            (
                texbook_exercise_20_2,
                r"\def\a{\b}%
          \def\b{A\def\a{B\def\a{C\def\a{\b}}}}%
          \def\puzzle{\a\a\a\a\a}%
          \puzzle",
                "ABCAB"
            ),
            (
                texbook_exercise_20_3_part_1,
                "\\def\\row#1{(#1_1,\\ldots,#1_n)}\\row{\\bf x}",
                "(\\bf x_1,\\ldots,\\bf x_n)"
            ),
            (
                texbook_exercise_20_3_part_2,
                "\\def\\row#1{(#1_1,\\ldots,#1_n)}\\row{{\\bf x}}",
                "({\\bf x}_1,\\ldots,{\\bf x}_n)"
            ),
            (
                texbook_exercise_20_4_part_1,
                r#"\def\mustnt#1#2{I must not #1 in #2.}%
           \def\five#1#2{\mustnt{#1}{#2}\mustnt{#1}{#2}\mustnt{#1}{#2}\mustnt{#1}{#2}\mustnt{#1}{#2}}%
           \def\twenty#1#2{\five{#1}{#2}\five{#1}{#2}\five{#1}{#2}\five{#1}{#2}}%
           \def\punishment#1#2{\twenty{#1}{#2}\twenty{#1}{#2}\twenty{#1}{#2}\twenty{#1}{#2}\twenty{#1}{#2}}%
           \punishment{run}{the halls}"#,
                "I must not run in the halls.".repeat(100)
            ),
            (
                texbook_exercise_20_4_part_2,
                r#"\def\mustnt{I must not \doit\ in \thatplace.}%
           \def\five{\mustnt\mustnt\mustnt\mustnt\mustnt}%
           \def\twenty{\five\five\five\five}%
           \def\punishment#1#2{\def\doit{#1}\def\thatplace{#2}\twenty\twenty\twenty\twenty\twenty}%
           \punishment{run}{the halls}"#,
                r"I must not run\ in the halls.".repeat(100)
            ),
            (
                texbook_exercise_20_5,
                r"\def\a#1{\def\b##1{##1#1}}\a!\b{Hello}",
                "Hello!"
            ),
            (
                texbook_exercise_20_5_temp,
                r"\def\b#1{#1!}\b{Hello}",
                "Hello!"
            ),
            (
                texbook_exercise_20_5_example_below,
                "\\def\\a#1#{\\hbox to #1}\\a3pt{x}",
                "\\hbox to 3pt{x}"
            ),
            (
                texbook_exercise_20_6,
                r"\def\b#1{And #1, World!}\def\a#{\b}\a{Hello}",
                "And Hello, World!"
            ),
            (
                space_in_undelimited_param_1,
                r"\def\Hello#1#2{Hello-#1-#2-World}\Hello A B C",
                r"Hello-A-B-World C",
            ),
            (
                space_in_undelimited_param_2,
                r"\def\Space{ }\def\Hello#1#2{Hello-#1-#2-World}\Hello\Space B C",
                r"Hello- -B-World C",
            ),
        ),
        serde_tests((
            serde_basic,
            r"\def\helloWorld{Hello World} ",
            r"\helloWorld"
        ),),
        end_of_input_error_tests(
            (end_of_input_scanning_target, r"\def"),
            (end_of_input_scanning_argument_text, r"\def\A"),
            (end_of_input_scanning_replacement, r"\def\A{"),
            (end_of_input_scanning_nested_replacement, r"\def\A{{}"),
            // TODO (end_of_input_reading_parameter_number, r"\def\A#"),
            (end_of_input_scanning_argument, r"\def\A#1{X-#1-Z}\A Y{}\A"),
            (
                end_of_input_reading_value_for_parameter,
                r"\def\A#1{#1}\A{correct}\A{this {is parameter 1 but it never ends}",
            ),
            // TODO (end_of_input_reading_prefix, r"\def\A abc{def}\A abc\A ab"),
            (
                end_of_input_reading_delimiter,
                r"\def\A #1abc{#1}\A xyzabc\A {first parameter}ab",
            ),
        ),
        recoverable_failure_tests(
            (bad_token_target, r"\def a other stuff{}Hello", "Hello"),
            (unexpected_token_argument, r"\def\A{Hello}\def\A }\A", ""),
            (wrong_parameter_number_1, r"\def\A #2X{-#1-}\A Y2X", "-Y-"),
            (wrong_parameter_number_2, r"\def\A #GX{-#1-}\A YGX", "-Y-"),
            (wrong_parameter_number_3, r"\def\A #\def{-#1-}\A Y\def", "-Y-"),
            (unexpected_end_group, r"\def\A{M}\def\A X}\A XY", r"Y"),
            (too_many_parameters, r"\def\A#1#2#3#4#5#6#7#8#9#0{#9#8#7#6#5#4#3#2#1}\A abcdefghi", "ihgfedcba"),
            (
                invalid_parameter_in_replacement_1,
                // invalid #1 becomes ##1
                r"\def\A{\def\B##1{-#1-}}\A{}\B C",
                "-C-",
            ),
            (
                invalid_parameter_in_replacement_2,
                // invalid #A becomes ##A
                // we don't invoke the macro because this would output #A which is invalid.
                // We instead just verify that the macro definition ends where it should.
                r"\def\A{\def\B##1{-#A-}}Hello",
                "Hello",
            ),
            (
                invalid_parameter_in_replacement_3,
                // invalid #\cs becomes ##\cs
                // we don't invoke the macro because this would output #\cs which is invalid.
                // We instead just verify that the macro definition ends where it should.
                r"\def\A{\def\B##1{-#\cs-}}Hello",
                "Hello",
            ),
            (prefix_does_not_match, r"\def\A abc{d}\A abdef", "ef"),
        ),
    ];
}