texlang/command/
map.rs

1//! Map type
2use super::*;
3use std::borrow::Cow;
4use std::cell::{Ref, RefCell};
5use std::collections::HashMap;
6use std::fmt;
7use std::rc::Rc;
8use texcraft_stdext::collections::groupingmap;
9use texcraft_stdext::collections::groupingmap::GroupingHashMap;
10use texcraft_stdext::collections::groupingmap::GroupingVec;
11
/// Map is a map type where the keys are control sequence names and the values are TeX commands.
///
/// Commands can also be bound to active characters; those are stored in a separate table.
///
/// # Design notes
///
/// There are a number of design goals for the map:
///
/// - Make retrieving the function for a command very fast. This is achieved primarily by storing
///   command functions by themselves in an array. The index in the array is the control sequence
///   name, which is an integer when interned. This implementation choice means fast-as-possible
///   lookups. Storing the function by itself means there is good cache locality.
///
/// The remaining items are design sketches. The function names used in them describe intended
/// operations and do not necessarily match the methods that exist on this type.
///
/// - Insert built-in commands at the start:
///   `insert_built_in(cs_name, cmd)`.
/// - Insert variable commands that aren't there at the start, but will be in the future
///   (e.g., `\countdef`):
///   `register_built_in(cmd)`. The command must have a unique ID. Open question: is the type ID
///   stable across binary builds? Maybe a string ID is needed instead, but then that ID needs to
///   be fed through by the library that registers the command.
/// - Should we enforce that the previous two steps can only be done at creation time? Probably.
///   Maybe initialize the map using a `Map<cs_name, cmd>` and a `Vec<cmd>`. A `cs_name` <-> `str`
///   wrapper can be provided at the VM level.
/// - While running, insert macros:
///   `insert_macro(&str, macro)`.
/// - While running, alias commands by current name or using the special ID inserts:
///   - `alias_control_sequence(cs_name, cs_name)` -> undefined control sequence error
///   - `alias_registered_built_in(cs_name, cmd_id, variable_addr)`
///   - `alias_character(cs_name, token::Token)`
pub struct Map<S> {
    /// Commands bound to control sequences, indexed by the control sequence name converted to
    /// a `usize`.
    commands: GroupingVec<Command<S>>,
    /// Commands bound to active characters, keyed by the character.
    active_char: GroupingHashMap<char, Command<S>>,

    /// The built-in commands the map was created with, keyed by control sequence name.
    built_in_commands: HashMap<token::CsName, BuiltIn<S>>,
    /// Reverse index from a command's primitive key to the name of the built-in it belongs to.
    /// It is `None` until the first call to `primitive_key_to_built_in`.
    primitive_key_to_built_in_lazy: RefCell<Option<HashMap<PrimitiveKey, token::CsName>>>,
    /// Reverse index from a variable's getters key to the name of the built-in it belongs to.
    /// It is `None` until the first call to `getters_key_to_built_in`.
    getters_key_to_built_in_lazy: RefCell<Option<HashMap<variable::GettersKey, token::CsName>>>,
}
45
46impl<S> Map<S> {
47    pub(crate) fn new(built_in_commands: HashMap<token::CsName, BuiltIn<S>>) -> Map<S> {
48        Self {
49            commands: built_in_commands
50                .iter()
51                .map(|(k, v)| (k.to_usize(), v.cmd.clone()))
52                .collect(),
53            active_char: Default::default(),
54            built_in_commands,
55            primitive_key_to_built_in_lazy: Default::default(),
56            getters_key_to_built_in_lazy: Default::default(),
57        }
58    }
59
60    #[inline]
61    pub fn get_command(&self, command_ref: &token::CommandRef) -> Option<&Command<S>> {
62        match command_ref {
63            token::CommandRef::ControlSequence(name) => self.commands.get(&name.to_usize()),
64            token::CommandRef::ActiveCharacter(c) => self.active_char.get(c),
65        }
66    }
67
68    pub fn get_tag(&self, command_ref: &token::CommandRef) -> Option<Tag> {
69        let command = match command_ref {
70            token::CommandRef::ControlSequence(name) => self.commands.get(&name.to_usize()),
71            token::CommandRef::ActiveCharacter(c) => self.active_char.get(c),
72        };
73        command.map(Command::tag).unwrap_or(None)
74    }
75
    /// Returns the built-in commands held by this map, keyed by control sequence name.
    ///
    /// This is the collection stored in the map's `built_in_commands` field; it is not affected
    /// by later calls to [`Map::insert`], which only write to the lookup tables.
    pub fn built_in_commands(&self) -> &HashMap<token::CsName, BuiltIn<S>> {
        &self.built_in_commands
    }
79
80    pub fn get_command_slow(&self, command_ref: &token::CommandRef) -> Option<BuiltIn<S>> {
81        let command = self.get_command(command_ref)?;
82        if let Some(ref key) = PrimitiveKey::new(command) {
83            if let Some(built_in) = self.primitive_key_to_built_in().get(key) {
84                return self.built_in_commands.get(built_in).cloned();
85            }
86        }
87        Some(BuiltIn {
88            cmd: command.clone(),
89            doc: None,
90        })
91    }
92
93    #[inline]
94    pub fn insert_variable_command(
95        &mut self,
96        command_ref: token::CommandRef,
97        variable_command: variable::Command<S>,
98        scope: groupingmap::Scope,
99    ) {
100        self.insert(
101            command_ref,
102            Command::Variable(rc::Rc::new(variable_command)),
103            scope,
104        );
105    }
106
107    // TODO: reconsider this API of 4 setters ... it seems to be unnecessary complexity?
108    pub fn insert_macro(
109        &mut self,
110        name: token::CommandRef,
111        texmacro: texmacro::Macro,
112        scope: groupingmap::Scope,
113    ) {
114        self.insert(name, Command::Macro(rc::Rc::new(texmacro)), scope);
115    }
116
117    pub fn alias_control_sequence(
118        &mut self,
119        alias: token::CommandRef,
120        command: &token::CommandRef,
121        scope: groupingmap::Scope,
122    ) {
123        if let Some(command) = self.get_command(command) {
124            self.insert(alias, command.clone(), scope);
125        };
126    }
127
128    pub fn alias_token(
129        &mut self,
130        alias: token::CommandRef,
131        token: token::Token,
132        scope: groupingmap::Scope,
133    ) {
134        self.insert(alias, Command::CharacterTokenAlias(token.value()), scope);
135    }
136
137    pub fn insert(
138        &mut self,
139        command_ref: token::CommandRef,
140        func: Command<S>,
141        scope: groupingmap::Scope,
142    ) {
143        match command_ref {
144            token::CommandRef::ControlSequence(name) => {
145                let key = name.to_usize();
146                self.commands.insert(key, func, scope);
147            }
148            token::CommandRef::ActiveCharacter(c) => {
149                self.active_char.insert(c, func, scope);
150            }
151        }
152    }
153
154    // TODO: support active characters
155    pub fn to_hash_map_slow(&self) -> HashMap<token::CsName, BuiltIn<S>> {
156        let mut result = HashMap::new();
157        for (key, _value) in self.commands.backing_container().iter().enumerate() {
158            let cs_name = match token::CsName::try_from_usize(key) {
159                None => continue,
160                Some(cs_name) => cs_name,
161            };
162            let command_ref = token::CommandRef::ControlSequence(cs_name);
163            let cmd = match self.get_command_slow(&command_ref) {
164                None => continue,
165                Some(cmd) => cmd,
166            };
167            result.insert(cs_name, cmd);
168        }
169        result
170    }
171
172    pub(crate) fn begin_group(&mut self) {
173        self.commands.begin_group();
174    }
175
176    pub(crate) fn end_group(&mut self) -> std::result::Result<(), groupingmap::NoGroupToEndError> {
177        self.commands.end_group()?;
178        Ok(())
179    }
180
    /// Returns `true` if [`Map::len`] is zero.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
184
    /// Returns the length reported by the control sequence command table.
    ///
    /// Only the control sequence table is consulted; commands bound to active characters are
    /// not counted.
    // NOTE(review): presumably this is the number of defined control sequences rather than the
    // capacity of the backing container; confirm against `GroupingVec::len`.
    pub fn len(&self) -> usize {
        self.commands.len()
    }
188
189    fn primitive_key_to_built_in(&self) -> Ref<'_, HashMap<PrimitiveKey, token::CsName>> {
190        if let Ok(r) = Ref::filter_map(self.primitive_key_to_built_in_lazy.borrow(), Option::as_ref)
191        {
192            return r;
193        }
194        *self.primitive_key_to_built_in_lazy.borrow_mut() = Some(
195            self.built_in_commands
196                .iter()
197                .filter_map(|(cs_name, built_in)| {
198                    PrimitiveKey::new(built_in.cmd()).map(|key| (key, *cs_name))
199                })
200                .collect(),
201        );
202        self.primitive_key_to_built_in()
203    }
204
205    pub(crate) fn getters_key_to_built_in(
206        &self,
207    ) -> Ref<'_, HashMap<variable::GettersKey, token::CsName>> {
208        if let Ok(r) = Ref::filter_map(self.getters_key_to_built_in_lazy.borrow(), Option::as_ref) {
209            return r;
210        }
211        *self.getters_key_to_built_in_lazy.borrow_mut() = Some(
212            self.primitive_key_to_built_in()
213                .iter()
214                .filter_map(|(key, cs_name)| match key {
215                    PrimitiveKey::Variable(variable_command_key) => {
216                        Some((variable_command_key.getter_key(), *cs_name))
217                    }
218                    _ => None,
219                })
220                .collect(),
221        );
222        self.getters_key_to_built_in()
223    }
224}
225
/// Error for an alias whose target control sequence is undefined.
#[derive(Debug)]
// Nothing in this file constructs the type, hence the lint exemption.
#[allow(dead_code)]
pub struct InvalidAlias;

impl fmt::Display for InvalidAlias {
    /// Writes the fixed, human readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid alias: the control sequence to alias is undefined")
    }
}

impl std::error::Error for InvalidAlias {}
240
/// Serializable stand-in for a command stored in a [`Map`].
///
/// Commands that cannot be serialized directly are replaced by a reference that
/// `SerializableMap::finish_deserialization` resolves again.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
enum SerializableCommand {
    /// A built-in command, identified by the control sequence name it is registered under in
    /// the built-in commands.
    BuiltIn(token::CsName),
    /// An element of a static variable array: the name of the built-in that provides the
    /// array, and the index of the element.
    VariableArrayStatic(token::CsName, usize),
    /// A macro, identified by its position in `SerializableMap::macros`.
    Macro(usize),
    /// Serialized form of `Command::CharacterTokenAlias`; the value is stored as is.
    CharacterTokenAlias(token::Value),
    /// Serialized form of `Command::Character`; the character is stored as is.
    Character(char),
    /// Serialized form of `Command::MathCharacter`; the math code is stored as is.
    MathCharacter(types::MathCode),
    /// Serialized form of `Command::Font`; the font is stored as is.
    Font(types::Font),
}
251
/// Serializable snapshot of the control sequence commands of a [`Map`].
///
/// A snapshot is created from a map with `SerializableMap::new` and turned back into a map
/// with `SerializableMap::finish_deserialization`.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub(crate) struct SerializableMap<'a> {
    // TODO: instead of serializing the map we could just serialize the iterator.
    // This would be more efficient on the deserialization path.
    // TODO: maybe the map should also serde itself using the iterator.
    /// The commands, keyed by control sequence name.
    commands: GroupingHashMap<token::CsName, SerializableCommand>,
    /// The macros referenced by `SerializableCommand::Macro` entries, each stored once. They
    /// are borrowed from the map when the snapshot is created by `SerializableMap::new`.
    macros: Vec<Cow<'a, texmacro::Macro>>,
}
260
#[cfg(feature = "serde")]
impl<State> serde::Serialize for Map<State> {
    /// Serializes the map by converting it into a [`SerializableMap`] snapshot and serializing
    /// that snapshot.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        SerializableMap::new(self).serialize(serializer)
    }
}
271
272impl<'a> SerializableMap<'a> {
273    fn new<S>(map: &'a Map<S>) -> Self {
274        let mut macros_de_dup = HashMap::<usize, usize>::new();
275        let mut macros: Vec<Cow<'a, texmacro::Macro>> = Default::default();
276        let primitive_key_to_built_in = map.primitive_key_to_built_in();
277        let getters_key_to_built_in = map.getters_key_to_built_in();
278        let commands: GroupingHashMap<token::CsName, SerializableCommand> = map
279            .commands
280            .iter_all()
281            .map(groupingmap::Item::adapt_map(
282                |(u, command): (usize, &Command<S>)| {
283                    let command: SerializableCommand = match command {
284                        Command::Expansion(_, _) | Command::Execution(_, _) => {
285                            let key = PrimitiveKey::new(command).unwrap();
286                            match primitive_key_to_built_in.get(&key) {
287                                None => todo!("return an error"),
288                                Some(built_in) => SerializableCommand::BuiltIn(*built_in),
289                            }
290                        }
291                        Command::Variable(variable_command) => {
292                            let key = PrimitiveKey::new(command).unwrap();
293                            if let Some(built_in) = primitive_key_to_built_in.get(&key) {
294                                SerializableCommand::BuiltIn(*built_in)
295                            } else {
296                                // As a fallback, we can serialize static references into arrays when
297                                // we've been provided with a way to reference the array.
298                                match variable_command.key() {
299                                    variable::CommandKey::ArrayStatic(getters_key, index) => {
300                                        let built_in =
301                                            getters_key_to_built_in.get(&getters_key).unwrap();
302                                        SerializableCommand::VariableArrayStatic(*built_in, index.0)
303                                    }
304                                    _ => todo!(),
305                                }
306                            }
307                        }
308                        Command::Macro(tex_macro) => {
309                            let rc_addr = Rc::as_ptr(tex_macro) as usize;
310                            let u = *macros_de_dup.entry(rc_addr).or_insert_with(|| {
311                                let u = macros.len();
312                                macros.push(Cow::Borrowed(tex_macro));
313                                u
314                            });
315                            SerializableCommand::Macro(u)
316                        }
317                        Command::CharacterTokenAlias(v) => {
318                            SerializableCommand::CharacterTokenAlias(*v)
319                        }
320                        Command::Character(c) => SerializableCommand::Character(*c),
321                        Command::MathCharacter(c) => SerializableCommand::MathCharacter(*c),
322                        Command::Font(font) => SerializableCommand::Font(*font),
323                    };
324
325                    let cs_name = token::CsName::try_from_usize(u).unwrap();
326                    (cs_name, command)
327                },
328            ))
329            .collect();
330        Self { commands, macros }
331    }
332
333    pub(crate) fn finish_deserialization<S>(
334        self,
335        built_in_commands: HashMap<token::CsName, BuiltIn<S>>,
336        interner: &token::CsNameInterner,
337    ) -> Map<S> {
338        let macros: Vec<Rc<texmacro::Macro>> = self
339            .macros
340            .into_iter()
341            .map(std::borrow::Cow::into_owned)
342            .map(Rc::new)
343            .collect();
344        let commands: GroupingVec<Command<S>> = self
345            .commands
346            .iter_all()
347            .map(groupingmap::Item::adapt_map(
348                |(cs_name, serialized_command): (token::CsName, &SerializableCommand)| {
349                    let command = match serialized_command {
350                        SerializableCommand::BuiltIn(cs_name) => {
351                            match built_in_commands.get(cs_name) {
352                                None => {
353                                    panic!(
354                                        "unknown control sequence {:?}",
355                                        interner.resolve(*cs_name)
356                                    )
357                                }
358                                Some(cmd) => cmd.cmd.clone(),
359                            }
360                        }
361                        SerializableCommand::VariableArrayStatic(cs_name, index) => {
362                            match &built_in_commands.get(cs_name).unwrap().cmd {
363                                Command::Variable(variable_command) => {
364                                    Command::Variable(std::rc::Rc::new(
365                                        variable_command.new_array_element(variable::Index(*index)),
366                                    ))
367                                }
368                                _ => todo!(),
369                            }
370                        }
371                        SerializableCommand::Macro(u) => {
372                            // TODO: error handling if the macro is missing
373                            Command::Macro(macros.get(*u).unwrap().clone())
374                        }
375                        SerializableCommand::CharacterTokenAlias(v) => {
376                            Command::CharacterTokenAlias(*v)
377                        }
378                        SerializableCommand::Character(c) => Command::Character(*c),
379                        SerializableCommand::MathCharacter(c) => Command::MathCharacter(*c),
380                        SerializableCommand::Font(font) => Command::Font(*font),
381                    };
382                    (cs_name.to_usize(), command)
383                },
384            ))
385            .collect();
386        Map {
387            commands,
388            active_char: Default::default(), // TODO
389            built_in_commands,
390            primitive_key_to_built_in_lazy: Default::default(),
391            getters_key_to_built_in_lazy: Default::default(),
392        }
393    }
394}