boxworks/
ds.rs

1//! Core data structures for the typesetting engine.
2//!
3//! This module contains the fundamental data structures for the Boxworks typesetting engine.
//! As in TeX, Boxworks is based around various lists (horizontal, vertical, etc.)
//!     that contain elements (which may themselves be nested lists).
6//! The Rust representations of these lists and their elements are defined here.
7//!
8//! This module implements the entirety of TeX.2021 part 10, "data structures
9//! for boxes and their friends".
10
11use core;
12use core::GlueOrder;
13use core::Scaled as Number;
14use std::rc::Rc;
15
16/// Element of a horizontal list.
17#[derive(Debug)]
18pub enum Horizontal {
19    Char(Char),
20    HList(HList),
21    VList(VList),
22    Rule(Rule),
23    Mark(Mark),
24    Insertion(Insertion),
25    Adjust(Adjust),
26    Ligature(Ligature),
27    Discretionary(Discretionary),
28    Whatsit(Box<dyn Whatsit>),
29    Math(Math),
30    Glue(Glue),
31    Kern(Kern),
32    Penalty(Penalty),
33}
34
35macro_rules! horizontal_impl {
36    ( $( $variant: ident , )+ ) => {
37        impl PartialEq for Horizontal {
38            fn eq(&self, other: &Self) -> bool {
39                match (self, other) {
40                    $(
41                    (Self::$variant(l), Self::$variant(r)) => l == r,
42                    )+
43                    _ => false,
44                }
45            }
46        }
47        $(
48        impl From<$variant> for Horizontal {
49            fn from(value: $variant) -> Self {
50                Horizontal::$variant(value)
51            }
52        }
53        )+
54    };
55}
56
57horizontal_impl!(
58    Char,
59    HList,
60    VList,
61    Rule,
62    Mark,
63    Insertion,
64    Adjust,
65    Ligature,
66    Discretionary,
67    Math,
68    Glue,
69    Kern,
70    Penalty,
71);
72
73/// Element of a vertical list.
74#[derive(Debug)]
75pub enum Vertical {
76    HList(HList),
77    VList(VList),
78    Rule(Rule),
79    Mark(Mark),
80    Insertion(Insertion),
81    Whatsit(Box<dyn Whatsit>),
82    Math(Math),
83    Glue(Glue),
84    Kern(Kern),
85    Penalty(Penalty),
86}
87
88macro_rules! vertical_impl {
89    ( $( $variant: ident , )+ ) => {
90        impl PartialEq for Vertical {
91            fn eq(&self, other: &Self) -> bool {
92                match (self, other) {
93                    $(
94                    (Self::$variant(l), Self::$variant(r)) => l == r,
95                    )+
96                    _ => false,
97                }
98            }
99        }
100        $(
101        impl From<$variant> for Vertical {
102            fn from(value: $variant) -> Self {
103                Vertical::$variant(value)
104            }
105        }
106        )+
107    };
108}
109
110vertical_impl!(HList, VList, Rule, Mark, Insertion, Math, Glue, Kern, Penalty,);
111
/// A single character set in a particular font.
///
/// Character nodes only occur in horizontal lists; [Vertical] has no
/// corresponding variant.
///
/// Described in TeX.2021.134.
#[derive(Debug, PartialEq, Eq)]
pub struct Char {
    /// The character itself.
    pub char: char,
    /// Identifier of the font the character is set in.
    pub font: u32,
}
122
123/// A box made from a horizontal list.
124///
125/// Described in TeX.2021.135.
126#[derive(Debug, PartialEq)]
127pub struct HList {
128    pub height: Number,
129    pub width: Number,
130    pub depth: Number,
131    /// How much this box should be lowered (if it appears in a horizontal list),
132    /// or how much it should be moved to the right (if it appears in a vertical
133    /// list).
134    pub shift_amount: Number,
135    pub list: Vec<Horizontal>,
136    pub glue_ratio: GlueRatio,
137    pub glue_sign: GlueSign,
138    pub glue_order: GlueOrder,
139}
140
/// The factor by which glue stretches or shrinks.
///
/// TeX generally sticks to fixed point integers so that results are
/// identical on every computer/CPU. The glue ratio is one of the few
/// (perhaps the only) places where a floating point number is used instead.
/// That is acceptable because the exact semantics of the glue ratio don't
/// affect the output.
///
/// The default ratio is `0.0`.
///
/// Described in TeX.2021.109.
#[derive(Debug, Default, PartialEq)]
pub struct GlueRatio(pub f32);

// `f32` is not `Eq` because of NaN, so `Eq` cannot be derived. The manual
// impl below is only sound as long as the ratio is never NaN.
// TODO: ensure that glue ratio cannot be NaN
impl Eq for GlueRatio {}
156
/// The direction in which the glue of a box is adjusted.
///
/// The default is [GlueSign::Normal].
#[derive(Debug, Default, PartialEq, Eq)]
pub enum GlueSign {
    /// The glue stretches.
    Stretching,
    /// The glue shrinks.
    Shrinking,
    /// The glue remains rigid.
    #[default]
    Normal,
}
165
166impl HList {
167    /// Returns a hlist node corresponding to the TeX snippet `\hbox{}`.
168    ///
169    /// Described in TeX.2021.136.
170    pub fn new_null_box() -> Self {
171        Self {
172            height: Number::ZERO,
173            width: Number::ZERO,
174            depth: Number::ZERO,
175            shift_amount: Number::ZERO,
176            list: vec![],
177            glue_ratio: GlueRatio(0.0),
178            glue_sign: GlueSign::Normal,
179            glue_order: GlueOrder::Normal,
180        }
181    }
182}
183
184impl Default for HList {
185    fn default() -> Self {
186        Self::new_null_box()
187    }
188}
189
190/// A box made from a vertical list.
191///
192/// This is the same as [HList], except the list inside holds [Vertical] nodes
193/// instead of [Horizontal] nodes.
194///
195/// Described in TeX.2021.137.
196#[derive(Debug, Default, PartialEq)]
197pub struct VList {
198    pub height: Number,
199    pub width: Number,
200    pub depth: Number,
201    pub shift_amount: Number,
202    pub list: Vec<Vertical>,
203    pub glue_ratio: GlueRatio,
204    pub glue_sign: GlueSign,
205    pub glue_order: GlueOrder,
206}
207
208/// A rule stands for a solid black rectangle.
209///
210/// It has width, depth and height fields.
211/// However if any of these dimensions is -2^30, the actual value will be
212/// determined by running rule up to the boundary of the innermost, enclosing box.
213/// This is called a "running dimension".
214/// The width is never running in an hlist; the height and depth are never running
215/// in a vlist.
216///
217/// Described in TeX.2021.138.
218#[derive(Debug, PartialEq, Eq)]
219pub struct Rule {
220    pub height: Number,
221    pub width: Number,
222    pub depth: Number,
223}
224
225impl Rule {
226    pub const RUNNING: Number = Number(-2 << 30);
227
228    /// Creates a new rule.
229    ///
230    /// All of the dimensions are running.
231    ///
232    /// Described in TeX.2021.139.
233    pub fn new() -> Self {
234        Self {
235            height: Self::RUNNING,
236            width: Self::RUNNING,
237            depth: Self::RUNNING,
238        }
239    }
240}
241
242impl Default for Rule {
243    fn default() -> Self {
244        Self::new()
245    }
246}
247
248/// Vertical material to be inserted.
249///
250/// This node is related to the TeX primitive `\insert`.
251///
252/// Described in TeX.2021.140.
253#[derive(Debug, PartialEq)]
254pub struct Insertion {
255    pub box_number: u8,
256    /// Slightly misnamed: it actually holds the natural height plus depth
257    /// of the vertical list being inserted.
258    pub height: Number,
259    /// Used in case this insertion is split.
260    pub split_max_depth: Number,
261    pub split_top_skip: core::Glue,
262    /// Penalty to be used if this insertion floats to a subsequent
263    /// page after a split insertion of the same class.
264    pub float_penalty: u32,
265    pub vlist: Vec<Vertical>,
266}
267
/// The contents of a user's `\mark` text.
///
/// TODO: It is not yet decided what this node should hold.
/// In Knuth's TeX it references a token list, but Boxworks should not
/// depend on Texlang, so for the moment the node carries a placeholder
/// list of unit values.
///
/// Described in TeX.2021.141.
#[derive(Debug, PartialEq, Eq)]
pub struct Mark {
    /// Placeholder for the mark's token list.
    pub list: Vec<()>,
}
279
280/// Specifies material that will be moved out into the surrounding vertical list.
281///
282/// E.g., used to implement the TeX primitive `\vadjust`.
283///
284/// Described in TeX.2021.142.
285#[derive(Debug, PartialEq)]
286pub struct Adjust {
287    pub list: Vec<Vertical>,
288}
289
/// A ligature: one character standing in for a run of original characters.
///
/// Described in TeX.2021.143.
#[derive(Debug, PartialEq, Eq)]
pub struct Ligature {
    /// Whether the ligature included the left boundary.
    pub included_left_boundary: bool,
    /// Whether the ligature included the right boundary.
    pub included_right_boundary: bool,
    /// The ligature character.
    pub char: char,
    /// Identifier of the font the ligature character is set in.
    pub font: u32,
    /// The characters that the ligature replaced.
    ///
    /// They are kept so that the engine can break the ligature apart
    /// again when it needs to hyphenate.
    ///
    /// Most ligatures come from 2 characters (e.g. ff), but TeX's lig/kern
    /// programming language allows a single ligature to come from
    /// arbitrarily many characters.
    pub original_chars: Rc<str>,
}
308
309// Two constructors for ligature nodes are provided in TeX.2021.144
310// but they don't seem that useful so I'm omitting them.
311
312/// A discretionary break.
313///
314/// The pre-break and post-break lists must only contain nodes
315/// of type char, kern, box, rule or ligature.
316/// We could have a specific node type for this, but for the moment
317/// we just piggy back on the hlist type.
318///
319/// Described in TeX.2021.145.
320#[derive(Debug, PartialEq)]
321pub struct Discretionary {
322    /// Material to insert before this node, if the break occurs here.
323    pub pre_break: Vec<Horizontal>,
324    /// Material to insert after this node, if the break occurs here.
325    pub post_break: Vec<Horizontal>,
326    /// Number of subsequent nodes to skip if the break occurs here.
327    pub replace_count: u32,
328}
329
330impl Discretionary {
331    pub fn new() -> Self {
332        Self {
333            pre_break: vec![],
334            post_break: vec![],
335            replace_count: 0,
336        }
337    }
338}
339
340impl Default for Discretionary {
341    fn default() -> Self {
342        Self::new()
343    }
344}
345
/// A whatsit node.
///
/// Whatsits exist to facilitate extensions to TeX.
/// It is not yet clear what the API of this trait will be, though it can
/// be worked out by reading the "Extensions" chapter (Chapter 53) of TeX.
///
/// Knuth uses this node type to implement both `\write` and `\special`,
/// so the required API will eventually become apparent.
///
/// For now the only requirement on implementors is [std::fmt::Debug].
///
/// Described in TeX.2021.146.
pub trait Whatsit: std::fmt::Debug {}
358
/// A marker that sits at the boundary of math mode.
///
/// Described in TeX.2021.147.
#[derive(Debug, PartialEq, Eq)]
pub enum Math {
    /// Placed before math mode.
    Before,
    /// Placed after math mode.
    After,
}
367
368impl Horizontal {
369    /// Whether a glue node that comes after this node may be broken.
370    ///
371    /// For char nodes, this function is essentially undefined in Knuth's
372    /// TeX. More specifically, the value depends on the exact character code.
373    /// In TeX this function is never called for char nodes which is why this
374    /// is not a problem. Here, we return `true` for char nodes based on
375    /// my analysis of all places in Knuth's TeX where it is invoked:
376    ///
377    /// - TeX.2021.868: `precedes_break` is called on variable `cur_p` which
378    ///   is a pointer to a horizontal list. Before this call, the calling code
379    ///   first checks if the node is a character and if so follows the same
380    ///   code path. Thus returning `true` here is the right thing to do.
381    ///
382    /// - TeX.2021.973: the function is called on a variable `prev_p` which
383    ///   is a pointer to a vertical list and so the char case never arises.
384    ///
385    /// - TeX.2021.1000: same as the last case.
386    ///
387    /// This function is defined in TeX.2021.148.
388    pub fn precedes_break(&self) -> bool {
389        use Horizontal::*;
390        matches!(
391            self,
392            Char(_)
393                | HList(_)
394                | VList(_)
395                | Rule(_)
396                | Mark(_)
397                | Insertion(_)
398                | Adjust(_)
399                | Ligature(_)
400                | Discretionary(_)
401                | Whatsit(_)
402        )
403    }
404
405    /// Whether this node is discarded after a break.
406    ///
407    /// As with [Self::precedes_break], this function is essentially undefined
408    /// for char nodes in Knuth's TeX. However there is only one call site
409    /// (TeX.2021.879) and in that call site char nodes behave as if this
410    /// function returns true.
411    ///
412    /// This function is defined in TeX.2021.148.
413    pub fn non_discardable(&self) -> bool {
414        self.precedes_break()
415    }
416}
417
418impl Vertical {
419    /// Whether a glue node that comes after this node may be broken.
420    ///
421    /// This function is defined in TeX.2021.148.
422    pub fn precedes_break(&self) -> bool {
423        use Vertical::*;
424        matches!(
425            self,
426            HList(_) | VList(_) | Rule(_) | Mark(_) | Insertion(_) | Whatsit(_)
427        )
428    }
429}
430
431/// A piece of glue.
432///
433/// Described in TeX.2021.149.
434#[derive(Debug, PartialEq, Eq)]
435pub struct Glue {
436    pub value: core::Glue,
437    pub kind: GlueKind,
438}
439
440impl From<core::Glue> for Glue {
441    fn from(value: core::Glue) -> Self {
442        Self {
443            value,
444            kind: Default::default(),
445        }
446    }
447}
448
/// The different kinds of glue node.
///
/// The default kind is [GlueKind::Normal].
///
/// Described in TeX.2021.149.
#[derive(Debug, Default, PartialEq, Eq)]
pub enum GlueKind {
    #[default]
    Normal,
    ConditionalMath,
    Math,
    AlignedLeader,
    CenteredLeader,
    ExpandedLeader,
}
462
463// TeX.2021.150 and TeX.2021.151 define the [font::Glue] type itself,
464// which is not in this crate.
465
466// Three constructors for glue nodes are provided in TeX.2021.152,
467// TeX.2021.153 and TeX.2021.154 but they don't seem that
468// useful so I'm omitting them.
469
470/// A kern.
471///
472/// Described in TeX.2021.155.
473#[derive(Debug, PartialEq, Eq)]
474pub struct Kern {
475    pub width: Number,
476    pub kind: KernKind,
477}
478
/// The different kinds of kern node, distinguished by their origin.
///
/// Described in TeX.2021.155.
#[derive(Debug, PartialEq, Eq)]
pub enum KernKind {
    /// A kern that comes from font information or from math mode
    /// calculations.
    Normal,
    /// A kern requested explicitly, e.g. with TeX's `\kern` primitive.
    Explicit,
    /// A kern that comes from a non-math accent.
    Accent,
    /// A kern that comes from e.g. an `\mkern` specification in a math
    /// formula.
    Math,
}
493
494// A constructor for kern nodes is provided in TeX.2021.156,
495// but it doesn't seem useful.
496
/// A penalty node.
///
/// Described in TeX.2021.157.
#[derive(Debug, PartialEq, Eq)]
pub struct Penalty {
    /// The penalty value.
    pub value: i32,
}

impl Penalty {
    /// Threshold for an infinite penalty.
    ///
    /// Per TeX.2021.157, any penalty at or above this value is treated as
    /// infinite and no break is allowed at such a penalty.
    pub const INFINITE: i32 = 10_000;

    /// Threshold for a forced break; the negation of [Penalty::INFINITE].
    ///
    /// Per TeX.2021.157, any penalty at or below this value is treated as
    /// negative infinity and results in a forced break.
    pub const EJECT: i32 = -Self::INFINITE;
}
513
514// A constructor for penalty nodes is provided in TeX.2021.157,
515// but it doesn't seem useful.
516
517// TODO: Unset node(s) in TeX.2021.159