boxworks/ds.rs
//! Core data structures for the typesetting engine.
//!
//! This module contains the fundamental data structures for the Boxworks typesetting engine.
//! As in TeX, Boxworks is based around various lists (horizontal, vertical, etc.)
//! that contain elements (which themselves may be nested lists).
//! The Rust representations of these lists and their elements are defined here.
//!
//! This module implements the entirety of TeX.2021 part 10, "data structures
//! for boxes and their friends".
10
11use core;
12use core::GlueOrder;
13use core::Scaled as Number;
14use std::rc::Rc;
15
16/// Element of a horizontal list.
17#[derive(Debug)]
18pub enum Horizontal {
19 Char(Char),
20 HList(HList),
21 VList(VList),
22 Rule(Rule),
23 Mark(Mark),
24 Insertion(Insertion),
25 Adjust(Adjust),
26 Ligature(Ligature),
27 Discretionary(Discretionary),
28 Whatsit(Box<dyn Whatsit>),
29 Math(Math),
30 Glue(Glue),
31 Kern(Kern),
32 Penalty(Penalty),
33}
34
35macro_rules! horizontal_impl {
36 ( $( $variant: ident , )+ ) => {
37 impl PartialEq for Horizontal {
38 fn eq(&self, other: &Self) -> bool {
39 match (self, other) {
40 $(
41 (Self::$variant(l), Self::$variant(r)) => l == r,
42 )+
43 _ => false,
44 }
45 }
46 }
47 $(
48 impl From<$variant> for Horizontal {
49 fn from(value: $variant) -> Self {
50 Horizontal::$variant(value)
51 }
52 }
53 )+
54 };
55}
56
57horizontal_impl!(
58 Char,
59 HList,
60 VList,
61 Rule,
62 Mark,
63 Insertion,
64 Adjust,
65 Ligature,
66 Discretionary,
67 Math,
68 Glue,
69 Kern,
70 Penalty,
71);
72
73/// Element of a vertical list.
74#[derive(Debug)]
75pub enum Vertical {
76 HList(HList),
77 VList(VList),
78 Rule(Rule),
79 Mark(Mark),
80 Insertion(Insertion),
81 Whatsit(Box<dyn Whatsit>),
82 Math(Math),
83 Glue(Glue),
84 Kern(Kern),
85 Penalty(Penalty),
86}
87
88macro_rules! vertical_impl {
89 ( $( $variant: ident , )+ ) => {
90 impl PartialEq for Vertical {
91 fn eq(&self, other: &Self) -> bool {
92 match (self, other) {
93 $(
94 (Self::$variant(l), Self::$variant(r)) => l == r,
95 )+
96 _ => false,
97 }
98 }
99 }
100 $(
101 impl From<$variant> for Vertical {
102 fn from(value: $variant) -> Self {
103 Vertical::$variant(value)
104 }
105 }
106 )+
107 };
108}
109
110vertical_impl!(HList, VList, Rule, Mark, Insertion, Math, Glue, Kern, Penalty,);
111
/// A single character, set in a particular font.
///
/// This node can only appear in horizontal mode.
///
/// Described in TeX.2021.134.
#[derive(PartialEq, Eq, Debug)]
pub struct Char {
    /// The character being typeset.
    pub char: char,
    /// Numeric identifier of the font the character is set in.
    pub font: u32,
}
122
123/// A box made from a horizontal list.
124///
125/// Described in TeX.2021.135.
126#[derive(Debug, PartialEq)]
127pub struct HList {
128 pub height: Number,
129 pub width: Number,
130 pub depth: Number,
131 /// How much this box should be lowered (if it appears in a horizontal list),
132 /// or how much it should be moved to the right (if it appears in a vertical
133 /// list).
134 pub shift_amount: Number,
135 pub list: Vec<Horizontal>,
136 pub glue_ratio: GlueRatio,
137 pub glue_sign: GlueSign,
138 pub glue_order: GlueOrder,
139}
140
/// Ratio by which glue should shrink or stretch.
///
/// This is one of the few (only?) places in TeX where a floating point
/// number is used.
/// In general TeX uses fixed point integers to ensure that the results are
/// the same on every computer/CPU.
/// But the exact semantics of the glue ratio don't affect the output, so
/// using a float is okay.
///
/// Equality is implemented by hand rather than derived: two NaN ratios
/// compare equal to each other, which keeps the [Eq] implementation lawful
/// (a derived float comparison would make a NaN ratio unequal to itself).
/// All other values compare exactly like `f32`, so `0.0 == -0.0` still holds.
///
/// Described in TeX.2021.109.
#[derive(Default, Debug)]
pub struct GlueRatio(pub f32);

impl PartialEq for GlueRatio {
    fn eq(&self, other: &Self) -> bool {
        // The NaN clause is what makes this relation reflexive.
        self.0 == other.0 || (self.0.is_nan() && other.0.is_nan())
    }
}

// TODO: ensure that glue ratio cannot be NaN
// Until then, `Eq` is sound because the `PartialEq` impl above is reflexive
// even for NaN.
impl Eq for GlueRatio {}
156
/// Tells whether the glue should stretch, shrink, or remain rigid.
#[derive(PartialEq, Eq, Default, Debug)]
pub enum GlueSign {
    /// The glue stretches.
    Stretching,
    /// The glue shrinks.
    Shrinking,
    /// The glue remains rigid. This is the default.
    #[default]
    Normal,
}
165
166impl HList {
167 /// Returns a hlist node corresponding to the TeX snippet `\hbox{}`.
168 ///
169 /// Described in TeX.2021.136.
170 pub fn new_null_box() -> Self {
171 Self {
172 height: Number::ZERO,
173 width: Number::ZERO,
174 depth: Number::ZERO,
175 shift_amount: Number::ZERO,
176 list: vec![],
177 glue_ratio: GlueRatio(0.0),
178 glue_sign: GlueSign::Normal,
179 glue_order: GlueOrder::Normal,
180 }
181 }
182}
183
184impl Default for HList {
185 fn default() -> Self {
186 Self::new_null_box()
187 }
188}
189
190/// A box made from a vertical list.
191///
192/// This is the same as [HList], except the list inside holds [Vertical] nodes
193/// instead of [Horizontal] nodes.
194///
195/// Described in TeX.2021.137.
196#[derive(Debug, Default, PartialEq)]
197pub struct VList {
198 pub height: Number,
199 pub width: Number,
200 pub depth: Number,
201 pub shift_amount: Number,
202 pub list: Vec<Vertical>,
203 pub glue_ratio: GlueRatio,
204 pub glue_sign: GlueSign,
205 pub glue_order: GlueOrder,
206}
207
208/// A rule stands for a solid black rectangle.
209///
210/// It has width, depth and height fields.
211/// However if any of these dimensions is -2^30, the actual value will be
212/// determined by running rule up to the boundary of the innermost, enclosing box.
213/// This is called a "running dimension".
214/// The width is never running in an hlist; the height and depth are never running
215/// in a vlist.
216///
217/// Described in TeX.2021.138.
218#[derive(Debug, PartialEq, Eq)]
219pub struct Rule {
220 pub height: Number,
221 pub width: Number,
222 pub depth: Number,
223}
224
225impl Rule {
226 pub const RUNNING: Number = Number(-2 << 30);
227
228 /// Creates a new rule.
229 ///
230 /// All of the dimensions are running.
231 ///
232 /// Described in TeX.2021.139.
233 pub fn new() -> Self {
234 Self {
235 height: Self::RUNNING,
236 width: Self::RUNNING,
237 depth: Self::RUNNING,
238 }
239 }
240}
241
242impl Default for Rule {
243 fn default() -> Self {
244 Self::new()
245 }
246}
247
248/// Vertical material to be inserted.
249///
250/// This node is related to the TeX primitive `\insert`.
251///
252/// Described in TeX.2021.140.
253#[derive(Debug, PartialEq)]
254pub struct Insertion {
255 pub box_number: u8,
256 /// Slightly misnamed: it actually holds the natural height plus depth
257 /// of the vertical list being inserted.
258 pub height: Number,
259 /// Used in case this insertion is split.
260 pub split_max_depth: Number,
261 pub split_top_skip: core::Glue,
262 /// Penalty to be used if this insertion floats to a subsequent
263 /// page after a split insertion of the same class.
264 pub float_penalty: u32,
265 pub vlist: Vec<Vertical>,
266}
267
/// The contents of a user's `\mark` text.
///
/// TODO: At time of writing it is not clear what this node should hold.
/// Knuth's TeX stores a reference to a token list here, but Boxworks should
/// not depend on Texlang, so for now the node carries a placeholder list.
///
/// Described in TeX.2021.141.
#[derive(PartialEq, Eq, Debug)]
pub struct Mark {
    /// Placeholder for the mark's contents.
    pub list: Vec<()>,
}
279
280/// Specifies material that will be moved out into the surrounding vertical list.
281///
282/// E.g., used to implement the TeX primitive `\vadjust`.
283///
284/// Described in TeX.2021.142.
285#[derive(Debug, PartialEq)]
286pub struct Adjust {
287 pub list: Vec<Vertical>,
288}
289
/// A ligature node.
///
/// Described in TeX.2021.143.
#[derive(PartialEq, Eq, Debug)]
pub struct Ligature {
    /// Whether the ligature included the left boundary.
    pub included_left_boundary: bool,
    /// Whether the ligature included the right boundary.
    pub included_right_boundary: bool,
    /// The ligature character.
    pub char: char,
    /// Numeric identifier of the font the ligature is set in.
    pub font: u32,
    /// The characters that the ligature replaced.
    ///
    /// The engine needs these when it has to break the ligature apart
    /// again in order to hyphenate.
    ///
    /// Most ligatures are built from 2 characters (e.g. ff), but TeX's
    /// lig/kern programming language lets a single ligature be formed from
    /// arbitrarily many characters.
    pub original_chars: Rc<str>,
}
308
309// Two constructors for ligature nodes are provided in TeX.2021.144
310// but they don't seem that useful so I'm omitting them.
311
312/// A discretionary break.
313///
314/// The pre-break and post-break lists must only contain nodes
315/// of type char, kern, box, rule or ligature.
316/// We could have a specific node type for this, but for the moment
317/// we just piggy back on the hlist type.
318///
319/// Described in TeX.2021.145.
320#[derive(Debug, PartialEq)]
321pub struct Discretionary {
322 /// Material to insert before this node, if the break occurs here.
323 pub pre_break: Vec<Horizontal>,
324 /// Material to insert after this node, if the break occurs here.
325 pub post_break: Vec<Horizontal>,
326 /// Number of subsequent nodes to skip if the break occurs here.
327 pub replace_count: u32,
328}
329
330impl Discretionary {
331 pub fn new() -> Self {
332 Self {
333 pre_break: vec![],
334 post_break: vec![],
335 replace_count: 0,
336 }
337 }
338}
339
340impl Default for Discretionary {
341 fn default() -> Self {
342 Self::new()
343 }
344}
345
/// A whatsit node.
///
/// Whatsits exist to facilitate extensions to TeX.
/// It is not yet clear what the API of this trait will be, though it can
/// be worked out by reading Chapter 53, "Extensions", of TeX.
///
/// Knuth uses this node type to implement both `\write` and `\special`,
/// so we'll eventually find out.
///
/// The only requirement at present is that implementors are debuggable.
///
/// Described in TeX.2021.146.
pub trait Whatsit
where
    Self: std::fmt::Debug,
{
}
358
/// A marker that sits before or after math mode material.
///
/// Described in TeX.2021.147.
#[derive(PartialEq, Eq, Debug)]
pub enum Math {
    /// The marker placed before math mode.
    Before,
    /// The marker placed after math mode.
    After,
}
367
368impl Horizontal {
369 /// Whether a glue node that comes after this node may be broken.
370 ///
371 /// For char nodes, this function is essentially undefined in Knuth's
372 /// TeX. More specifically, the value depends on the exact character code.
373 /// In TeX this function is never called for char nodes which is why this
374 /// is not a problem. Here, we return `true` for char nodes based on
375 /// my analysis of all places in Knuth's TeX where it is invoked:
376 ///
377 /// - TeX.2021.868: `precedes_break` is called on variable `cur_p` which
378 /// is a pointer to a horizontal list. Before this call, the calling code
379 /// first checks if the node is a character and if so follows the same
380 /// code path. Thus returning `true` here is the right thing to do.
381 ///
382 /// - TeX.2021.973: the function is called on a variable `prev_p` which
383 /// is a pointer to a vertical list and so the char case never arises.
384 ///
385 /// - TeX.2021.1000: same as the last case.
386 ///
387 /// This function is defined in TeX.2021.148.
388 pub fn precedes_break(&self) -> bool {
389 use Horizontal::*;
390 matches!(
391 self,
392 Char(_)
393 | HList(_)
394 | VList(_)
395 | Rule(_)
396 | Mark(_)
397 | Insertion(_)
398 | Adjust(_)
399 | Ligature(_)
400 | Discretionary(_)
401 | Whatsit(_)
402 )
403 }
404
405 /// Whether this node is discarded after a break.
406 ///
407 /// As with [Self::precedes_break], this function is essentially undefined
408 /// for char nodes in Knuth's TeX. However there is only one call site
409 /// (TeX.2021.879) and in that call site char nodes behave as if this
410 /// function returns true.
411 ///
412 /// This function is defined in TeX.2021.148.
413 pub fn non_discardable(&self) -> bool {
414 self.precedes_break()
415 }
416}
417
418impl Vertical {
419 /// Whether a glue node that comes after this node may be broken.
420 ///
421 /// This function is defined in TeX.2021.148.
422 pub fn precedes_break(&self) -> bool {
423 use Vertical::*;
424 matches!(
425 self,
426 HList(_) | VList(_) | Rule(_) | Mark(_) | Insertion(_) | Whatsit(_)
427 )
428 }
429}
430
431/// A piece of glue.
432///
433/// Described in TeX.2021.149.
434#[derive(Debug, PartialEq, Eq)]
435pub struct Glue {
436 pub value: core::Glue,
437 pub kind: GlueKind,
438}
439
440impl From<core::Glue> for Glue {
441 fn from(value: core::Glue) -> Self {
442 Self {
443 value,
444 kind: Default::default(),
445 }
446 }
447}
448
/// The different kinds of glue node.
///
/// `Normal` is the default kind.
///
/// Described in TeX.2021.149.
#[derive(PartialEq, Eq, Default, Debug)]
pub enum GlueKind {
    #[default]
    Normal,
    ConditionalMath,
    Math,
    AlignedLeader,
    CenteredLeader,
    ExpandedLeader,
}
462
463// TeX.2021.150 and TeX.2021.151 define the [font::Glue] type itself,
464// which is not in this crate.
465
466// Three constructors for glue nodes are provided in TeX.2021.152,
467// TeX.2021.153 and TeX.2021.154 but they don't seem that
468// useful so I'm omitting them.
469
470/// A kern.
471///
472/// Described in TeX.2021.155.
473#[derive(Debug, PartialEq, Eq)]
474pub struct Kern {
475 pub width: Number,
476 pub kind: KernKind,
477}
478
/// The different kinds of kern node.
///
/// Described in TeX.2021.155.
#[derive(PartialEq, Eq, Debug)]
pub enum KernKind {
    /// A kern that comes from font information or math mode calculations.
    Normal,
    /// A kern that was requested explicitly, e.g. with TeX's `\kern` primitive.
    Explicit,
    /// A kern that comes from a non-math accent.
    Accent,
    /// A kern that comes from e.g. an `\mkern` specification in a math formula.
    Math,
}
493
494// A constructor for kern nodes is provided in TeX.2021.156,
495// but it doesn't seem useful.
496
/// A penalty.
///
/// Described in TeX.2021.157.
#[derive(Debug, PartialEq, Eq)]
pub struct Penalty {
    /// The penalty amount. See [Penalty::INFINITE] and [Penalty::EJECT] for
    /// the two thresholds at which a penalty becomes effectively infinite.
    pub value: i32,
}

impl Penalty {
    /// Any penalty greater than or equal to this is considered infinite and
    /// no break will be allowed for such high values.
    pub const INFINITE: i32 = 10_000;

    /// Any penalty less than or equal to this is considered negatively
    /// infinite and will result in a forced break.
    pub const EJECT: i32 = -Self::INFINITE;
}
513
// A constructor for penalty nodes is provided in TeX.2021.158,
// but it doesn't seem useful.
516
517// TODO: Unset node(s) in TeX.2021.159