1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
//! Core data structures for the typesetting engine.
//!
//! This module contains the fundamental data structures for the Boxworks typesetting engine.
//! As in TeX, Boxworks is based around various lists (horizontal, vertical, etc.)
//! that contain elements (which themselves may be nested lists).
//! The Rust representations of these lists and their elements are defined here.
//!
//! This module implements the entirety of TeX.2021 part 10, "data structures
//! for boxes and their friends".
use core;
use core::GlueOrder;
use core::Scaled as Number;
use std::rc::Rc;
/// Element of a horizontal list.
///
/// Every variant wraps the node type of the same name, except for
/// [Horizontal::Whatsit] which holds a boxed [Whatsit] trait object.
#[derive(Debug)]
pub enum Horizontal {
/// A character in a specific font.
Char(Char),
/// A box made from a horizontal list.
HList(HList),
/// A box made from a vertical list.
VList(VList),
/// A solid black rectangle.
Rule(Rule),
/// Contents of a user's `\mark` text.
Mark(Mark),
/// Vertical material to be inserted.
Insertion(Insertion),
/// Material that will be moved out into the surrounding vertical list.
Adjust(Adjust),
/// A ligature.
Ligature(Ligature),
/// A discretionary break.
Discretionary(Discretionary),
/// An extension node.
Whatsit(Box<dyn Whatsit>),
/// A marker placed before or after math mode.
Math(Math),
/// A piece of glue.
Glue(Glue),
/// A kern.
Kern(Kern),
/// A penalty.
Penalty(Penalty),
}
impl Eq for Horizontal {}
macro_rules! horizontal_impl {
( $( $variant: ident , )+ ) => {
impl PartialEq for Horizontal {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
$(
(Self::$variant(l), Self::$variant(r)) => l == r,
)+
_ =>false,
}
}
}
$(
impl From<$variant> for Horizontal {
fn from(value: $variant) -> Self {
Horizontal::$variant(value)
}
}
)+
};
}
horizontal_impl!(
Char,
HList,
VList,
Rule,
Mark,
Insertion,
Adjust,
Ligature,
Discretionary,
Math,
Glue,
Kern,
Penalty,
);
/// Element of a vertical list.
///
/// Every variant wraps the node type of the same name, except for
/// [Vertical::Whatsit] which holds a boxed [Whatsit] trait object.
#[derive(Debug)]
pub enum Vertical {
/// A box made from a horizontal list.
HList(HList),
/// A box made from a vertical list.
VList(VList),
/// A solid black rectangle.
Rule(Rule),
/// Contents of a user's `\mark` text.
Mark(Mark),
/// Vertical material to be inserted.
Insertion(Insertion),
/// An extension node.
Whatsit(Box<dyn Whatsit>),
/// A marker placed before or after math mode.
Math(Math),
/// A piece of glue.
Glue(Glue),
/// A kern.
Kern(Kern),
/// A penalty.
Penalty(Penalty),
}
impl PartialEq for Vertical {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::HList(l0), Self::HList(r0)) => l0 == r0,
(Self::VList(l0), Self::VList(r0)) => l0 == r0,
(Self::Rule(l0), Self::Rule(r0)) => l0 == r0,
(Self::Mark(l0), Self::Mark(r0)) => l0 == r0,
(Self::Insertion(l0), Self::Insertion(r0)) => l0 == r0,
(Self::Whatsit(_), Self::Whatsit(_)) => false,
(Self::Math(l0), Self::Math(r0)) => l0 == r0,
(Self::Glue(l0), Self::Glue(r0)) => l0 == r0,
(Self::Kern(l0), Self::Kern(r0)) => l0 == r0,
(Self::Penalty(l0), Self::Penalty(r0)) => l0 == r0,
_ => false,
}
}
}
impl Eq for Vertical {}
/// A character in a specific font.
///
/// This node can only appear in horizontal mode.
///
/// Described in TeX.2021.134.
#[derive(Debug, PartialEq, Eq)]
pub struct Char {
/// The character itself.
pub char: char,
/// Number identifying the font of the character.
// NOTE(review): how this number maps to an actual font is not visible in this file — confirm.
pub font: u32,
}
/// A box made from a horizontal list.
///
/// Described in TeX.2021.135.
#[derive(Debug, PartialEq, Eq)]
pub struct HList {
/// Height of the box.
pub height: Number,
/// Width of the box.
pub width: Number,
/// Depth of the box.
pub depth: Number,
/// How much this box should be lowered (if it appears in a horizontal list),
/// or how much it should be moved to the right (if it appears in a vertical
/// list).
pub shift_amount: Number,
/// The horizontal list this box is made from.
pub list: Vec<Horizontal>,
/// Ratio by which glue should shrink or stretch.
pub glue_ratio: GlueRatio,
/// Whether the glue should stretch, shrink, or remain rigid.
pub glue_sign: GlueSign,
// NOTE(review): presumably the order of infinity of the glue that is
// stretched or shrunk (TeX's `glue_order`) — confirm against `core::GlueOrder`.
pub glue_order: GlueOrder,
}
/// Ratio by which glue should shrink or stretch.
///
/// This is one of the few (only?) places in TeX where a floating point
/// number is used.
/// In general TeX uses fixed point integers to ensure that the results are
/// the same on every computer/CPU.
/// But the exact semantics of the glue ratio don't affect the output, so
/// using a float is okay.
///
/// Described in TeX.2021.109.
#[derive(Debug, PartialEq)]
pub struct GlueRatio(pub f32);
// TODO: ensure that glue ratio cannot be NaN
// Until that is enforced, this `Eq` impl only holds up for non-NaN ratios:
// `f32::NAN != f32::NAN`, so a NaN ratio would not be equal to itself.
impl Eq for GlueRatio {}
/// Description of whether the glue should stretch, shrink, or remain rigid.
#[derive(Debug, PartialEq, Eq)]
pub enum GlueSign {
/// The glue should stretch.
Stretching,
/// The glue should shrink.
Shrinking,
/// The glue should remain rigid.
Normal,
}
impl HList {
/// Returns a hlist node corresponding to the TeX snippet `\hbox{}`.
///
/// Described in TeX.2021.136.
pub fn new_null_box() -> Self {
Self {
height: Number::ZERO,
width: Number::ZERO,
depth: Number::ZERO,
shift_amount: Number::ZERO,
list: vec![],
glue_ratio: GlueRatio(0.0),
glue_sign: GlueSign::Normal,
glue_order: GlueOrder::Normal,
}
}
}
impl Default for HList {
fn default() -> Self {
Self::new_null_box()
}
}
/// A box made from a vertical list.
///
/// This is the same as [HList], except the list inside holds [Vertical] nodes
/// instead of [Horizontal] nodes.
///
/// Described in TeX.2021.137.
#[derive(Debug, PartialEq, Eq)]
pub struct VList {
/// Height of the box.
pub height: Number,
/// Width of the box.
pub width: Number,
/// Depth of the box.
pub depth: Number,
/// Same meaning as [HList::shift_amount].
pub shift_amount: Number,
/// The vertical list this box is made from.
pub list: Vec<Vertical>,
/// Ratio by which glue should shrink or stretch.
pub glue_ratio: GlueRatio,
/// Whether the glue should stretch, shrink, or remain rigid.
pub glue_sign: GlueSign,
// NOTE(review): presumably the order of infinity of the glue that is
// stretched or shrunk (TeX's `glue_order`) — confirm against `core::GlueOrder`.
pub glue_order: GlueOrder,
}
/// A rule stands for a solid black rectangle.
///
/// It has width, depth and height fields.
/// However if any of these dimensions is -2^30, the actual value will be
/// determined by running the rule up to the boundary of the innermost
/// enclosing box.
/// This is called a "running dimension".
/// The width is never running in an hlist; the height and depth are never running
/// in a vlist.
///
/// Described in TeX.2021.138.
#[derive(Debug, PartialEq, Eq)]
pub struct Rule {
/// Height of the rule, or the running-dimension marker.
pub height: Number,
/// Width of the rule, or the running-dimension marker.
pub width: Number,
/// Depth of the rule, or the running-dimension marker.
pub depth: Number,
}
impl Rule {
    /// Marker value for a "running" dimension: -2^30.
    ///
    /// This is the value TeX calls `null_flag` (TeX.2021.138). A dimension
    /// equal to it is not a real size; the actual size is determined by
    /// extending the rule to the boundary of the innermost enclosing box.
    ///
    /// The value is written as `-(1 << 30)` on purpose. Unary minus binds
    /// tighter than `<<`, so the expression `-2 << 30` means `(-2) << 30`,
    /// which is -2^31 rather than the documented -2^30.
    pub const RUNNING: Number = Number(-(1 << 30));

    /// Creates a new rule.
    ///
    /// All of the dimensions are running, i.e. set to [Rule::RUNNING].
    ///
    /// Described in TeX.2021.139.
    pub fn new() -> Self {
        Self {
            height: Self::RUNNING,
            width: Self::RUNNING,
            depth: Self::RUNNING,
        }
    }
}

impl Default for Rule {
    /// The default rule has all dimensions running; see [Rule::new].
    fn default() -> Self {
        Self::new()
    }
}
/// Vertical material to be inserted.
///
/// This node is related to the TeX primitive `\insert`.
///
/// Described in TeX.2021.140.
#[derive(Debug, PartialEq, Eq)]
pub struct Insertion {
// NOTE(review): presumably the number of the insertion class / box register
// that this material is destined for — confirm against TeX.2021.140.
pub box_number: u8,
/// Slightly misnamed: it actually holds the natural height plus depth
/// of the vertical list being inserted.
pub height: Number,
/// Used in case this insertion is split.
pub split_max_depth: Number,
// NOTE(review): presumably, like `split_max_depth`, only relevant when the
// insertion is split — confirm against TeX.2021.140.
pub split_top_skip: core::Glue,
/// Penalty to be used if this insertion floats to a subsequent
/// page after a split insertion of the same class.
pub float_penalty: u32,
/// The vertical list being inserted.
pub vlist: Vec<Vertical>,
}
/// Contents of a user's `\mark` text.
///
/// TODO: At time of writing I don't know what to do with this node.
/// In Knuth's TeX it references a token list, but I don't want Boxworks
/// to depend on Texlang. So for the moment just leaving a dummy list.
///
/// Described in TeX.2021.141.
#[derive(Debug, PartialEq, Eq)]
pub struct Mark {
/// Dummy stand-in for the mark's token list; its elements carry no data.
pub list: Vec<()>,
}
/// Specifies material that will be moved out into the surrounding vertical list.
///
/// E.g., used to implement the TeX primitive `\vadjust`.
///
/// Described in TeX.2021.142.
#[derive(Debug, PartialEq, Eq)]
pub struct Adjust {
/// The vertical material that will be moved out.
pub list: Vec<Vertical>,
}
/// A ligature.
///
/// Described in TeX.2021.143.
#[derive(Debug, PartialEq, Eq)]
pub struct Ligature {
// NOTE(review): presumably records whether the ligature was formed using the
// left word boundary — confirm against TeX.2021.143.
pub included_left_boundary: bool,
// NOTE(review): presumably records whether the ligature was formed using the
// right word boundary — confirm against TeX.2021.143.
pub included_right_boundary: bool,
/// The ligature character.
pub char: char,
/// Number identifying the font of the ligature character.
pub font: u32,
/// The original characters that were replaced by the ligature.
/// This is used if the engine needs to break apart the ligature
/// in order to perform hyphenation.
pub original_chars: Rc<str>, // TODO: why not (char, char)
}
// Two constructors for ligature nodes are provided in TeX.2021.144
// but they don't seem that useful so I'm omitting them.
/// A discretionary break.
///
/// The pre-break and post-break lists must only contain nodes
/// of type char, kern, box, rule or ligature.
/// We could have a specific node type for this, but for the moment
/// we just piggy back on the general [Horizontal] node type.
/// As a result this restriction is not enforced by the type system.
///
/// Described in TeX.2021.145.
#[derive(Debug, PartialEq, Eq)]
pub struct Discretionary {
/// Material to insert before this node, if the break occurs here.
pub pre_break: Vec<Horizontal>,
/// Material to insert after this node, if the break occurs here.
pub post_break: Vec<Horizontal>,
/// Number of subsequent nodes to skip if the break occurs here.
pub replace_count: u32,
}
impl Discretionary {
pub fn new() -> Self {
Self {
pre_break: vec![],
post_break: vec![],
replace_count: 0,
}
}
}
impl Default for Discretionary {
fn default() -> Self {
Self::new()
}
}
/// A whatsit node.
///
/// This is used to facilitate extensions to TeX.
/// It's unclear right now what the API of it will be, though
/// it can be figured out by reading Chapter 53, "Extensions", of
/// TeX.
///
/// Knuth uses this node type to implement both `\write` and `\special`
/// so we'll eventually find out.
///
/// For now the only requirement on implementors is [std::fmt::Debug].
///
/// Described in TeX.2021.146.
pub trait Whatsit: std::fmt::Debug {}
/// A marker placed before or after math mode.
///
/// Described in TeX.2021.147.
#[derive(Debug, PartialEq, Eq)]
pub enum Math {
/// The marker placed before math mode.
Before,
/// The marker placed after math mode.
After,
}
impl Horizontal {
/// Whether a glue node that comes after this node may be broken.
///
/// For char nodes, this function is essentially undefined in Knuth's
/// TeX. More specifically, the value depends on the exact character code.
/// In TeX this function is never called for char nodes which is why this
/// is not a problem. Here, we return `true` for char nodes based on
/// my analysis of all places in Knuth's TeX where it is invoked:
///
/// - TeX.2021.868: `precedes_break` is called on variable `cur_p` which
/// is a pointer to a horizontal list. Before this call, the calling code
/// first checks if the node is a character and if so follows the same
/// code path. Thus returning `true` here is the right thing to do.
///
/// - TeX.2021.973: the function is called on a variable `prev_p` which
/// is a pointer to a vertical list and so the char case never arises.
///
/// - TeX.2021.1000: same as the last case.
///
/// This function is defined in TeX.2021.148.
pub fn precedes_break(&self) -> bool {
use Horizontal::*;
matches!(
self,
Char(_)
| HList(_)
| VList(_)
| Rule(_)
| Mark(_)
| Insertion(_)
| Adjust(_)
| Ligature(_)
| Discretionary(_)
| Whatsit(_)
)
}
/// Whether this node is discarded after a break.
///
/// As with [Self::precedes_break], this function is essentially undefined
/// for char nodes in Knuth's TeX. However there is only one call site
/// (TeX.2021.879) and in that call site char nodes behave as if this
/// function returns true.
///
/// This function is defined in TeX.2021.148.
pub fn non_discardable(&self) -> bool {
self.precedes_break()
}
}
impl Vertical {
/// Whether a glue node that comes after this node may be broken.
///
/// This function is defined in TeX.2021.148.
pub fn precedes_break(&self) -> bool {
use Vertical::*;
matches!(
self,
HList(_) | VList(_) | Rule(_) | Mark(_) | Insertion(_) | Whatsit(_)
)
}
}
/// A piece of glue.
///
/// Described in TeX.2021.149.
#[derive(Debug, PartialEq, Eq)]
pub struct Glue {
/// The underlying glue value, a type defined outside this module.
pub value: core::Glue,
/// The kind of this glue node.
pub kind: GlueKind,
}
impl From<core::Glue> for Glue {
fn from(value: core::Glue) -> Self {
Self {
value,
kind: Default::default(),
}
}
}
/// The kind of a glue node.
///
/// The default kind is [GlueKind::Normal].
///
/// Described in TeX.2021.149.
// NOTE(review): the non-normal variants presumably mirror the glue subtypes
// of TeX.2021.149 (conditional math glue, math glue, and the three flavors
// of leaders) — confirm before documenting each variant individually.
#[derive(Debug, Default, PartialEq, Eq)]
pub enum GlueKind {
#[default]
Normal,
ConditionalMath,
Math,
AlignedLeader,
CenteredLeader,
ExpandedLeader,
}
// TeX.2021.150 and TeX.2021.151 define the [font::Glue] type itself,
// which is not in this crate.
// Three constructors for glue nodes are provided in TeX.2021.152,
// TeX.2021.153 and TeX.2021.154 but they don't seem that
// useful so I'm omitting them.
/// A kern.
///
/// Described in TeX.2021.155.
#[derive(Debug, PartialEq, Eq)]
pub struct Kern {
/// Width of the kern.
pub width: Number,
/// The kind of this kern node, which records where the kern came from.
pub kind: KernKind,
}
/// The kind of a kern node.
///
/// The kind records where the kern came from.
///
/// Described in TeX.2021.155.
#[derive(Debug, PartialEq, Eq)]
pub enum KernKind {
/// Inserted from font information or math mode calculations.
Normal,
/// Inserted using e.g. TeX's `\kern` primitive.
Explicit,
/// Inserted from non-math accents.
Accent,
/// Inserted from e.g. `\mkern` specifications in math formulas.
Math,
}
// A constructor for kern nodes is provided in TeX.2021.156,
// but it doesn't seem useful.
/// A penalty node.
///
/// Described in TeX.2021.157.
#[derive(Debug, PartialEq, Eq)]
pub struct Penalty {
    /// The value of the penalty.
    pub value: i32,
}

impl Penalty {
    /// Threshold for "infinite" penalties.
    ///
    /// Any penalty bigger than this is considered infinite and no
    /// break will be allowed for such high values.
    pub const INFINITE: i32 = 10_000;

    /// Threshold for forced breaks; the negation of [Penalty::INFINITE].
    ///
    /// Any penalty smaller than this will result in a forced break.
    pub const EJECT: i32 = -Self::INFINITE;
}
// A constructor for penalty nodes is provided in TeX.2021.157,
// but it doesn't seem useful.
// TODO: Unset node(s) in TeX.2021.159