texlang/types/catcode.rs
1use CatCode::*;
2
/// Enum representing all 16 category codes in TeX.
///
/// The discriminant of each variant is the numeric value of the category code,
/// so `cat_code as u8` yields the number TeX uses for it.
///
/// Each variant's documentation contains an example character which is mapped to that category code in plainTeX.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CatCode {
    /// Marks the beginning of a control sequence.
    /// Example: `\`.
    ///
    /// This category code is never seen outside of the lexer.
    Escape = 0,
    /// Begins a new group.
    /// Example: `{`.
    BeginGroup = 1,
    /// Ends an existing group.
    /// Example: `}`.
    EndGroup = 2,
    /// Starts or ends math mode.
    /// Example: `$`.
    MathShift = 3,
    /// Used in typesetting tables to align cells.
    /// Example: `&`.
    AlignmentTab = 4,
    /// Marks a new line in the input.
    /// Example: `\n`.
    ///
    /// This code behaves similarly to [`Space`](CatCode::Space), but has two additional properties.
    /// First, two or more consecutive new lines, modulo intervening [`Space`](CatCode::Space) characters,
    /// create a `\par` control sequence instead of a regular space.
    /// Second, this code terminates a comment that started with a [`Comment`](CatCode::Comment) character.
    ///
    /// This category code is never seen outside of the lexer.
    EndOfLine = 5,
    /// Marks the beginning of a parameter number.
    /// It must generally be followed by a digit.
    /// Example: `#`.
    Parameter = 6,
    /// Puts the following character or group in a superscript.
    /// Example: `^`.
    Superscript = 7,
    /// Puts the following character or group in a subscript.
    /// Example: `_`.
    Subscript = 8,
    /// Character that is ignored by the lexer.
    /// Example: ASCII null (0).
    ///
    /// This category code is never seen outside of the lexer.
    Ignored = 9,
    /// Whitespace.
    /// Example: ` `.
    Space = 10,
    /// A character that can be used as a control sequence name.
    /// Examples: `[a-zA-Z]`.
    Letter = 11,
    /// A character that cannot be used as a control sequence name.
    /// Example: `@`.
    ///
    /// This is the variant returned by [`CatCode::default`].
    #[default]
    Other = 12,
    /// A single character that behaves like a control sequence.
    /// Example: `~`.
    Active = 13,
    /// Marks the beginning of a comment.
    /// All characters until the next [`EndOfLine`](CatCode::EndOfLine) are ignored.
    /// Example: `%`.
    ///
    /// This category code is never seen outside of the lexer.
    Comment = 14,
    /// An invalid character.
    /// If this is encountered in the input, the lexer will return an error.
    /// Example: ASCII delete (127).
    ///
    /// This category code is never seen outside of the lexer.
    Invalid = 15,
}
76
77impl TryFrom<u8> for CatCode {
78 type Error = ();
79
80 fn try_from(value: u8) -> Result<Self, Self::Error> {
81 match value {
82 0 => Ok(Escape),
83 1 => Ok(BeginGroup),
84 2 => Ok(EndGroup),
85 3 => Ok(MathShift),
86 4 => Ok(AlignmentTab),
87 5 => Ok(EndOfLine),
88 6 => Ok(Parameter),
89 7 => Ok(Superscript),
90 8 => Ok(Subscript),
91 9 => Ok(Ignored),
92 10 => Ok(Space),
93 11 => Ok(Letter),
94 12 => Ok(Other),
95 13 => Ok(Active),
96 14 => Ok(Comment),
97 15 => Ok(Invalid),
98 _ => Err(()),
99 }
100 }
101}
102
103impl std::fmt::Display for CatCode {
104 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105 write!(
106 f,
107 "{} ({})",
108 *self as u8,
109 match self {
110 Escape => "escape",
111 BeginGroup => "begin group",
112 EndGroup => "end group",
113 MathShift => "math shift",
114 AlignmentTab => "alignment tab",
115 EndOfLine => "end of line",
116 Parameter => "parameter",
117 Superscript => "superscript",
118 Subscript => "subscript",
119 Ignored => "ignored",
120 Space => "space",
121 Letter => "letter",
122 Other => "other",
123 Active => "active",
124 Comment => "comment",
125 Invalid => "invalid",
126 }
127 )?;
128 Ok(())
129 }
130}
131
132impl CatCode {
133 /// Default category codes in INITEX for all ASCII characters.
134 ///
135 /// To find the category code for an ASCII character,
136 /// convert it to an integer and use it as an index for the array.
137 ///
138 /// This list was compiled by reading the source code TeX '82,
139 /// specifically section 232 in the "TeX: the program".
140 /// These defaults are also described in the TeXBook p343.
141 pub const INITEX_DEFAULTS: [CatCode; 128] = [
142 Ignored, // ASCII null
143 Other, Other, Other, Other, Other, Other, Other, Other, Other,
144 EndOfLine, // carriage return
145 Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other,
146 Other, Other, Other, Other, Other, Other, Other, Other, Space, // space
147 Other, Other, Other, Other, Comment, // %
148 Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other,
149 Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other, Other,
150 Other, Letter, // A
151 Letter, // B
152 Letter, // C
153 Letter, // D
154 Letter, // E
155 Letter, // F
156 Letter, // G
157 Letter, // H
158 Letter, // I
159 Letter, // J
160 Letter, // K
161 Letter, // L
162 Letter, // M
163 Letter, // N
164 Letter, // O
165 Letter, // P
166 Letter, // Q
167 Letter, // R
168 Letter, // S
169 Letter, // T
170 Letter, // U
171 Letter, // V
172 Letter, // W
173 Letter, // X
174 Letter, // Y
175 Letter, // Z
176 Other, Escape, // \
177 Other, Other, Other, Other, Letter, // a
178 Letter, // b
179 Letter, // c
180 Letter, // d
181 Letter, // e
182 Letter, // f
183 Letter, // g
184 Letter, // h
185 Letter, // i
186 Letter, // j
187 Letter, // k
188 Letter, // l
189 Letter, // m
190 Letter, // n
191 Letter, // o
192 Letter, // p
193 Letter, // q
194 Letter, // r
195 Letter, // s
196 Letter, // t
197 Letter, // u
198 Letter, // v
199 Letter, // w
200 Letter, // x
201 Letter, // y
202 Letter, // z
203 Other, Other, Other, Other, Invalid, // ASCII delete
204 ];
205
206 /// Default category codes in plainTeX for all ASCII characters.
207 ///
208 /// To find the category code for an ASCII character,
209 /// convert it to an integer and use it as an index for the array.
210 ///
211 /// This list was compiled by starting with [INITEX_DEFAULTS](CatCode::INITEX_DEFAULTS) and then applying
212 /// all category code changes in the plainTeX format.
213 /// These changes are described on p343 of the TeXBook.
214 pub const PLAIN_TEX_DEFAULTS: [CatCode; 128] = [
215 Ignored, // ASCII null
216 Other,
217 Other,
218 Other,
219 Other,
220 Other,
221 Other,
222 Other,
223 Other,
224 Space, // horizontal tab (\t)
225 EndOfLine, // line feed (\n)
226 Other,
227 Active, // ASCII form-feed
228 EndOfLine, // carriage return (\r)
229 Other,
230 Other,
231 Other,
232 Other,
233 Other,
234 Other,
235 Other,
236 Other,
237 Other,
238 Other,
239 Other,
240 Other,
241 Other,
242 Other,
243 Other,
244 Other,
245 Other,
246 Other,
247 Space, // space
248 Other,
249 Other,
250 Parameter, // #
251 MathShift, // $
252 Comment, // %
253 AlignmentTab, // &
254 Other,
255 Other,
256 Other,
257 Other,
258 Other,
259 Other,
260 Other,
261 Other,
262 Other,
263 Other,
264 Other,
265 Other,
266 Other,
267 Other,
268 Other,
269 Other,
270 Other,
271 Other,
272 Other,
273 Other,
274 Other,
275 Other,
276 Other,
277 Other,
278 Other,
279 Other,
280 Letter, // A
281 Letter, // B
282 Letter, // C
283 Letter, // D
284 Letter, // E
285 Letter, // F
286 Letter, // G
287 Letter, // H
288 Letter, // I
289 Letter, // J
290 Letter, // K
291 Letter, // L
292 Letter, // M
293 Letter, // N
294 Letter, // O
295 Letter, // P
296 Letter, // Q
297 Letter, // R
298 Letter, // S
299 Letter, // T
300 Letter, // U
301 Letter, // V
302 Letter, // W
303 Letter, // X
304 Letter, // Y
305 Letter, // Z
306 Other,
307 Escape, // \
308 Other,
309 Superscript, // ^
310 Subscript, // _
311 Other,
312 Letter, // a
313 Letter, // b
314 Letter, // c
315 Letter, // d
316 Letter, // e
317 Letter, // f
318 Letter, // g
319 Letter, // h
320 Letter, // i
321 Letter, // j
322 Letter, // k
323 Letter, // l
324 Letter, // m
325 Letter, // n
326 Letter, // o
327 Letter, // p
328 Letter, // q
329 Letter, // r
330 Letter, // s
331 Letter, // t
332 Letter, // u
333 Letter, // v
334 Letter, // w
335 Letter, // x
336 Letter, // y
337 Letter, // z
338 BeginGroup, // {
339 Other,
340 EndGroup, // }
341 Active, // ~
342 Invalid, // ASCII delete
343 ];
344}
345
#[cfg(test)]
mod tests {
    use super::*;

    /// Every category code, listed in discriminant order.
    const ALL_CAT_CODES: [CatCode; 16] = [
        CatCode::Escape,
        CatCode::BeginGroup,
        CatCode::EndGroup,
        CatCode::MathShift,
        CatCode::AlignmentTab,
        CatCode::EndOfLine,
        CatCode::Parameter,
        CatCode::Superscript,
        CatCode::Subscript,
        CatCode::Ignored,
        CatCode::Space,
        CatCode::Letter,
        CatCode::Other,
        CatCode::Active,
        CatCode::Comment,
        CatCode::Invalid,
    ];

    /// Casting a category code to `u8` and converting it back must return the
    /// same variant.
    #[test]
    fn serialize_and_deserialize_cat_code() {
        for cat_code in ALL_CAT_CODES {
            let raw = cat_code as u8;
            assert_eq!(CatCode::try_from(raw), Ok(cat_code));
        }
    }

    /// Bytes that are not one of the 16 category code numbers must be rejected.
    #[test]
    fn out_of_range_values_are_rejected() {
        for raw in 16..=u8::MAX {
            assert_eq!(CatCode::try_from(raw), Err(()));
        }
    }
}
376}