1use crate::token::{CsNameInterner, Token, Value};
37use std::collections::BTreeMap;
38use std::ops::Bound::Included;
39use std::path::PathBuf;
40
41use super::CommandRef;
42
43#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
47#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
48pub struct Key(u32);
49
50impl Key {
51 #[cfg(test)]
52 pub(crate) fn dummy() -> Key {
53 Key(u32::MAX)
54 }
55
56 #[cfg(test)]
57 pub fn as_u32(&self) -> u32 {
58 self.0
59 }
60}
61
62#[derive(Debug)]
64#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
65pub struct KeyRange {
66 next: u32,
67 limit: u32,
68}
69
70impl KeyRange {
71 #[allow(clippy::should_implement_trait)]
75 pub fn next(&mut self) -> Key {
76 if self.next >= self.limit {
77 panic!["requested more trace keys than are in the range"]
78 }
79 let n = self.next;
80 self.next += 1;
81 Key(n)
82 }
83
84 pub fn peek(&mut self) -> Key {
88 if self.next >= self.limit {
89 panic!["requested more trace keys than are in the range"]
90 }
91 Key(self.next)
92 }
93
94 pub fn advance_by(&mut self, u: usize) {
98 self.next = self.next.checked_add(u.try_into().unwrap()).unwrap();
99 }
100}
101
102impl KeyRange {
103 pub fn empty() -> KeyRange {
104 KeyRange { next: 0, limit: 0 }
105 }
106
107 #[cfg(test)]
108 pub fn for_testing() -> KeyRange {
109 KeyRange {
110 next: 0,
111 limit: u32::MAX,
112 }
113 }
114}
115
116#[derive(Debug, PartialEq, Eq, Clone)]
118#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
119pub struct SourceCodeTrace {
120 pub origin: Origin,
122 pub line_content: String,
124 pub line_number: usize,
126 pub index: usize,
128 pub value: String,
130 pub token: Option<Token>,
133}
134
135#[derive(Default)]
137#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
138pub struct Tracer {
139 checkpoints: BTreeMap<u32, Checkpoint>,
140 next_key: u32,
141
142 last_external_input: Option<u32>,
145}
146
147#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
149#[derive(Debug, PartialEq, Eq, Clone)]
150pub enum Origin {
151 File(PathBuf),
152 Terminal,
153}
154
155impl Tracer {
156 pub fn register_source_code(
164 &mut self,
165 token: Option<Token>,
166 origin: Origin,
167 source_code: &str,
168 ) -> KeyRange {
169 let len = match u32::try_from(source_code.len()) {
170 Err(_) => {
171 panic!(
172 "source code too big ({} bytes); max is 2^32={} bytes",
173 source_code.len(),
174 u32::MAX
175 )
176 }
177 Ok(0) => 1_u32,
179 Ok(limit) => limit + 1,
182 };
183 let range = KeyRange {
184 next: self.next_key,
185 limit: self.next_key + len,
186 };
187 self.checkpoints.insert(
188 range.next,
189 Checkpoint::SourceCode {
190 origin,
191 content: source_code.to_string(),
192 },
193 );
194 if token.is_none() {
195 self.last_external_input = Some(self.next_key);
196 }
197 self.next_key = range.limit;
198 range
199 }
200
201 pub fn trace(&self, token: Token, cs_name_interner: &CsNameInterner) -> SourceCodeTrace {
203 let value = match token.value() {
204 Value::CommandRef(CommandRef::ControlSequence(cs_name)) => {
205 format!["\\{}", cs_name_interner.resolve(cs_name).unwrap()]
206 }
207 _ => token.char().unwrap().to_string(),
209 };
210
211 let (&first_key, checkpoint) = self
212 .checkpoints
213 .range((Included(&0), Included(&token.trace_key.0)))
214 .next_back()
215 .unwrap();
216
217 match checkpoint {
218 Checkpoint::SourceCode { origin, content } => {
219 let char_offset = (token.trace_key().0 - first_key) as usize;
220 let mut line_number = 1;
221 let mut byte_line_start = 0;
222 let mut char_line_start = 0;
223 for (char_index, (byte_index, c)) in content.char_indices().enumerate() {
224 if char_index == char_offset {
225 break;
226 }
227 if c == '\n' {
228 byte_line_start = byte_index + 1;
229 char_line_start = char_index + 1;
230 line_number += 1;
231 }
232 }
233 let position = char_offset - char_line_start;
234 let tail = &content[byte_line_start..];
235 let line_content = match tail.split_once('\n') {
236 None => tail.to_string(),
237 Some((a, _)) => a.to_string(),
238 };
239 SourceCodeTrace {
240 origin: origin.clone(),
241 line_content,
242 line_number,
243 index: position,
244 value,
245 token: Some(token),
246 }
247 }
248 }
249 }
250
251 pub fn trace_end_of_input(&self) -> SourceCodeTrace {
252 let f = self
253 .checkpoints
254 .get(&self.last_external_input.unwrap())
255 .unwrap();
256 match f {
257 Checkpoint::SourceCode { origin, content } => {
258 let mut last_line: (usize, usize) = (0, 0);
260 let mut last_non_empty_line: (usize, usize) = (0, 0);
261 for (i, c) in content.char_indices() {
262 if !c.is_whitespace() {
263 last_non_empty_line = last_line;
264 } else if c == '\n' {
265 last_line.0 += 1;
266 last_line.1 = i + 1;
267 }
268 }
269 let last_line = content[last_non_empty_line.1..].trim_end();
270 SourceCodeTrace {
271 origin: origin.clone(),
272 line_content: last_line.to_string(),
273 line_number: last_non_empty_line.0 + 1,
274 index: last_line.len(),
275 value: " ".to_string(),
276 token: None,
277 }
278 }
279 }
280 }
281}
282
283#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
284enum Checkpoint {
285 SourceCode {
286 origin: Origin,
287 content: String, },
289}
290
291#[cfg(test)]
292mod tests {
293 use super::*;
294
295 #[test]
296 fn one_source_code() {
297 let file_name: PathBuf = "input.tex".into();
298 let origin = Origin::File(file_name);
299 let line_1 = "hël".to_string();
300 let line_2 = "wor\\cömmand".to_string();
301 let line_3 = "hël".to_string();
302 let source_code = format!("{}\n{}\n{}", line_1, line_2, line_3);
303
304 let mut tracer: Tracer = Default::default();
305 let mut interner: CsNameInterner = Default::default();
306 let command = interner.get_or_intern("command");
307 let mut range = tracer.register_source_code(None, origin.clone(), &source_code);
308 let mut tokens = vec![
309 Token::new_letter('h', range.next()),
310 Token::new_letter('e', range.next()),
311 Token::new_letter('l', range.next()),
312 Token::new_space('\n', range.next()),
313 Token::new_letter('w', range.next()),
314 Token::new_letter('o', range.next()),
315 Token::new_letter('r', range.next()),
316 Token::new_control_sequence(command, range.next()),
317 ];
318 for _ in 0.."command".len() {
319 range.next();
320 }
321 let mut extra_tokens = vec![
322 Token::new_space('\n', range.next()),
323 Token::new_letter('h', range.next()),
324 Token::new_letter('e', range.next()),
325 Token::new_letter('l', range.next()),
326 ];
327 tokens.append(&mut extra_tokens);
328
329 let got_traces: Vec<SourceCodeTrace> = tokens
330 .iter()
331 .map(|token| tracer.trace(*token, &interner))
332 .collect();
333
334 let want_traces = vec![
335 SourceCodeTrace {
336 origin: origin.clone(),
337 line_content: line_1.clone(),
338 line_number: 1,
339 index: 0,
340 value: "h".to_string(),
341 token: Some(tokens[0]),
342 },
343 SourceCodeTrace {
344 origin: origin.clone(),
345 line_content: line_1.clone(),
346 line_number: 1,
347 index: 1,
348 value: "e".to_string(),
349 token: Some(tokens[1]),
350 },
351 SourceCodeTrace {
352 origin: origin.clone(),
353 line_content: line_1.clone(),
354 line_number: 1,
355 index: 2,
356 value: "l".to_string(),
357 token: Some(tokens[2]),
358 },
359 SourceCodeTrace {
360 origin: origin.clone(),
361 line_content: line_1.clone(),
362 line_number: 1,
363 index: 3,
364 value: "\n".to_string(),
365 token: Some(tokens[3]),
366 },
367 SourceCodeTrace {
368 origin: origin.clone(),
369 line_content: line_2.clone(),
370 line_number: 2,
371 index: 0,
372 value: "w".to_string(),
373 token: Some(tokens[4]),
374 },
375 SourceCodeTrace {
376 origin: origin.clone(),
377 line_content: line_2.clone(),
378 line_number: 2,
379 index: 1,
380 value: "o".to_string(),
381 token: Some(tokens[5]),
382 },
383 SourceCodeTrace {
384 origin: origin.clone(),
385 line_content: line_2.clone(),
386 line_number: 2,
387 index: 2,
388 value: "r".to_string(),
389 token: Some(tokens[6]),
390 },
391 SourceCodeTrace {
392 origin: origin.clone(),
393 line_content: line_2.clone(),
394 line_number: 2,
395 index: 3,
396 value: "\\command".to_string(),
397 token: Some(tokens[7]),
398 },
399 SourceCodeTrace {
400 origin: origin.clone(),
401 line_content: line_2.clone(),
402 line_number: 2,
403 index: 11,
404 value: "\n".to_string(),
405 token: Some(tokens[8]),
406 },
407 SourceCodeTrace {
408 origin: origin.clone(),
409 line_content: line_3.clone(),
410 line_number: 3,
411 index: 0,
412 value: "h".to_string(),
413 token: Some(tokens[9]),
414 },
415 SourceCodeTrace {
416 origin: origin.clone(),
417 line_content: line_3.clone(),
418 line_number: 3,
419 index: 1,
420 value: "e".to_string(),
421 token: Some(tokens[10]),
422 },
423 SourceCodeTrace {
424 origin: origin.clone(),
425 line_content: line_3.clone(),
426 line_number: 3,
427 index: 2,
428 value: "l".to_string(),
429 token: Some(tokens[11]),
430 },
431 ];
432 assert_eq!(want_traces, got_traces);
433 }
434
435 #[test]
436 fn multiple_source_code() {
437 let mut tokens = Vec::new();
438 let mut tracer: Tracer = Default::default();
439 let interner: CsNameInterner = Default::default();
440
441 let file_1 = Origin::File("a.tex".into());
442 let file_1_content = "a".to_string();
443 let mut range = tracer.register_source_code(None, file_1.clone(), &file_1_content);
444 tokens.push(Token::new_letter('a', range.next()));
445
446 let file_2 = Origin::File("b.tex".into());
447 let file_2_content = "b".to_string();
448 let mut range = tracer.register_source_code(None, file_2.clone(), &file_2_content);
449 tokens.push(Token::new_letter('b', range.next()));
450
451 let file_3 = Origin::Terminal;
452 let file_3_content = "c".to_string();
453 let mut range = tracer.register_source_code(None, file_3.clone(), &file_3_content);
454 tokens.push(Token::new_letter('c', range.next()));
455
456 let got_traces: Vec<SourceCodeTrace> = tokens
457 .iter()
458 .map(|token| tracer.trace(*token, &interner))
459 .collect();
460
461 let want_traces = vec![
462 SourceCodeTrace {
463 origin: file_1,
464 line_content: file_1_content,
465 line_number: 1,
466 index: 0,
467 value: "a".to_string(),
468 token: Some(tokens[0]),
469 },
470 SourceCodeTrace {
471 origin: file_2,
472 line_content: file_2_content,
473 line_number: 1,
474 index: 0,
475 value: "b".to_string(),
476 token: Some(tokens[1]),
477 },
478 SourceCodeTrace {
479 origin: file_3,
480 line_content: file_3_content,
481 line_number: 1,
482 index: 0,
483 value: "c".to_string(),
484 token: Some(tokens[2]),
485 },
486 ];
487 assert_eq!(want_traces, got_traces);
488 }
489}