rustc_utils/source_map/
range.rs

1use std::{
2  cell::RefCell, collections::hash_map::Entry, default::Default, ffi::OsStr,
3  path::PathBuf, sync::Arc,
4};
5
6use anyhow::{Context, Result, bail, ensure};
7use rustc_data_structures::fx::FxHashMap as HashMap;
8use rustc_hir::{
9  BodyId,
10  intravisit::{self, Visitor},
11};
12use rustc_index::IndexVec;
13use rustc_middle::ty::TyCtxt;
14use rustc_span::{FileName, RealFileName, SourceFile, Span, source_map::SourceMap};
15#[cfg(feature = "serde")]
16use serde::Serialize;
17#[cfg(feature = "ts-rs")]
18use ts_rs::TS;
19
20use super::filename::{Filename, FilenameIndex};
21use crate::cache::Cache;
22
/// Two-way lookup table between byte positions and line/column character
/// positions for a single source file.
///
/// Both maps are populated together by [`CharByteMapping::build`], which
/// inserts every pair in both directions.
struct CharByteMapping {
  /// Byte position -> line/column of the character that starts there.
  byte_to_char: HashMap<BytePos, CharPos>,
  /// Line/column -> byte position; the inverse of `byte_to_char`.
  char_to_byte: HashMap<CharPos, BytePos>,
}
27
28impl CharByteMapping {
29  pub fn build(file: &SourceFile) -> Self {
30    let mut byte_to_char = HashMap::default();
31    let mut char_to_byte = HashMap::default();
32
33    macro_rules! check_insert {
34      ($bpos:expr, $cpos:expr) => {
35        let _r = byte_to_char.insert($bpos, $cpos);
36        debug_assert!(
37          _r.is_none(),
38          "byte_to_char: bytepos={:?}, charpos={:?}",
39          $bpos,
40          $cpos
41        );
42        let _r = char_to_byte.insert($cpos, $bpos);
43        debug_assert!(
44          _r.is_none(),
45          "char_to_byte: bytepos={:?}, charpos={:?}",
46          $bpos,
47          $cpos
48        );
49      };
50    }
51
52    for line in 0 .. file.count_lines() {
53      let line_str = file.get_line(line).unwrap();
54      let line_bounds = file.line_bounds(line);
55      let line_start = line_bounds.start.0 as usize;
56      let mut last_column = 0;
57      let mut last_offset = 0;
58      for (column, (byte_offset, c)) in line_str.char_indices().enumerate() {
59        let bpos = BytePos(line_start + byte_offset);
60        let cpos = CharPos { line, column };
61        check_insert!(bpos, cpos);
62        last_column = column + 1;
63        last_offset = byte_offset + c.len_utf8();
64      }
65
66      let bpos = BytePos(line_start + last_offset);
67      let cpos = CharPos {
68        line,
69        column: last_column,
70      };
71      check_insert!(bpos, cpos);
72    }
73
74    CharByteMapping {
75      byte_to_char,
76      char_to_byte,
77    }
78  }
79
80  #[allow(unused)]
81  pub fn byte_to_char(&self, pos: BytePos) -> CharPos {
82    *self
83      .byte_to_char
84      .get(&pos)
85      .unwrap_or_else(|| panic!("Could not find char pos for {pos:?}"))
86  }
87
88  pub fn char_to_byte(&self, pos: CharPos) -> BytePos {
89    *self
90      .char_to_byte
91      .get(&pos)
92      .unwrap_or_else(|| panic!("Could not find byte pos for {pos:?}"))
93  }
94}
95
/// State shared by the filename-interning and position-conversion routines
/// in this module.
#[derive(Default)]
pub struct RangeContext {
  /// Interned filenames; `Filename::intern_with_ctx` pushes new entries here
  /// and hands back their index.
  filenames: IndexVec<FilenameIndex, Filename>,
  /// Source files already resolved by `FilenameIndex::find_source_file`.
  path_mapping: HashMap<FilenameIndex, Arc<SourceFile>>,
  /// Byte/char lookup tables, built on first use per file.
  char_byte_mapping: Cache<FilenameIndex, CharByteMapping>,
}
102
// Thread-local `RangeContext` instance. It starts out empty and is accessed
// through `CONTEXT.with(..)` plus a `RefCell` borrow.
thread_local! {
  static CONTEXT: RefCell<RangeContext> = RefCell::new(RangeContext::default());
}
106
107impl Filename {
108  fn intern_with_ctx(self, ctx: &mut RangeContext) -> FilenameIndex {
109    let existing = ctx.filenames.iter_enumerated().find(|(_, f)| &self == *f);
110    match existing {
111      Some((index, _)) => index,
112      None => ctx.filenames.push(self),
113    }
114  }
115
116  pub fn intern<T: ?Sized + AsRef<OsStr>>(t: &T) -> FilenameIndex {
117    let filename = Filename(PathBuf::from(t));
118    CONTEXT.with(|ctx| filename.intern_with_ctx(&mut ctx.borrow_mut()))
119  }
120}
121
122impl FilenameIndex {
123  pub fn find_source_file(self, source_map: &SourceMap) -> Result<Arc<SourceFile>> {
124    CONTEXT.with(|ctx| {
125      let ctx = &mut *ctx.borrow_mut();
126      match ctx.path_mapping.entry(self) {
127        Entry::Occupied(entry) => Ok(Arc::clone(entry.get())),
128        Entry::Vacant(entry) => {
129          let files = source_map.files();
130          ensure!(
131            ctx.filenames.get(self).is_some(),
132            "Missing file index!"
133          );
134          let filename = &ctx.filenames[self];
135          let filename = filename.0
136            .canonicalize()
137            .unwrap_or_else(|_| filename.0.clone());
138          let rustc_filename = files
139            .iter()
140            .map(|file| &file.name)
141            .find(|name| match &name {
142              // rustc seems to store relative paths to files in the workspace, so if filename is absolute,
143              // we can compare them using Path::ends_with
144              FileName::Real(RealFileName::LocalPath(other)) => {
145                let canonical = other.canonicalize();
146                let other = canonical.as_ref().unwrap_or(other);
147                filename.ends_with(other)
148              }
149              _ => false,
150            })
151            .with_context(|| {
152              format!(
153                "Could not find SourceFile for path: {}. Available SourceFiles were: [{}]",
154                filename.display(),
155                files
156                  .iter()
157                  .filter_map(|file| match &file.name {
158                    FileName::Real(RealFileName::LocalPath(other)) =>
159                      Some(format!("{}", other.display())),
160                    _ => None,
161                  })
162                  .collect::<Vec<_>>()
163                  .join(", ")
164              )
165            })?;
166          let file = source_map.get_source_file(rustc_filename).unwrap();
167          entry.insert(Arc::clone(&file));
168          Ok(file)
169        }
170      }
171    })
172  }
173}
174
/// A byte position in source text, stored as a `usize`.
///
/// `ByteRange::from_span` fills it directly from the raw value of a rustc
/// span's `lo()`/`hi()` position.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "ts-rs", derive(TS))]
pub struct BytePos(pub usize);
179
/// CharPos is designed to match VSCode's vscode.Position type.
/// Both line and column are 0-based.
///
/// A previous version of CharPos used a global character-based index,
/// naively thinking this was the same as VSCode's notion of an "offset".
/// However, for files using CRLF line endings, VSCode ignores the `\r`
/// when computing offsets, while Rustc does not. The unhappy compromise
/// is to use line-column as a common coordinate system, which is robust
/// to choice of line endings.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "ts-rs", derive(TS))]
pub struct CharPos {
  /// Zero-based line number.
  pub line: usize,
  /// Zero-based column within the line. `CharByteMapping::build` counts
  /// columns in Rust `char`s.
  pub column: usize,
}
196
/// A range of source code expressed as byte positions within one file.
///
/// Can be converted to a [`CharRange`] with `as_char_range` and built from
/// one with `from_char_range`.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "ts-rs", derive(TS))]
pub struct ByteRange {
  /// Start of the range.
  pub start: BytePos,
  /// End of the range; `substr` treats it as exclusive.
  pub end: BytePos,
  /// Interned name of the file the range belongs to.
  pub filename: FilenameIndex,
}
205
/// Data structure for sharing spans outside rustc.
///
/// Rustc uses byte indexes to describe ranges of source code, whereas
/// most Javascript-based editors I've encountered (e.g. VSCode) use
/// character-based (really grapheme-based) indexes. This data structure
/// along with [`ByteRange`] helps convert between the two representations.
///
/// NOTE(review): the columns produced in this file count Rust `char`s;
/// confirm this matches the unit the consuming editor expects.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize))]
#[cfg_attr(feature = "ts-rs", derive(TS))]
pub struct CharRange {
  /// Line/column where the range starts.
  pub start: CharPos,
  /// Line/column where the range ends.
  pub end: CharPos,
  /// Interned name of the file the range belongs to.
  pub filename: FilenameIndex,
}
220
221impl ByteRange {
222  pub fn as_char_range(&self, source_map: &SourceMap) -> CharRange {
223    let file = self.filename.find_source_file(source_map).unwrap();
224
225    CONTEXT.with(|ctx| {
226      let ctx = ctx.borrow();
227      let mapping: &CharByteMapping = ctx
228        .char_byte_mapping
229        .get(&self.filename, |_| CharByteMapping::build(&file));
230
231      let char_start = mapping.byte_to_char(self.start);
232      let char_end = mapping.byte_to_char(self.end);
233
234      CharRange {
235        start: char_start,
236        end: char_end,
237        filename: self.filename,
238      }
239    })
240  }
241
242  pub fn from_char_range(
243    char_start: CharPos,
244    char_end: CharPos,
245    filename: FilenameIndex,
246    source_map: &SourceMap,
247  ) -> Result<ByteRange> {
248    let file = filename.find_source_file(source_map)?;
249
250    CONTEXT.with(|ctx| {
251      let ctx = ctx.borrow();
252      let mapping = ctx
253        .char_byte_mapping
254        .get(&filename, |_| CharByteMapping::build(&file));
255      let byte_start = mapping.char_to_byte(char_start);
256      let byte_end = mapping.char_to_byte(char_end);
257      Ok(ByteRange {
258        start: byte_start,
259        end: byte_end,
260        filename,
261      })
262    })
263  }
264
265  pub fn from_span(span: Span, source_map: &SourceMap) -> Result<Self> {
266    CONTEXT.with(|ctx| {
267      let mut ctx = ctx.borrow_mut();
268
269      log::trace!("Converting to range: {span:?}");
270      let file = source_map.lookup_source_file(span.lo());
271      let filename = match &file.name {
272        FileName::Real(RealFileName::LocalPath(filename)) => {
273          Filename(filename.clone()).intern_with_ctx(&mut ctx)
274        }
275        filename => bail!("Range::from_span doesn't support {filename:?}"),
276      };
277
278      ensure!(
279        source_map.ensure_source_file_source_present(&file),
280        "Could not load source for file: {:?}",
281        file.name
282      );
283      let external = file.external_src.borrow();
284      let _src = file
285        .src
286        .as_deref()
287        .map(String::as_str)
288        .unwrap_or_else(|| external.get_source().as_ref().unwrap());
289
290      let byte_start = BytePos(span.lo().0 as usize);
291      let byte_end = BytePos(span.hi().0 as usize);
292
293      Ok(ByteRange {
294        start: byte_start,
295        end: byte_end,
296        filename,
297      })
298    })
299  }
300
301  pub fn substr(&self, s: &str) -> String {
302    s[self.start.0 .. self.end.0].to_string()
303  }
304}
305
306impl CharRange {
307  pub fn from_span(span: Span, source_map: &SourceMap) -> Result<Self> {
308    let byte_range = ByteRange::from_span(span, source_map)?;
309    Ok(byte_range.as_char_range(source_map))
310  }
311}
312
/// Used to convert objects into a [`Span`] with access to [`TyCtxt`]
pub trait ToSpan {
  /// Produces the [`Span`] for `self`, or an error if the conversion fails.
  fn to_span(&self, tcx: TyCtxt) -> Result<Span>;
}
317
318impl ToSpan for ByteRange {
319  fn to_span(&self, _tcx: TyCtxt) -> Result<Span> {
320    Ok(Span::with_root_ctxt(
321      rustc_span::BytePos(u32::try_from(self.start.0).unwrap()),
322      rustc_span::BytePos(u32::try_from(self.end.0).unwrap()),
323    ))
324  }
325}
326
327impl ToSpan for CharRange {
328  fn to_span(&self, tcx: TyCtxt) -> Result<Span> {
329    let range = ByteRange::from_char_range(
330      self.start,
331      self.end,
332      self.filename,
333      tcx.sess.source_map(),
334    )?;
335    range.to_span(tcx)
336  }
337}
338
339fn qpath_to_span(tcx: TyCtxt, qpath: String) -> Result<Span> {
340  struct Finder<'tcx> {
341    tcx: TyCtxt<'tcx>,
342    qpath: String,
343    span: Option<Span>,
344  }
345
346  impl<'tcx> Visitor<'tcx> for Finder<'tcx> {
347    fn visit_nested_body(&mut self, id: BodyId) {
348      intravisit::walk_body(self, self.tcx.hir_body(id));
349
350      let local_def_id = self.tcx.hir_body_owner_def_id(id);
351      let function_path = self
352        .tcx
353        .def_path(local_def_id.to_def_id())
354        .to_string_no_crate_verbose();
355      if function_path[2 ..] == self.qpath {
356        self.span = Some(self.tcx.hir_span(id.hir_id));
357      }
358    }
359  }
360
361  let mut finder = Finder {
362    tcx,
363    qpath,
364    span: None,
365  };
366  tcx.hir_visit_all_item_likes_in_crate(&mut finder);
367  finder
368    .span
369    .with_context(|| format!("No function with qpath {}", finder.qpath))
370}
371
/// An externally-provided identifier of a function
pub enum FunctionIdentifier {
  /// Name of a function, matched against its definition path without the
  /// crate name or leading `::` (see `qpath_to_span`).
  Qpath(String),

  /// Range of code possibly inside a function
  Range(CharRange),
}
380
381impl ToSpan for FunctionIdentifier {
382  fn to_span(&self, tcx: TyCtxt) -> Result<Span> {
383    match self {
384      FunctionIdentifier::Qpath(qpath) => qpath_to_span(tcx, qpath.clone()),
385      FunctionIdentifier::Range(range) => range.to_span(tcx),
386    }
387  }
388}
389
#[cfg(test)]
mod test {
  use super::*;
  use crate::test_utils::{self, CompileResult};

  // Exercises file lookup, both `FunctionIdentifier` variants, and
  // byte-to-char conversion across a multi-byte character.
  #[test]
  fn test_range() {
    // Multi-byte in UTF-8 but a single `char`, so byte and char offsets
    // diverge after it.
    let emoji = "🦀";
    let input = &format!(
      r#"fn main() {{
  let x = "{emoji}";
}}

// mysterious bytes
"#
    );

    test_utils::CompileBuilder::new(input).compile(|CompileResult { tcx }| {
      let source_map = tcx.sess.source_map();
      let filename = Filename::intern("dummy.rs");
      filename.find_source_file(source_map).unwrap();

      // A path that exists resolves to a span...
      let id = FunctionIdentifier::Qpath(String::from("main"));
      id.to_span(tcx).unwrap();

      // ...and one that does not is reported as an error.
      let id = FunctionIdentifier::Qpath(String::from("foobar"));
      id.to_span(tcx).unwrap_err();

      let id = FunctionIdentifier::Range(CharRange {
        start: CharPos { line: 0, column: 0 },
        end: CharPos { line: 0, column: 1 },
        filename,
      });
      id.to_span(tcx).unwrap();

      // The byte range covering the emoji must map to exactly one column.
      let emoji_index = input.find(emoji).unwrap();
      let byte_range = ByteRange {
        start: BytePos(emoji_index),
        end: BytePos(emoji_index + emoji.len()),
        filename,
      };
      let char_range = byte_range.as_char_range(source_map);
      // The emoji sits on the second line, after the 11 characters `  let x = "`.
      let emoji_line = 1;
      let emoji_column = 11;
      assert_eq!(char_range, CharRange {
        start: CharPos {
          line: emoji_line,
          column: emoji_column
        },
        end: CharPos {
          line: emoji_line,
          column: emoji_column + 1
        },
        filename
      });
    });
  }
}