Skip to content

Commit 5314d80

Browse files
Prevent private/hidden items to create ambiguities in intra-doc links
1 parent a1eceec commit 5314d80

File tree

3 files changed

+193
-40
lines changed

3 files changed

+193
-40
lines changed

src/librustdoc/core.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use crate::clean::{self, ItemId};
3030
use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions};
3131
use crate::formats::cache::Cache;
3232
use crate::passes::Condition::*;
33+
use crate::passes::collect_intra_doc_links::LinkCollector;
3334
use crate::passes::{self};
3435

3536
pub(crate) struct DocContext<'tcx> {
@@ -440,6 +441,10 @@ pub(crate) fn run_global_ctxt(
440441
}
441442
}
442443

444+
let (mut krate, LinkCollector { visited_links, ambiguous_links, .. }) =
445+
tcx.sess.time("collect_intra_doc_links", || {
446+
passes::collect_intra_doc_links::collect_intra_doc_links(krate, &mut ctxt)
447+
});
443448
tcx.sess.time("check_lint_expectations", || tcx.check_expectations(Some(sym::rustdoc)));
444449

445450
if let Some(guar) = tcx.dcx().has_errors() {
@@ -448,6 +453,9 @@ pub(crate) fn run_global_ctxt(
448453

449454
krate = tcx.sess.time("create_format_cache", || Cache::populate(&mut ctxt, krate));
450455

456+
let mut collector = LinkCollector { cx: &mut ctxt, visited_links, ambiguous_links };
457+
collector.resolve_ambiguities();
458+
451459
Ok((krate, ctxt.render_options, ctxt.cache))
452460
}
453461

src/librustdoc/passes/collect_intra_doc_links.rs

Lines changed: 185 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use rustc_data_structures::intern::Interned;
1414
use rustc_errors::{Applicability, Diag, DiagMessage};
1515
use rustc_hir::def::Namespace::*;
1616
use rustc_hir::def::{DefKind, Namespace, PerNS};
17-
use rustc_hir::def_id::{CRATE_DEF_ID, DefId};
17+
use rustc_hir::def_id::{CRATE_DEF_ID, DefId, LOCAL_CRATE};
1818
use rustc_hir::{Mutability, Safety};
1919
use rustc_middle::ty::{Ty, TyCtxt};
2020
use rustc_middle::{bug, span_bug, ty};
@@ -30,23 +30,30 @@ use smallvec::{SmallVec, smallvec};
3030
use tracing::{debug, info, instrument, trace};
3131

3232
use crate::clean::utils::find_nearest_parent_module;
33-
use crate::clean::{self, Crate, Item, ItemLink, PrimitiveType};
33+
use crate::clean::{self, Crate, Item, ItemId, ItemLink, PrimitiveType};
3434
use crate::core::DocContext;
3535
use crate::html::markdown::{MarkdownLink, MarkdownLinkRange, markdown_links};
3636
use crate::lint::{BROKEN_INTRA_DOC_LINKS, PRIVATE_INTRA_DOC_LINKS};
37-
use crate::passes::Pass;
3837
use crate::visit::DocVisitor;
3938

40-
pub(crate) const COLLECT_INTRA_DOC_LINKS: Pass = Pass {
41-
name: "collect-intra-doc-links",
42-
run: collect_intra_doc_links,
43-
description: "resolves intra-doc links",
44-
};
45-
46-
fn collect_intra_doc_links(krate: Crate, cx: &mut DocContext<'_>) -> Crate {
47-
let mut collector = LinkCollector { cx, visited_links: FxHashMap::default() };
39+
pub(crate) fn collect_intra_doc_links<'a, 'tcx>(
40+
krate: Crate,
41+
cx: &'a mut DocContext<'tcx>,
42+
) -> (Crate, LinkCollector<'a, 'tcx>) {
43+
let mut collector = LinkCollector {
44+
cx,
45+
visited_links: FxHashMap::default(),
46+
ambiguous_links: FxHashMap::default(),
47+
};
4848
collector.visit_crate(&krate);
49-
krate
49+
(krate, collector)
50+
}
51+
52+
pub(crate) struct AmbiguousLinks {
53+
pub(crate) disambiguator: Option<Disambiguator>,
54+
pub(crate) link_text: Box<str>,
55+
pub(crate) diag_info: OwnedDiagnosticInfo,
56+
pub(crate) resolved: Vec<(Res, Option<UrlFragment>)>,
5057
}
5158

5259
fn filter_assoc_items_by_name_and_namespace<'a>(
@@ -61,7 +68,7 @@ fn filter_assoc_items_by_name_and_namespace<'a>(
6168
}
6269

6370
#[derive(Copy, Clone, Debug, Hash, PartialEq)]
64-
enum Res {
71+
pub(crate) enum Res {
6572
Def(DefKind, DefId),
6673
Primitive(PrimitiveType),
6774
}
@@ -234,7 +241,7 @@ impl UrlFragment {
234241
}
235242

236243
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
237-
struct ResolutionInfo {
244+
pub(crate) struct ResolutionInfo {
238245
item_id: DefId,
239246
module_id: DefId,
240247
dis: Option<Disambiguator>,
@@ -243,18 +250,54 @@ struct ResolutionInfo {
243250
}
244251

245252
#[derive(Clone)]
246-
struct DiagnosticInfo<'a> {
253+
pub(crate) struct DiagnosticInfo<'a> {
247254
item: &'a Item,
248255
dox: &'a str,
249256
ori_link: &'a str,
250257
link_range: MarkdownLinkRange,
251258
}
252259

253-
struct LinkCollector<'a, 'tcx> {
254-
cx: &'a mut DocContext<'tcx>,
260+
pub(crate) struct OwnedDiagnosticInfo {
261+
item: Item,
262+
ori_link: String,
263+
link_range: MarkdownLinkRange,
264+
}
265+
266+
impl From<DiagnosticInfo<'_>> for OwnedDiagnosticInfo {
267+
fn from(f: DiagnosticInfo<'_>) -> Self {
268+
Self {
269+
item: f.item.clone(),
270+
ori_link: f.ori_link.to_string(),
271+
link_range: f.link_range.clone(),
272+
}
273+
}
274+
}
275+
276+
impl OwnedDiagnosticInfo {
277+
pub(crate) fn into_info(&self) -> DiagnosticInfo<'_> {
278+
DiagnosticInfo {
279+
item: &self.item,
280+
ori_link: &self.ori_link,
281+
dox: "",
282+
link_range: self.link_range.clone(),
283+
}
284+
}
285+
}
286+
287+
pub(crate) struct LinkCollector<'a, 'tcx> {
288+
pub(crate) cx: &'a mut DocContext<'tcx>,
255289
/// Cache the resolved links so we can avoid resolving (and emitting errors for) the same link.
256290
/// The link will be `None` if it could not be resolved (i.e. the error was cached).
257-
visited_links: FxHashMap<ResolutionInfo, Option<(Res, Option<UrlFragment>)>>,
291+
pub(crate) visited_links: FxHashMap<ResolutionInfo, Option<(Res, Option<UrlFragment>)>>,
292+
/// These links are ambiguous. We need for the cache to have its paths filled. Unfortunately,
293+
/// if we run the `LinkCollector` pass after `Cache::populate`, a lot of items that we need
294+
/// to go through will be removed, making a lot of intra-doc links to not be inferred.
295+
///
296+
/// So instead, we store the ambiguous links and we wait for cache paths to be filled before
297+
/// inferring them (if possible).
298+
///
299+
/// Key is `(item ID, path str)`.
300+
pub(crate) ambiguous_links: FxHashMap<(ItemId, String), AmbiguousLinks>,
258301
}
259302

260303
impl<'a, 'tcx> LinkCollector<'a, 'tcx> {
@@ -993,14 +1036,17 @@ impl LinkCollector<'_, '_> {
9931036
_ => find_nearest_parent_module(self.cx.tcx, item_id).unwrap(),
9941037
};
9951038
for md_link in preprocessed_markdown_links(&doc) {
996-
let link = self.resolve_link(&doc, item, item_id, module_id, &md_link);
997-
if let Some(link) = link {
998-
self.cx.cache.intra_doc_links.entry(item.item_id).or_default().insert(link);
1039+
if let Some(link) = self.resolve_link(&doc, item, item_id, module_id, &md_link) {
1040+
self.save_link(item.item_id, link);
9991041
}
10001042
}
10011043
}
10021044
}
10031045

1046+
pub(crate) fn save_link(&mut self, item_id: ItemId, link: ItemLink) {
1047+
self.cx.cache.intra_doc_links.entry(item_id).or_default().insert(link);
1048+
}
1049+
10041050
/// This is the entry point for resolving an intra-doc link.
10051051
///
10061052
/// FIXME(jynelson): this is way too many arguments
@@ -1024,7 +1070,7 @@ impl LinkCollector<'_, '_> {
10241070
pp_link.as_ref().map_err(|err| err.report(self.cx, diag_info.clone())).ok()?;
10251071
let disambiguator = *disambiguator;
10261072

1027-
let (mut res, fragment) = self.resolve_with_disambiguator_cached(
1073+
let mut resolved = self.resolve_with_disambiguator_cached(
10281074
ResolutionInfo {
10291075
item_id,
10301076
module_id,
@@ -1040,6 +1086,101 @@ impl LinkCollector<'_, '_> {
10401086
false,
10411087
)?;
10421088

1089+
if resolved.len() > 1 {
1090+
let links = AmbiguousLinks {
1091+
disambiguator,
1092+
link_text: link_text.clone(),
1093+
diag_info: diag_info.into(),
1094+
resolved,
1095+
};
1096+
self.ambiguous_links.insert((item.item_id, path_str.to_string()), links);
1097+
None
1098+
} else if let Some((res, fragment)) = resolved.pop() {
1099+
self.compute_link(res, fragment, path_str, disambiguator, diag_info, link_text)
1100+
} else {
1101+
None
1102+
}
1103+
}
1104+
1105+
/// Returns `true` if a link could be generated from the given intra-doc information.
1106+
///
1107+
/// This is a very light version of `format::href_with_root_path` since we're only interested
1108+
/// about whether we can generate a link to an item or not.
1109+
///
1110+
/// * If `original_did` is local, then we check if the item is reexported or public.
1111+
/// * If `original_did` is not local, then we check if the crate it comes from is a direct
1112+
/// public dependency.
1113+
fn validate_link(&self, original_did: DefId) -> bool {
1114+
let tcx = self.cx.tcx;
1115+
let def_kind = tcx.def_kind(original_did);
1116+
let did = match def_kind {
1117+
DefKind::AssocTy | DefKind::AssocFn | DefKind::AssocConst | DefKind::Variant => {
1118+
// documented on their parent's page
1119+
tcx.parent(original_did)
1120+
}
1121+
// If this a constructor, we get the parent (either a struct or a variant) and then
1122+
// generate the link for this item.
1123+
DefKind::Ctor(..) => return self.validate_link(tcx.parent(original_did)),
1124+
DefKind::ExternCrate => {
1125+
// Link to the crate itself, not the `extern crate` item.
1126+
if let Some(local_did) = original_did.as_local() {
1127+
tcx.extern_mod_stmt_cnum(local_did).unwrap_or(LOCAL_CRATE).as_def_id()
1128+
} else {
1129+
original_did
1130+
}
1131+
}
1132+
_ => original_did,
1133+
};
1134+
1135+
let cache = &self.cx.cache;
1136+
if !original_did.is_local()
1137+
&& !cache.effective_visibilities.is_directly_public(tcx, did)
1138+
&& !cache.document_private
1139+
&& !cache.primitive_locations.values().any(|&id| id == did)
1140+
{
1141+
return false;
1142+
}
1143+
1144+
cache.paths.get(&did).is_some()
1145+
|| cache.external_paths.get(&did).is_some()
1146+
|| !did.is_local()
1147+
}
1148+
1149+
pub(crate) fn resolve_ambiguities(&mut self) {
1150+
let mut ambiguous_links = mem::take(&mut self.ambiguous_links);
1151+
1152+
for ((item_id, path_str), info) in ambiguous_links.iter_mut() {
1153+
info.resolved.retain(|(res, _)| match res {
1154+
Res::Def(_, def_id) => self.validate_link(*def_id),
1155+
// Primitive types are always valid.
1156+
Res::Primitive(_) => true,
1157+
});
1158+
if info.resolved.len() == 1 {
1159+
let (res, fragment) = info.resolved.pop().unwrap();
1160+
let diag_info = info.diag_info.into_info();
1161+
if let Some(link) = self.compute_link(
1162+
res,
1163+
fragment,
1164+
path_str,
1165+
info.disambiguator,
1166+
diag_info,
1167+
&info.link_text,
1168+
) {
1169+
self.save_link(*item_id, link);
1170+
}
1171+
}
1172+
}
1173+
}
1174+
1175+
fn compute_link(
1176+
&mut self,
1177+
mut res: Res,
1178+
fragment: Option<UrlFragment>,
1179+
path_str: &str,
1180+
disambiguator: Option<Disambiguator>,
1181+
diag_info: DiagnosticInfo<'_>,
1182+
link_text: &Box<str>,
1183+
) -> Option<ItemLink> {
10431184
// Check for a primitive which might conflict with a module
10441185
// Report the ambiguity and require that the user specify which one they meant.
10451186
// FIXME: could there ever be a primitive not in the type namespace?
@@ -1055,7 +1196,7 @@ impl LinkCollector<'_, '_> {
10551196
} else {
10561197
// `[char]` when a `char` module is in scope
10571198
let candidates = &[(res, res.def_id(self.cx.tcx)), (prim, None)];
1058-
ambiguity_error(self.cx, &diag_info, path_str, candidates);
1199+
ambiguity_error(self.cx, &diag_info, path_str, candidates, true);
10591200
return None;
10601201
}
10611202
}
@@ -1085,7 +1226,7 @@ impl LinkCollector<'_, '_> {
10851226
}
10861227

10871228
res.def_id(self.cx.tcx).map(|page_id| ItemLink {
1088-
link: Box::<str>::from(&*ori_link.link),
1229+
link: Box::<str>::from(&*diag_info.ori_link),
10891230
link_text: link_text.clone(),
10901231
page_id,
10911232
fragment,
@@ -1107,7 +1248,7 @@ impl LinkCollector<'_, '_> {
11071248

11081249
let page_id = clean::register_res(self.cx, rustc_hir::def::Res::Def(kind, id));
11091250
Some(ItemLink {
1110-
link: Box::<str>::from(&*ori_link.link),
1251+
link: Box::<str>::from(&*diag_info.ori_link),
11111252
link_text: link_text.clone(),
11121253
page_id,
11131254
fragment,
@@ -1220,10 +1361,10 @@ impl LinkCollector<'_, '_> {
12201361
// If this call is intended to be recoverable, then pass true to silence.
12211362
// This is only recoverable when path is failed to resolved.
12221363
recoverable: bool,
1223-
) -> Option<(Res, Option<UrlFragment>)> {
1364+
) -> Option<Vec<(Res, Option<UrlFragment>)>> {
12241365
if let Some(res) = self.visited_links.get(&key) {
12251366
if res.is_some() || cache_errors {
1226-
return res.clone();
1367+
return res.clone().map(|r| vec![r]);
12271368
}
12281369
}
12291370

@@ -1249,30 +1390,33 @@ impl LinkCollector<'_, '_> {
12491390
// won't emit an error. So at this point, we can just take the first candidate as it was
12501391
// the first retrieved and use it to generate the link.
12511392
if let [candidate, _candidate2, ..] = *candidates
1252-
&& !ambiguity_error(self.cx, &diag, &key.path_str, &candidates)
1393+
&& !ambiguity_error(self.cx, &diag, &key.path_str, &candidates, false)
12531394
{
12541395
candidates = vec![candidate];
12551396
}
12561397

1257-
if let &[(res, def_id)] = candidates.as_slice() {
1398+
let mut resolved = Vec::with_capacity(candidates.len());
1399+
for (res, def_id) in candidates.as_slice() {
12581400
let fragment = match (&key.extra_fragment, def_id) {
12591401
(Some(_), Some(def_id)) => {
1260-
report_anchor_conflict(self.cx, diag, def_id);
1402+
report_anchor_conflict(self.cx, diag, *def_id);
12611403
return None;
12621404
}
12631405
(Some(u_frag), None) => Some(UrlFragment::UserWritten(u_frag.clone())),
1264-
(None, Some(def_id)) => Some(UrlFragment::Item(def_id)),
1406+
(None, Some(def_id)) => Some(UrlFragment::Item(*def_id)),
12651407
(None, None) => None,
12661408
};
1267-
let r = Some((res, fragment));
1268-
self.visited_links.insert(key, r.clone());
1269-
return r;
1409+
let r = (res.clone(), fragment.clone());
1410+
self.visited_links.insert(key.clone(), Some(r.clone()));
1411+
resolved.push(r);
12701412
}
12711413

1272-
if cache_errors {
1414+
if resolved.is_empty() && cache_errors {
12731415
self.visited_links.insert(key, None);
1416+
None
1417+
} else {
1418+
Some(resolved)
12741419
}
1275-
None
12761420
}
12771421

12781422
/// After parsing the disambiguator, resolve the main part of the link.
@@ -1429,7 +1573,7 @@ fn should_ignore_link(path_str: &str) -> bool {
14291573

14301574
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
14311575
/// Disambiguators for a link.
1432-
enum Disambiguator {
1576+
pub(crate) enum Disambiguator {
14331577
/// `prim@`
14341578
///
14351579
/// This is buggy, see <https://github.com/rust-lang/rust/pull/77875#discussion_r503583103>
@@ -2046,6 +2190,7 @@ fn ambiguity_error(
20462190
diag_info: &DiagnosticInfo<'_>,
20472191
path_str: &str,
20482192
candidates: &[(Res, Option<DefId>)],
2193+
emit_error: bool,
20492194
) -> bool {
20502195
let mut descrs = FxHashSet::default();
20512196
let kinds = candidates
@@ -2062,6 +2207,9 @@ fn ambiguity_error(
20622207
// candidate and not show a warning.
20632208
return false;
20642209
}
2210+
if !emit_error {
2211+
return true;
2212+
}
20652213

20662214
let mut msg = format!("`{path_str}` is ");
20672215
match kinds.as_slice() {

0 commit comments

Comments
 (0)