Skip to content

Commit 59ba74c

Browse files
committed
rustdoc: simplify JS search routine by not messing with lev distance
Since the sorting function accounts for an `index` field, there's not much reason to also be applying changes to the levenshtein distance. Instead, we can just not treat `lev` as a filter if there's already a non-sentinel value for `index`. This change gives slightly more weight to the index and path part, as search criteria, than it used to. This changes some of the test cases, but not in any obviously-"worse" way, and, in particular, substring matches are a bigger deal than levenshtein distances (we're assuming that a typo is less likely than someone just not typing the entire name). Based on #103710 (comment)
1 parent 4b51adf commit 59ba74c

File tree

1 file changed

+64
-50
lines changed

1 file changed

+64
-50
lines changed

src/librustdoc/html/static/js/search.js

+64-50
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,29 @@ function initSearch(rawSearchIndex) {
781781
return a - b;
782782
}
783783

784-
// Sort by non levenshtein results and then levenshtein results by the distance
784+
// sort by index of keyword in item name (no literal occurrence goes later)
785+
a = (aaa.index < 0);
786+
b = (bbb.index < 0);
787+
if (a !== b) {
788+
return a - b;
789+
}
790+
791+
// Sort by distance in the path part, if specified
792+
// (less changes required to match means higher rankings)
793+
a = aaa.path_lev;
794+
b = bbb.path_lev;
795+
if (a !== b) {
796+
return a - b;
797+
}
798+
799+
// (later literal occurrence, if any, goes later)
800+
a = aaa.index;
801+
b = bbb.index;
802+
if (a !== b) {
803+
return a - b;
804+
}
805+
806+
// Sort by distance in the name part, the last part of the path
785807
// (less changes required to match means higher rankings)
786808
a = (aaa.lev);
787809
b = (bbb.lev);
@@ -810,19 +832,6 @@ function initSearch(rawSearchIndex) {
810832
return (a > b ? +1 : -1);
811833
}
812834

813-
// sort by index of keyword in item name (no literal occurrence goes later)
814-
a = (aaa.index < 0);
815-
b = (bbb.index < 0);
816-
if (a !== b) {
817-
return a - b;
818-
}
819-
// (later literal occurrence, if any, goes later)
820-
a = aaa.index;
821-
b = bbb.index;
822-
if (a !== b) {
823-
return a - b;
824-
}
825-
826835
// special precedence for primitive and keyword pages
827836
if ((aaa.item.ty === TY_PRIMITIVE && bbb.item.ty !== TY_KEYWORD) ||
828837
(aaa.item.ty === TY_KEYWORD && bbb.item.ty !== TY_PRIMITIVE)) {
@@ -1230,15 +1239,19 @@ function initSearch(rawSearchIndex) {
12301239
* * `id` is the index in both `searchWords` and `searchIndex` arrays for this element.
12311240
* * `index` is an `integer`` used to sort by the position of the word in the item's name.
12321241
* * `lev` is the main metric used to sort the search results.
1242+
* * `path_lev` is zero if a single-component search query is used, otherwise it's the
1243+
* distance computed for everything other than the last path component.
12331244
*
12341245
* @param {Results} results
12351246
* @param {string} fullId
12361247
* @param {integer} id
12371248
* @param {integer} index
12381249
* @param {integer} lev
1250+
* @param {integer} path_lev
12391251
*/
1240-
function addIntoResults(results, fullId, id, index, lev) {
1241-
if (lev === 0 || (!parsedQuery.literalSearch && lev <= MAX_LEV_DISTANCE)) {
1252+
function addIntoResults(results, fullId, id, index, lev, path_lev) {
1253+
const inBounds = lev <= MAX_LEV_DISTANCE || index !== -1;
1254+
if (lev === 0 || (!parsedQuery.literalSearch && inBounds)) {
12421255
if (results[fullId] !== undefined) {
12431256
const result = results[fullId];
12441257
if (result.dontValidate || result.lev <= lev) {
@@ -1250,6 +1263,7 @@ function initSearch(rawSearchIndex) {
12501263
index: index,
12511264
dontValidate: parsedQuery.literalSearch,
12521265
lev: lev,
1266+
path_lev: path_lev,
12531267
};
12541268
}
12551269
}
@@ -1280,68 +1294,68 @@ function initSearch(rawSearchIndex) {
12801294
if (!row || (filterCrates !== null && row.crate !== filterCrates)) {
12811295
return;
12821296
}
1283-
let lev, lev_add = 0, index = -1;
1297+
let lev, index = -1, path_lev = 0;
12841298
const fullId = row.id;
1299+
const searchWord = searchWords[pos];
12851300

12861301
const in_args = findArg(row, elem, parsedQuery.typeFilter);
12871302
const returned = checkReturned(row, elem, parsedQuery.typeFilter);
12881303

1289-
addIntoResults(results_in_args, fullId, pos, index, in_args);
1290-
addIntoResults(results_returned, fullId, pos, index, returned);
1304+
// path_lev is 0 because no parent path information is currently stored
1305+
// in the search index
1306+
addIntoResults(results_in_args, fullId, pos, -1, in_args, 0);
1307+
addIntoResults(results_returned, fullId, pos, -1, returned, 0);
12911308

12921309
if (!typePassesFilter(parsedQuery.typeFilter, row.ty)) {
12931310
return;
12941311
}
1295-
const searchWord = searchWords[pos];
12961312

1297-
if (parsedQuery.literalSearch) {
1298-
if (searchWord === elem.name) {
1299-
addIntoResults(results_others, fullId, pos, -1, 0);
1300-
}
1301-
return;
1313+
const row_index = row.normalizedName.indexOf(elem.pathLast);
1314+
const word_index = searchWord.indexOf(elem.pathLast);
1315+
1316+
// lower indexes are "better" matches
1317+
// rank based on the "best" match
1318+
if (row_index === -1) {
1319+
index = word_index;
1320+
} else if (word_index === -1) {
1321+
index = row_index;
1322+
} else if (word_index < row_index) {
1323+
index = word_index;
1324+
} else {
1325+
index = row_index;
13021326
}
13031327

13041328
// No need to check anything else if it's a "pure" generics search.
13051329
if (elem.name.length === 0) {
13061330
if (row.type !== null) {
13071331
lev = checkGenerics(row.type, elem, MAX_LEV_DISTANCE + 1);
1308-
addIntoResults(results_others, fullId, pos, index, lev);
1332+
// path_lev is 0 because we know it's empty
1333+
addIntoResults(results_others, fullId, pos, index, lev, 0);
13091334
}
13101335
return;
13111336
}
13121337

13131338
if (elem.fullPath.length > 1) {
1314-
lev = checkPath(elem.pathWithoutLast, row);
1315-
if (lev > MAX_LEV_DISTANCE || (parsedQuery.literalSearch && lev !== 0)) {
1339+
path_lev = checkPath(elem.pathWithoutLast, row);
1340+
if (path_lev > MAX_LEV_DISTANCE) {
13161341
return;
1317-
} else if (lev > 0) {
1318-
lev_add = lev / 10;
13191342
}
13201343
}
13211344

1322-
if (searchWord.indexOf(elem.pathLast) > -1 ||
1323-
row.normalizedName.indexOf(elem.pathLast) > -1
1324-
) {
1325-
index = row.normalizedName.indexOf(elem.pathLast);
1326-
}
1327-
lev = levenshtein(searchWord, elem.pathLast);
1328-
if (lev > 0 && elem.pathLast.length > 2 && searchWord.indexOf(elem.pathLast) > -1) {
1329-
if (elem.pathLast.length < 6) {
1330-
lev = 1;
1331-
} else {
1332-
lev = 0;
1345+
if (parsedQuery.literalSearch) {
1346+
if (searchWord === elem.name) {
1347+
addIntoResults(results_others, fullId, pos, index, 0, path_lev);
13331348
}
1334-
}
1335-
lev += lev_add;
1336-
if (lev > MAX_LEV_DISTANCE) {
13371349
return;
1338-
} else if (index !== -1 && elem.fullPath.length < 2) {
1339-
lev -= 1;
13401350
}
1341-
if (lev < 0) {
1342-
lev = 0;
1351+
1352+
lev = levenshtein(searchWord, elem.pathLast);
1353+
1354+
if (index === -1 && lev + path_lev > MAX_LEV_DISTANCE) {
1355+
return;
13431356
}
1344-
addIntoResults(results_others, fullId, pos, index, lev);
1357+
1358+
addIntoResults(results_others, fullId, pos, index, lev, path_lev);
13451359
}
13461360

13471361
/**
@@ -1386,7 +1400,7 @@ function initSearch(rawSearchIndex) {
13861400
return;
13871401
}
13881402
const lev = Math.round(totalLev / nbLev);
1389-
addIntoResults(results, row.id, pos, 0, lev);
1403+
addIntoResults(results, row.id, pos, 0, lev, 0);
13901404
}
13911405

13921406
function innerRunQuery() {

0 commit comments

Comments
 (0)