-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Improved the doc search by including Levenshtein distance #15385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,7 +58,7 @@ | |
} | ||
$('#' + from)[0].scrollIntoView(); | ||
$('.line-numbers span').removeClass('line-highlighted'); | ||
for (i = from; i <= to; i += 1) { | ||
for (i = from; i <= to; ++i) { | ||
$('#' + i).addClass('line-highlighted'); | ||
} | ||
} | ||
|
@@ -99,7 +99,7 @@ | |
stripped = '', | ||
len = rootPath.match(/\.\.\//g).length + 1; | ||
|
||
for (i = 0; i < len; i += 1) { | ||
for (i = 0; i < len; ++i) { | ||
match = url.match(/\/[^\/]*$/); | ||
if (i < len - 1) { | ||
stripped = match[0] + stripped; | ||
|
@@ -111,9 +111,44 @@ | |
|
||
document.location.href = url; | ||
}); | ||
/** | ||
* Code from Stackoverflow to compute the Levenshtein distance between two strings | ||
* Written by Marco de Wit at http://stackoverflow.com/a/18514751/745719 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this going to cause a licensing issue? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did a little research into the matter, and from what I can tell, it shouldn't be an issue. http://meta.stackexchange.com/questions/25956/what-is-up-with-the-source-code-license-on-stack-overflow Basically, it looks like I need to update this with more licensing information (and maybe add this to the license exception file if there is one?). According to the footer of Stack Overflow, code on the site is licensed under CC BY-SA 3.0, which means that I should include a link to the license in the comment above it and provide proper attribution (which I think I did). Since the code is unmodified, I don't have to do anything else besides mention that it is unmodified, so I will make these changes and add them to the pull request then. If anyone knows of something else that needs to be done to be in compliance with this license, let me know and I'll update it. In the case that it's fundamentally incompatible, I have no issues with writing my own implementation of it from scratch, but this one was one of the fastest JavaScript versions I could find. |
||
*/ | ||
// Computes the Levenshtein (edit) distance between the strings s1 and s2. | ||
// Characters are compared as UTF-16 code units via charCodeAt. | ||
var levenshtein = (function() { | ||
// Scratch row kept in the closure, so the same array is reused by every call. | ||
var row2 = []; | ||
return function(s1, s2) { | ||
if (s1 === s2) { | ||
// Identical inputs need no edits. | ||
return 0; | ||
} else { | ||
var s1_len = s1.length, s2_len = s2.length; | ||
if (s1_len && s2_len) { | ||
var i1 = 0, i2 = 0, a, b, c, c2, row = row2; | ||
// Seed the row with 1..s1_len: the distance from the empty string to each | ||
// non-empty prefix of s1. | ||
while (i1 < s1_len) | ||
row[i1] = ++i1; | ||
// Consume s2 one code unit at a time, updating the row in place. | ||
while (i2 < s2_len) { | ||
c2 = s2.charCodeAt(i2); | ||
// For the first column, a is the diagonal neighbour (i2 before the | ||
// increment) and b is the left neighbour (i2 after the increment). | ||
a = i2; | ||
++i2; | ||
b = i2; | ||
for (i1 = 0; i1 < s1_len; ++i1) { | ||
// c: diagonal value plus 1 when the code units differ, plus 0 when equal. | ||
c = a + (s1.charCodeAt(i1) !== c2 ? 1 : 0); | ||
// a now holds the value left in this column by the previous pass; it is | ||
// read before row[i1] is overwritten below and becomes the diagonal for | ||
// the next column. | ||
a = row[i1]; | ||
// Equivalent to min(b + 1, a + 1, c). | ||
b = b < a ? (b < c ? b + 1 : c) : (a < c ? a + 1 : c); | ||
row[i1] = b; | ||
} | ||
} | ||
// Both loops ran at least once here, so b is the last cell computed. | ||
return b; | ||
} else { | ||
// At least one string is empty, so the sum is the length of the other one. | ||
return s1_len + s2_len; | ||
} | ||
} | ||
}; | ||
})(); | ||
|
||
function initSearch(rawSearchIndex) { | ||
var currentResults, index, searchIndex; | ||
var MAX_LEV_DISTANCE = 3; | ||
var params = getQueryStringParams(); | ||
|
||
// Populate search bar with query string search term when provided, | ||
|
@@ -140,7 +175,7 @@ | |
split = valLower.split("::"); | ||
|
||
//remove empty keywords | ||
for (var j = 0; j < split.length; j++) { | ||
for (var j = 0; j < split.length; ++j) { | ||
split[j].toLowerCase(); | ||
if (split[j] === "") { | ||
split.splice(j, 1); | ||
|
@@ -153,7 +188,7 @@ | |
val.charAt(val.length - 1) === val.charAt(0)) | ||
{ | ||
val = val.substr(1, val.length - 2); | ||
for (var i = 0; i < nSearchWords; i += 1) { | ||
for (var i = 0; i < nSearchWords; ++i) { | ||
if (searchWords[i] === val) { | ||
// filter type: ... queries | ||
if (typeFilter < 0 || typeFilter === searchIndex[i].ty) { | ||
|
@@ -167,15 +202,31 @@ | |
} else { | ||
// gather matching search results up to a certain maximum | ||
val = val.replace(/\_/g, ""); | ||
for (var i = 0; i < split.length; i++) { | ||
for (var j = 0; j < nSearchWords; j += 1) { | ||
for (var i = 0; i < split.length; ++i) { | ||
for (var j = 0; j < nSearchWords; ++j) { | ||
var lev_distance; | ||
if (searchWords[j].indexOf(split[i]) > -1 || | ||
searchWords[j].indexOf(val) > -1 || | ||
searchWords[j].replace(/_/g, "").indexOf(val) > -1) | ||
{ | ||
// filter type: ... queries | ||
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) { | ||
results.push({id: j, index: searchWords[j].replace(/_/g, "").indexOf(val)}); | ||
results.push({ | ||
id: j, | ||
index: searchWords[j].replace(/_/g, "").indexOf(val), | ||
lev: 0, | ||
}); | ||
} | ||
} else if ( | ||
(lev_distance = levenshtein(searchWords[j], val)) <= | ||
MAX_LEV_DISTANCE) { | ||
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) { | ||
results.push({ | ||
id: j, | ||
index: 0, | ||
// we want lev results to go lower than others | ||
lev: lev_distance, | ||
}); | ||
} | ||
} | ||
if (results.length === max) { | ||
|
@@ -186,7 +237,7 @@ | |
} | ||
|
||
var nresults = results.length; | ||
for (var i = 0; i < nresults; i += 1) { | ||
for (var i = 0; i < nresults; ++i) { | ||
results[i].word = searchWords[results[i].id]; | ||
results[i].item = searchIndex[results[i].id] || {}; | ||
} | ||
|
@@ -198,6 +249,12 @@ | |
results.sort(function(aaa, bbb) { | ||
var a, b; | ||
|
||
// Sort by non levenshtein results and then levenshtein results by the distance | ||
// (less changes required to match means higher rankings) | ||
a = (aaa.lev); | ||
b = (bbb.lev); | ||
if (a !== b) return a - b; | ||
|
||
// sort by crate (non-current crate goes later) | ||
a = (aaa.item.crate !== window.currentCrate); | ||
b = (bbb.item.crate !== window.currentCrate); | ||
|
@@ -255,7 +312,7 @@ | |
results[i].id = -1; | ||
} | ||
} | ||
for (var i = 0; i < results.length; i++) { | ||
for (var i = 0; i < results.length; ++i) { | ||
var result = results[i], | ||
name = result.item.name.toLowerCase(), | ||
path = result.item.path.toLowerCase(), | ||
|
@@ -285,38 +342,23 @@ | |
* @return {[boolean]} [Whether the result is valid or not] | ||
*/ | ||
function validateResult(name, path, keys, parent) { | ||
//initially valid | ||
var validate = true; | ||
//if there is a parent, then validate against parent | ||
if (parent !== undefined) { | ||
for (var i = 0; i < keys.length; i++) { | ||
// if previous keys are valid and current key is in the | ||
// path, name or parent | ||
if ((validate) && | ||
(name.toLowerCase().indexOf(keys[i]) > -1 || | ||
path.toLowerCase().indexOf(keys[i]) > -1 || | ||
parent.name.toLowerCase().indexOf(keys[i]) > -1)) | ||
{ | ||
validate = true; | ||
} else { | ||
validate = false; | ||
} | ||
} | ||
} else { | ||
for (var i = 0; i < keys.length; i++) { | ||
// if previous keys are valid and current key is in the | ||
// path, name | ||
if ((validate) && | ||
(name.toLowerCase().indexOf(keys[i]) > -1 || | ||
path.toLowerCase().indexOf(keys[i]) > -1)) | ||
{ | ||
validate = true; | ||
} else { | ||
validate = false; | ||
} | ||
for (var i=0; i < keys.length; ++i) { | ||
// each check is for validation so we negate the conditions and invalidate | ||
if (!( | ||
// check for an exact name match | ||
name.toLowerCase().indexOf(keys[i]) > -1 || | ||
// then an exact path match | ||
path.toLowerCase().indexOf(keys[i]) > -1 || | ||
// next if there is a parent, check for exact parent match | ||
(parent !== undefined && | ||
parent.name.toLowerCase().indexOf(keys[i]) > -1) || | ||
// lastly check to see if the name was a levenshtein match | ||
levenshtein(name.toLowerCase(), keys[i]) <= | ||
MAX_LEV_DISTANCE)) { | ||
return false; | ||
} | ||
} | ||
return validate; | ||
return true; | ||
} | ||
|
||
function getQuery() { | ||
|
@@ -496,7 +538,7 @@ | |
|
||
resultIndex = execQuery(query, 20000, index); | ||
len = resultIndex.length; | ||
for (i = 0; i < len; i += 1) { | ||
for (i = 0; i < len; ++i) { | ||
if (resultIndex[i].id > -1) { | ||
obj = searchIndex[resultIndex[i].id]; | ||
filterdata.push([obj.name, obj.ty, obj.path, obj.desc]); | ||
|
@@ -568,7 +610,7 @@ | |
// faster analysis operations | ||
var len = items.length; | ||
var lastPath = ""; | ||
for (var i = 0; i < len; i += 1) { | ||
for (var i = 0; i < len; ++i) { | ||
var rawRow = items[i]; | ||
var row = {crate: crate, ty: rawRow[0], name: rawRow[1], | ||
path: rawRow[2] || lastPath, desc: rawRow[3], | ||
|
@@ -640,7 +682,7 @@ | |
crates.push(crate); | ||
} | ||
crates.sort(); | ||
for (var i = 0; i < crates.length; i++) { | ||
for (var i = 0; i < crates.length; ++i) { | ||
var klass = 'crate'; | ||
if (crates[i] == window.currentCrate) { | ||
klass += ' current'; | ||
|
@@ -657,10 +699,10 @@ | |
window.register_implementors = function(imp) { | ||
var list = $('#implementors-list'); | ||
var libs = Object.getOwnPropertyNames(imp); | ||
for (var i = 0; i < libs.length; i++) { | ||
for (var i = 0; i < libs.length; ++i) { | ||
if (libs[i] == currentCrate) continue; | ||
var structs = imp[libs[i]]; | ||
for (var j = 0; j < structs.length; j++) { | ||
for (var j = 0; j < structs.length; ++j) { | ||
var code = $('<code>').append(structs[j]); | ||
$.each(code.find('a'), function(idx, a) { | ||
var href = $(a).attr('href'); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Feels a bit funny to see this in a diff since Rust doesn't have this.