1
Fork 0

rustdoc-search: switch to recursive backtracking

This is significantly faster, because:

- It allows the one-element fast path to kick in on
  multi-element queries.
- It constructs intermediate data structures more lazily
  than the old system did.

It's measurably faster than the old algorithm even without the fast path,
but that fast path still helps significantly.
This commit is contained in:
Michael Howell 2023-11-18 12:31:46 -07:00
parent a66972d551
commit d82b3748d5

View file

@ -1331,7 +1331,7 @@ function initSearch(rawSearchIndex) {
/** /**
* @type Map<integer, integer>|null * @type Map<integer, integer>|null
*/ */
let mgens = mgensIn === null ? null : new Map(mgensIn); const mgens = mgensIn === null ? null : new Map(mgensIn);
if (queryElems.length === 0) { if (queryElems.length === 0) {
return !solutionCb || solutionCb(mgens); return !solutionCb || solutionCb(mgens);
} }
@ -1339,10 +1339,10 @@ function initSearch(rawSearchIndex) {
return false; return false;
} }
const ql = queryElems.length; const ql = queryElems.length;
let fl = fnTypesIn.length; const fl = fnTypesIn.length;
// Fast path // One element fast path / base case
if (queryElems.length === 1 && queryElems[0].generics.length === 0) { if (ql === 1 && queryElems[0].generics.length === 0) {
const queryElem = queryElems[0]; const queryElem = queryElems[0];
for (const fnType of fnTypesIn) { for (const fnType of fnTypesIn) {
if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) {
@ -1396,183 +1396,113 @@ function initSearch(rawSearchIndex) {
return false; return false;
} }
// Slow path // Multiple element recursive case
/** /**
* @type Array<FunctionType> * @type Array<FunctionType>
*/ */
let fnTypes = fnTypesIn.slice(); const fnTypes = fnTypesIn.slice();
/** /**
* loop works by building up a solution set in the working arrays * Algorithm works by building up a solution set in the working arrays
* fnTypes gets mutated in place to make this work, while queryElems * fnTypes gets mutated in place to make this work, while queryElems
* is left alone * is left alone.
* *
* vvvvvvv `i` points here * It works backwards, because arrays can be cheaply truncated that way.
* queryElems = [ good, good, good, unknown, unknown ], *
* fnTypes = [ good, good, good, unknown, unknown ], * vvvvvvv `queryElem`
* ---------------- ^^^^^^^^^^^^^^^^ `j` iterates after `i`, * queryElems = [ unknown, unknown, good, good, good ]
* | looking for candidates * fnTypes = [ unknown, unknown, good, good, good ]
* everything before `i` is the * ^^^^^^^^^^^^^^^^ loop over these elements to find candidates
* current working solution
* *
* Everything in the current working solution is known to be a good * Everything in the current working solution is known to be a good
* match, but it might not be the match we wind up going with, because * match, but it might not be the match we wind up going with, because
* there might be more than one candidate match, and we need to try them all * there might be more than one candidate match, and we need to try them all
* before giving up. So, to handle this, it backtracks on failure. * before giving up. So, to handle this, it backtracks on failure.
*
* @type Array<{
* "fnTypesScratch": Array<FunctionType>,
* "queryElemsOffset": integer,
* "fnTypesOffset": integer
* }>
*/ */
const backtracking = []; const flast = fl - 1;
let i = 0; const qlast = ql - 1;
let j = 0; const queryElem = queryElems[qlast];
const backtrack = () => { let queryElemsTmp = null;
while (backtracking.length !== 0) { for (let i = flast; i >= 0; i -= 1) {
// this session failed, but there are other possible solutions const fnType = fnTypes[i];
// to backtrack, reset to (a copy of) the old array, do the swap or unboxing if (!unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) {
const { continue;
fnTypesScratch,
mgensScratch,
queryElemsOffset,
fnTypesOffset,
unbox,
} = backtracking.pop();
mgens = mgensScratch !== null ? new Map(mgensScratch) : null;
const fnType = fnTypesScratch[fnTypesOffset];
const queryElem = queryElems[queryElemsOffset];
if (unbox) {
if (fnType.id < 0) {
if (mgens === null) {
mgens = new Map();
} else if (mgens.has(fnType.id) && mgens.get(fnType.id) !== 0) {
continue;
}
mgens.set(fnType.id, 0);
}
const generics = fnType.id < 0 ?
whereClause[(-fnType.id) - 1] :
fnType.generics;
fnTypes = fnTypesScratch.toSpliced(fnTypesOffset, 1, ...generics);
fl = fnTypes.length;
// re-run the matching algorithm on this item
i = queryElemsOffset - 1;
} else {
if (fnType.id < 0) {
if (mgens === null) {
mgens = new Map();
} else if (mgens.has(fnType.id) &&
mgens.get(fnType.id) !== queryElem.id) {
continue;
}
mgens.set(fnType.id, queryElem.id);
}
fnTypes = fnTypesScratch.slice();
fl = fnTypes.length;
const tmp = fnTypes[queryElemsOffset];
fnTypes[queryElemsOffset] = fnTypes[fnTypesOffset];
fnTypes[fnTypesOffset] = tmp;
// this is known as a good match; go to the next one
i = queryElemsOffset;
}
return true;
} }
return false; let mgensScratch;
}; if (fnType.id < 0) {
for (i = 0; i !== ql; ++i) { mgensScratch = new Map(mgens);
const queryElem = queryElems[i]; if (mgensScratch.has(fnType.id)
/** && mgensScratch.get(fnType.id) !== queryElem.id) {
* list of potential function types that go with the current query element. continue;
* @type Array<integer> }
*/ mgensScratch.set(fnType.id, queryElem.id);
const matchCandidates = []; } else {
let fnTypesScratch = null; mgensScratch = mgens;
let mgensScratch = null; }
// don't try anything before `i`, because they've already been // fnTypes[i] is a potential match
// paired off with the other query elements // fnTypes[flast] is the last item in the list
for (j = i; j !== fl; ++j) { // swap them, and drop the potential match from the list
const fnType = fnTypes[j]; // check if the remaining function types also match
if (unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens)) { fnTypes[i] = fnTypes[flast];
if (!fnTypesScratch) { fnTypes.length = flast;
fnTypesScratch = fnTypes.slice(); if (!queryElemsTmp) {
queryElemsTmp = queryElems.slice(0, qlast);
}
const passesUnification = unifyFunctionTypes(
fnTypes,
queryElemsTmp,
whereClause,
mgensScratch,
mgensScratch => {
if (fnType.generics.length === 0 && queryElem.generics.length === 0) {
return !solutionCb || solutionCb(mgensScratch);
} }
unifyFunctionTypes( return unifyFunctionTypes(
fnType.generics, fnType.generics,
queryElem.generics, queryElem.generics,
whereClause, whereClause,
mgens, mgensScratch,
mgensScratch => { solutionCb
matchCandidates.push({
fnTypesScratch,
mgensScratch,
queryElemsOffset: i,
fnTypesOffset: j,
unbox: false,
});
return false; // "reject" all candidates to gather all of them
}
); );
} }
if (unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) { );
if (!fnTypesScratch) { if (passesUnification) {
fnTypesScratch = fnTypes.slice(); return true;
}
if (!mgensScratch && mgens !== null) {
mgensScratch = new Map(mgens);
}
backtracking.push({
fnTypesScratch,
mgensScratch,
queryElemsOffset: i,
fnTypesOffset: j,
unbox: true,
});
}
} }
if (matchCandidates.length === 0) { // backtrack
if (backtrack()) { fnTypes[flast] = fnTypes[i];
fnTypes[i] = fnType;
fnTypes.length = fl;
}
for (let i = flast; i >= 0; i -= 1) {
const fnType = fnTypes[i];
if (!unifyFunctionTypeIsUnboxCandidate(fnType, queryElem, whereClause, mgens)) {
continue;
}
let mgensScratch;
if (fnType.id < 0) {
mgensScratch = new Map(mgens);
if (mgensScratch.has(fnType.id) && mgensScratch.get(fnType.id) !== 0) {
continue; continue;
} else {
return false;
} }
mgensScratch.set(fnType.id, 0);
} else {
mgensScratch = mgens;
} }
// use the current candidate const generics = fnType.id < 0 ?
const {fnTypesOffset: candidate, mgensScratch: mgensNew} = matchCandidates.pop(); whereClause[(-fnType.id) - 1] :
if (fnTypes[candidate].id < 0 && queryElems[i].id < 0) { fnType.generics;
if (mgens === null) { const passesUnification = unifyFunctionTypes(
mgens = new Map(); fnTypes.toSpliced(i, 1, ...generics),
} queryElems,
mgens.set(fnTypes[candidate].id, queryElems[i].id); whereClause,
} mgensScratch,
if (mgensNew !== null) { solutionCb
if (mgens === null) { );
mgens = mgensNew; if (passesUnification) {
} else { return true;
for (const [fid, qid] of mgensNew) {
mgens.set(fid, qid);
}
}
}
// `i` and `j` are paired off
// `queryElems[i]` is left in place
// `fnTypes[j]` is swapped with `fnTypes[i]` to pair them off
const tmp = fnTypes[candidate];
fnTypes[candidate] = fnTypes[i];
fnTypes[i] = tmp;
// write other candidates to backtracking queue
for (const otherCandidate of matchCandidates) {
backtracking.push(otherCandidate);
}
// If we're on the last item, check the solution with the callback
// backtrack if the callback says its unsuitable
while (i === (ql - 1) && solutionCb && !solutionCb(mgens)) {
if (!backtrack()) {
return false;
}
} }
} }
return true; return false;
} }
function unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens) { function unifyFunctionTypeIsMatchCandidate(fnType, queryElem, whereClause, mgens) {
// type filters look like `trait:Read` or `enum:Result` // type filters look like `trait:Read` or `enum:Result`