Fix out of memory in hash computation

This commit is contained in:
Edoardo Pirovano 2021-06-07 12:07:56 +01:00 committed by Edoardo Pirovano
parent 242fd828aa
commit 02e8dcfe9c
10 changed files with 151 additions and 91 deletions

34
lib/fingerprints.js generated
View file

@@ -36,9 +36,9 @@ function computeFirstMod() {
* the hashes of the lines near the end of the file.
*
* @param callback function that is called with the line number (1-based) and hash for every line
* @param input The file's contents
* @param filepath The path to the file to hash
*/
function hash(callback, input) {
async function hash(callback, filepath) {
// A rolling view in to the input
const window = Array(BLOCK_SIZE).fill(0);
// If the character in the window is the start of a new line
@@ -82,12 +82,11 @@ function hash(callback, input) {
// as we go. Once we reach a point in the window again then we've processed
// BLOCK_SIZE characters and if the last character at this point in the window
// was the start of a line then we should output the hash for that line.
for (let i = 0, len = input.length; i <= len; i++) {
let current = i === len ? 65535 : input.charCodeAt(i);
const processCharacter = function (current) {
// skip tabs, spaces, and line feeds that come directly after a carriage return
if (current === space || current === tab || (prevCR && current === lf)) {
prevCR = false;
continue;
return;
}
// replace CR with LF
if (current === cr) {
@@ -109,7 +108,18 @@ function hash(callback, input) {
lineStart = true;
}
updateHash(current);
}
};
await new Promise((fulfill) => {
const readStream = fs.createReadStream(filepath, "utf8");
readStream.on("close", fulfill);
readStream.on("end", () => {
processCharacter(65535);
});
readStream.on("data", (data) => {
for (let i = 0; i < data.length; ++i)
processCharacter(data.charCodeAt(i));
});
});
// Flush the remaining lines
for (let i = 0; i < BLOCK_SIZE; i++) {
if (lineNumbers[index] !== -1) {
@@ -206,8 +216,8 @@ function resolveUriToFile(location, artifacts, checkoutPath, logger) {
exports.resolveUriToFile = resolveUriToFile;
// Compute fingerprints for results in the given sarif file
// and return an updated sarif file contents.
function addFingerprints(sarifContents, checkoutPath, logger) {
var _a, _b;
async function addFingerprints(sarifContents, checkoutPath, logger) {
var _a, _b, _c, _d, _e;
const sarif = JSON.parse(sarifContents);
// Gather together results for the same file and construct
// callbacks to accept hashes for that file and update the location
@@ -222,6 +232,11 @@ function addFingerprints(sarifContents, checkoutPath, logger) {
logger.debug(`Unable to compute fingerprint for invalid location: ${JSON.stringify(primaryLocation)}`);
continue;
}
if (typeof ((_e = (_d = (_c = primaryLocation) === null || _c === void 0 ? void 0 : _c.physicalLocation) === null || _d === void 0 ? void 0 : _d.region) === null || _e === void 0 ? void 0 : _e.startLine) ===
"undefined") {
// Locations without a line number are unlikely to be source files
continue;
}
const filepath = resolveUriToFile(primaryLocation.physicalLocation.artifactLocation, artifacts, checkoutPath, logger);
if (!filepath) {
continue;
@@ -240,8 +255,7 @@ function addFingerprints(sarifContents, checkoutPath, logger) {
c(lineNumber, hashValue);
}
};
const fileContents = fs.readFileSync(filepath).toString();
hash(teeCallback, fileContents);
await hash(teeCallback, filepath);
}
return JSON.stringify(sarif);
}