Fix out of memory in hash computation

This commit is contained in:
Edoardo Pirovano 2021-06-07 12:07:56 +01:00 committed by Edoardo Pirovano
parent 242fd828aa
commit 02e8dcfe9c
10 changed files with 151 additions and 91 deletions

34
lib/fingerprints.js generated
View file

@ -36,9 +36,9 @@ function computeFirstMod() {
* the hashes of the lines near the end of the file.
*
* @param callback function that is called with the line number (1-based) and hash for every line
* @param input The file's contents
* @param filepath The path to the file to hash
*/
function hash(callback, input) {
async function hash(callback, filepath) {
// A rolling view in to the input
const window = Array(BLOCK_SIZE).fill(0);
// If the character in the window is the start of a new line
@ -82,12 +82,11 @@ function hash(callback, input) {
// as we go. Once we reach a point in the window again then we've processed
// BLOCK_SIZE characters and if the last character at this point in the window
// was the start of a line then we should output the hash for that line.
for (let i = 0, len = input.length; i <= len; i++) {
let current = i === len ? 65535 : input.charCodeAt(i);
const processCharacter = function (current) {
// skip tabs, spaces, and line feeds that come directly after a carriage return
if (current === space || current === tab || (prevCR && current === lf)) {
prevCR = false;
continue;
return;
}
// replace CR with LF
if (current === cr) {
@ -109,7 +108,18 @@ function hash(callback, input) {
lineStart = true;
}
updateHash(current);
}
};
await new Promise((fulfill) => {
const readStream = fs.createReadStream(filepath, "utf8");
readStream.on("close", fulfill);
readStream.on("end", () => {
processCharacter(65535);
});
readStream.on("data", (data) => {
for (let i = 0; i < data.length; ++i)
processCharacter(data.charCodeAt(i));
});
});
// Flush the remaining lines
for (let i = 0; i < BLOCK_SIZE; i++) {
if (lineNumbers[index] !== -1) {
@ -206,8 +216,8 @@ function resolveUriToFile(location, artifacts, checkoutPath, logger) {
exports.resolveUriToFile = resolveUriToFile;
// Compute fingerprints for results in the given sarif file
// and return an updated sarif file contents.
function addFingerprints(sarifContents, checkoutPath, logger) {
var _a, _b;
async function addFingerprints(sarifContents, checkoutPath, logger) {
var _a, _b, _c, _d, _e;
const sarif = JSON.parse(sarifContents);
// Gather together results for the same file and construct
// callbacks to accept hashes for that file and update the location
@ -222,6 +232,11 @@ function addFingerprints(sarifContents, checkoutPath, logger) {
logger.debug(`Unable to compute fingerprint for invalid location: ${JSON.stringify(primaryLocation)}`);
continue;
}
if (typeof ((_e = (_d = (_c = primaryLocation) === null || _c === void 0 ? void 0 : _c.physicalLocation) === null || _d === void 0 ? void 0 : _d.region) === null || _e === void 0 ? void 0 : _e.startLine) ===
"undefined") {
// Locations without a line number are unlikely to be source files
continue;
}
const filepath = resolveUriToFile(primaryLocation.physicalLocation.artifactLocation, artifacts, checkoutPath, logger);
if (!filepath) {
continue;
@ -240,8 +255,7 @@ function addFingerprints(sarifContents, checkoutPath, logger) {
c(lineNumber, hashValue);
}
};
const fileContents = fs.readFileSync(filepath).toString();
hash(teeCallback, fileContents);
await hash(teeCallback, filepath);
}
return JSON.stringify(sarif);
}

File diff suppressed because one or more lines are too long

View file

@ -16,28 +16,33 @@ const ava_1 = __importDefault(require("ava"));
const fingerprints = __importStar(require("./fingerprints"));
const logging_1 = require("./logging");
const testing_utils_1 = require("./testing-utils");
const util = __importStar(require("./util"));
testing_utils_1.setupTests(ava_1.default);
function testHash(t, input, expectedHashes) {
let index = 0;
const callback = function (lineNumber, hash) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
fingerprints.hash(callback, input);
t.is(index, input.split(/\r\n|\r|\n/).length);
async function testHash(t, input, expectedHashes) {
await util.withTmpDir(async (tmpDir) => {
const tmpFile = path.resolve(tmpDir, "testfile");
fs.writeFileSync(tmpFile, input);
let index = 0;
const callback = function (lineNumber, hash) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
await fingerprints.hash(callback, tmpFile);
t.is(index, input.split(/\r\n|\r|\n/).length);
});
}
ava_1.default("hash", (t) => {
ava_1.default("hash", async (t) => {
// Try empty file
testHash(t, "", ["c129715d7a2bc9a3:1"]);
await testHash(t, "", ["c129715d7a2bc9a3:1"]);
// Try various combinations of newline characters
testHash(t, " a\nb\n \t\tc\n d", [
await testHash(t, " a\nb\n \t\tc\n d", [
"271789c17abda88f:1",
"54703d4cd895b18:1",
"180aee12dab6264:1",
"a23a3dc5e078b07b:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
"8b7cf3e952e7aeb2:1",
"b1ae1287ec4718d9:1",
"bff680108adb0fcc:1",
@ -45,7 +50,7 @@ ava_1.default("hash", (t) => {
"b86d3392aea1be30:1",
"e6ceba753e1a442:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -54,7 +59,7 @@ ava_1.default("hash", (t) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
await testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -63,7 +68,7 @@ ava_1.default("hash", (t) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n", [
await testHash(t, " hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -72,7 +77,7 @@ ava_1.default("hash", (t) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
await testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -82,7 +87,7 @@ ava_1.default("hash", (t) => {
"c129715d7a2bc9a3:1",
]);
// Try repeating line that will generate identical hashes
testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
await testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
"a7f2ff13bc495cf2:1",
"a7f2ff13bc495cf2:2",
"a7f2ff13bc495cf2:3",
@ -95,7 +100,7 @@ ava_1.default("hash", (t) => {
"cc97dc7b1d7d8f7b:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, "x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n", [
await testHash(t, "x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n", [
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
@ -150,7 +155,7 @@ ava_1.default("resolveUriToFile", (t) => {
t.is(testResolveUriToFile(dirpath, undefined, []), undefined);
t.is(testResolveUriToFile(`file://${dirpath}`, undefined, []), undefined);
});
ava_1.default("addFingerprints", (t) => {
ava_1.default("addFingerprints", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting.input.sarif`)
@ -163,9 +168,9 @@ ava_1.default("addFingerprints", (t) => {
expected = JSON.stringify(JSON.parse(expected));
// The URIs in the SARIF files resolve to files in the testdata directory
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(fingerprints.addFingerprints(input, checkoutPath, logging_1.getRunnerLogger(true)), expected);
t.deepEqual(await fingerprints.addFingerprints(input, checkoutPath, logging_1.getRunnerLogger(true)), expected);
});
ava_1.default("missingRegions", (t) => {
ava_1.default("missingRegions", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting2.input.sarif`)
@ -178,6 +183,6 @@ ava_1.default("missingRegions", (t) => {
expected = JSON.stringify(JSON.parse(expected));
// The URIs in the SARIF files resolve to files in the testdata directory
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(fingerprints.addFingerprints(input, checkoutPath, logging_1.getRunnerLogger(true)), expected);
t.deepEqual(await fingerprints.addFingerprints(input, checkoutPath, logging_1.getRunnerLogger(true)), expected);
});
//# sourceMappingURL=fingerprints.test.js.map

File diff suppressed because one or more lines are too long

2
lib/upload-lib.js generated
View file

@ -243,7 +243,7 @@ async function uploadFiles(sarifFiles, repositoryNwo, commitOid, ref, analysisKe
validateSarifFileSchema(file, logger);
}
let sarifPayload = combineSarifFiles(sarifFiles);
sarifPayload = fingerprints.addFingerprints(sarifPayload, checkoutPath, logger);
sarifPayload = await fingerprints.addFingerprints(sarifPayload, checkoutPath, logger);
sarifPayload = populateRunAutomationDetails(sarifPayload, category, analysisKey, environment);
const zippedSarif = zlib_1.default.gzipSync(sarifPayload).toString("base64");
const checkoutURI = file_url_1.default(checkoutPath);

File diff suppressed because one or more lines are too long

View file

@ -7,32 +7,41 @@ import test from "ava";
import * as fingerprints from "./fingerprints";
import { getRunnerLogger } from "./logging";
import { setupTests } from "./testing-utils";
import * as util from "./util";
setupTests(test);
function testHash(t: ava.Assertions, input: string, expectedHashes: string[]) {
let index = 0;
const callback = function (lineNumber: number, hash: string) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
fingerprints.hash(callback, input);
t.is(index, input.split(/\r\n|\r|\n/).length);
async function testHash(
t: ava.Assertions,
input: string,
expectedHashes: string[]
) {
await util.withTmpDir(async (tmpDir) => {
const tmpFile = path.resolve(tmpDir, "testfile");
fs.writeFileSync(tmpFile, input);
let index = 0;
const callback = function (lineNumber: number, hash: string) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
await fingerprints.hash(callback, tmpFile);
t.is(index, input.split(/\r\n|\r|\n/).length);
});
}
test("hash", (t: ava.Assertions) => {
test("hash", async (t: ava.Assertions) => {
// Try empty file
testHash(t, "", ["c129715d7a2bc9a3:1"]);
await testHash(t, "", ["c129715d7a2bc9a3:1"]);
// Try various combinations of newline characters
testHash(t, " a\nb\n \t\tc\n d", [
await testHash(t, " a\nb\n \t\tc\n d", [
"271789c17abda88f:1",
"54703d4cd895b18:1",
"180aee12dab6264:1",
"a23a3dc5e078b07b:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
"8b7cf3e952e7aeb2:1",
"b1ae1287ec4718d9:1",
"bff680108adb0fcc:1",
@ -40,7 +49,7 @@ test("hash", (t: ava.Assertions) => {
"b86d3392aea1be30:1",
"e6ceba753e1a442:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -49,7 +58,7 @@ test("hash", (t: ava.Assertions) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
await testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -58,16 +67,20 @@ test("hash", (t: ava.Assertions) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
"e20e36e16fcb0cc8:1",
"b3edc88f2938467e:1",
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
await testHash(
t,
" hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n",
[
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
"e20e36e16fcb0cc8:1",
"b3edc88f2938467e:1",
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]
);
await testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@ -78,7 +91,7 @@ test("hash", (t: ava.Assertions) => {
]);
// Try repeating line that will generate identical hashes
testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
await testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
"a7f2ff13bc495cf2:1",
"a7f2ff13bc495cf2:2",
"a7f2ff13bc495cf2:3",
@ -92,16 +105,20 @@ test("hash", (t: ava.Assertions) => {
"c129715d7a2bc9a3:1",
]);
testHash(t, "x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n", [
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
"5c482a0f8b35ea28:1",
"54517377da7028d2:1",
"2c644846cb18d53e:1",
"f1b89f20de0d133:1",
"c129715d7a2bc9a3:1",
]);
await testHash(
t,
"x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n",
[
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
"5c482a0f8b35ea28:1",
"54517377da7028d2:1",
"2c644846cb18d53e:1",
"f1b89f20de0d133:1",
"c129715d7a2bc9a3:1",
]
);
});
function testResolveUriToFile(uri: any, index: any, artifactsURIs: any[]) {
@ -170,7 +187,7 @@ test("resolveUriToFile", (t) => {
t.is(testResolveUriToFile(`file://${dirpath}`, undefined, []), undefined);
});
test("addFingerprints", (t) => {
test("addFingerprints", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting.input.sarif`)
@ -187,12 +204,16 @@ test("addFingerprints", (t) => {
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(
fingerprints.addFingerprints(input, checkoutPath, getRunnerLogger(true)),
await fingerprints.addFingerprints(
input,
checkoutPath,
getRunnerLogger(true)
),
expected
);
});
test("missingRegions", (t) => {
test("missingRegions", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting2.input.sarif`)
@ -209,7 +230,11 @@ test("missingRegions", (t) => {
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(
fingerprints.addFingerprints(input, checkoutPath, getRunnerLogger(true)),
await fingerprints.addFingerprints(
input,
checkoutPath,
getRunnerLogger(true)
),
expected
);
});

View file

@ -34,9 +34,9 @@ type hashCallback = (lineNumber: number, hash: string) => void;
* the hashes of the lines near the end of the file.
*
* @param callback function that is called with the line number (1-based) and hash for every line
* @param input The file's contents
* @param filepath The path to the file to hash
*/
export function hash(callback: hashCallback, input: string) {
export async function hash(callback: hashCallback, filepath: string) {
// A rolling view in to the input
const window = Array(BLOCK_SIZE).fill(0);
@ -87,12 +87,11 @@ export function hash(callback: hashCallback, input: string) {
// as we go. Once we reach a point in the window again then we've processed
// BLOCK_SIZE characters and if the last character at this point in the window
// was the start of a line then we should output the hash for that line.
for (let i = 0, len = input.length; i <= len; i++) {
let current = i === len ? 65535 : input.charCodeAt(i);
const processCharacter = function (current: number) {
// skip tabs, spaces, and line feeds that come directly after a carriage return
if (current === space || current === tab || (prevCR && current === lf)) {
prevCR = false;
continue;
return;
}
// replace CR with LF
if (current === cr) {
@ -113,7 +112,19 @@ export function hash(callback: hashCallback, input: string) {
lineStart = true;
}
updateHash(current);
}
};
await new Promise((fulfill) => {
const readStream = fs.createReadStream(filepath, "utf8");
readStream.on("close", fulfill);
readStream.on("end", () => {
processCharacter(65535);
});
readStream.on("data", (data) => {
for (let i = 0; i < data.length; ++i)
processCharacter(data.charCodeAt(i));
});
});
// Flush the remaining lines
for (let i = 0; i < BLOCK_SIZE; i++) {
@ -237,11 +248,11 @@ export function resolveUriToFile(
// Compute fingerprints for results in the given sarif file
// and return an updated sarif file contents.
export function addFingerprints(
export async function addFingerprints(
sarifContents: string,
checkoutPath: string,
logger: Logger
): string {
): Promise<string> {
const sarif = JSON.parse(sarifContents);
// Gather together results for the same file and construct
@ -263,6 +274,14 @@ export function addFingerprints(
continue;
}
if (
typeof primaryLocation?.physicalLocation?.region?.startLine ===
"undefined"
) {
// Locations without a line number are unlikely to be source files
continue;
}
const filepath = resolveUriToFile(
primaryLocation.physicalLocation.artifactLocation,
artifacts,
@ -289,8 +308,7 @@ export function addFingerprints(
c(lineNumber, hashValue);
}
};
const fileContents = fs.readFileSync(filepath).toString();
hash(teeCallback, fileContents);
await hash(teeCallback, filepath);
}
return JSON.stringify(sarif);

View file

@ -30,9 +30,7 @@
"message": {
"text": "This header file should contain a header guard to prevent multiple inclusion."
},
"partialFingerprints": {
"primaryLocationLineHash": "599c824c91d0f75e:1"
},
"partialFingerprints": {},
"ruleId": "cpp/missing-header-guard",
"ruleIndex": 0
}

View file

@ -356,7 +356,7 @@ async function uploadFiles(
}
let sarifPayload = combineSarifFiles(sarifFiles);
sarifPayload = fingerprints.addFingerprints(
sarifPayload = await fingerprints.addFingerprints(
sarifPayload,
checkoutPath,
logger