Merge remote-tracking branch 'upstream/main' into aeisenberg/pack-run

Andrew Eisenberg 2021-06-09 12:43:17 -07:00
commit 82388fd94a
36 changed files with 490 additions and 289 deletions

View file

@@ -8,6 +8,7 @@ import {
runAnalyze,
CodeQLAnalysisError,
QueriesStatusReport,
runCleanup,
} from "./analyze";
import { Config, getConfig } from "./config-utils";
import { getActionsLogger } from "./logging";
@@ -89,6 +90,20 @@ async function run() {
logger
);
if (actionsUtil.getOptionalInput("cleanup-level") !== "none") {
await runCleanup(
config,
actionsUtil.getOptionalInput("cleanup-level") || "brutal",
logger
);
}
const dbLocations: { [lang: string]: string } = {};
for (const language of config.languages) {
dbLocations[language] = util.getCodeQLDatabasePath(config, language);
}
core.setOutput("db-locations", dbLocations);
if (actionsUtil.getRequiredInput("upload") === "true") {
const uploadStats = await upload_lib.uploadFromActions(
outputDir,

View file

@@ -53,10 +53,16 @@ test("status report fields and search path setting", async (t) => {
for (const language of Object.values(Language)) {
setCodeQL({
packDownload: async () => ({ packs: [] }),
databaseAnalyze: async (
_,
sarifFile: string,
databaseRunQueries: async (
_db: string,
searchPath: string | undefined
) => {
searchPathsUsed.push(searchPath);
},
databaseInterpretResults: async (
_db: string,
_queriesRun: string[],
sarifFile: string
) => {
fs.writeFileSync(
sarifFile,
@@ -92,7 +98,6 @@ test("status report fields and search path setting", async (t) => {
],
})
);
searchPathsUsed.push(searchPath);
return "";
},
});
@@ -135,6 +140,9 @@ test("status report fields and search path setting", async (t) => {
t.true(
`analyze_builtin_queries_${language}_duration_ms` in builtinStatusReport
);
t.true(
`interpret_results_${language}_duration_ms` in builtinStatusReport
);
config.queries[language] = {
builtin: [],
@@ -158,7 +166,7 @@ test("status report fields and search path setting", async (t) => {
config,
getRunnerLogger(true)
);
t.deepEqual(Object.keys(customStatusReport).length, 1);
t.deepEqual(Object.keys(customStatusReport).length, 2);
t.true(
`analyze_custom_queries_${language}_duration_ms` in customStatusReport
);
@@ -166,6 +174,7 @@ test("status report fields and search path setting", async (t) => {
? [undefined, undefined, "/1", "/2", undefined]
: [undefined, "/1", "/2"];
t.deepEqual(searchPathsUsed, expectedSearchPathsUsed);
t.true(`interpret_results_${language}_duration_ms` in customStatusReport);
}
verifyLineCounts(tmpDir);
@@ -177,12 +186,7 @@ test("status report fields and search path setting", async (t) => {
Object.keys(Language).forEach((lang, i) => {
verifyLineCountForFile(
lang as Language,
path.join(tmpDir, `${lang}-builtin.sarif`),
i + 1
);
verifyLineCountForFile(
lang as Language,
path.join(tmpDir, `${lang}-custom.sarif`),
path.join(tmpDir, `${lang}.sarif`),
i + 1
);
});

View file

@@ -10,7 +10,6 @@ import { countLoc, getIdPrefix } from "./count-loc";
import { isScannedLanguage, Language } from "./languages";
import { Logger } from "./logging";
import * as sharedEnv from "./shared-environment";
import { combineSarifFiles } from "./upload-lib";
import * as util from "./util";
export class CodeQLAnalysisError extends Error {
@@ -25,34 +24,48 @@ export class CodeQLAnalysisError extends Error {
}
export interface QueriesStatusReport {
// Time taken in ms to analyze builtin queries for cpp (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for cpp (or undefined if this language was not analyzed)
analyze_builtin_queries_cpp_duration_ms?: number;
// Time taken in ms to analyze builtin queries for csharp (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for csharp (or undefined if this language was not analyzed)
analyze_builtin_queries_csharp_duration_ms?: number;
// Time taken in ms to analyze builtin queries for go (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for go (or undefined if this language was not analyzed)
analyze_builtin_queries_go_duration_ms?: number;
// Time taken in ms to analyze builtin queries for java (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for java (or undefined if this language was not analyzed)
analyze_builtin_queries_java_duration_ms?: number;
// Time taken in ms to analyze builtin queries for javascript (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for javascript (or undefined if this language was not analyzed)
analyze_builtin_queries_javascript_duration_ms?: number;
// Time taken in ms to analyze builtin queries for python (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for python (or undefined if this language was not analyzed)
analyze_builtin_queries_python_duration_ms?: number;
// Time taken in ms to analyze builtin queries for ruby (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for ruby (or undefined if this language was not analyzed)
analyze_builtin_queries_ruby_duration_ms?: number;
// Time taken in ms to analyze custom queries for cpp (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for cpp (or undefined if this language was not analyzed)
analyze_custom_queries_cpp_duration_ms?: number;
// Time taken in ms to analyze custom queries for csharp (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for csharp (or undefined if this language was not analyzed)
analyze_custom_queries_csharp_duration_ms?: number;
// Time taken in ms to analyze custom queries for go (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for go (or undefined if this language was not analyzed)
analyze_custom_queries_go_duration_ms?: number;
// Time taken in ms to analyze custom queries for java (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for java (or undefined if this language was not analyzed)
analyze_custom_queries_java_duration_ms?: number;
// Time taken in ms to analyze custom queries for javascript (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for javascript (or undefined if this language was not analyzed)
analyze_custom_queries_javascript_duration_ms?: number;
// Time taken in ms to analyze custom queries for python (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for python (or undefined if this language was not analyzed)
analyze_custom_queries_python_duration_ms?: number;
// Time taken in ms to analyze custom queries for ruby (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for ruby (or undefined if this language was not analyzed)
analyze_custom_queries_ruby_duration_ms?: number;
// Time taken in ms to interpret results for cpp (or undefined if this language was not analyzed)
interpret_results_cpp_duration_ms?: number;
// Time taken in ms to interpret results for csharp (or undefined if this language was not analyzed)
interpret_results_csharp_duration_ms?: number;
// Time taken in ms to interpret results for go (or undefined if this language was not analyzed)
interpret_results_go_duration_ms?: number;
// Time taken in ms to interpret results for java (or undefined if this language was not analyzed)
interpret_results_java_duration_ms?: number;
// Time taken in ms to interpret results for javascript (or undefined if this language was not analyzed)
interpret_results_javascript_duration_ms?: number;
// Time taken in ms to interpret results for python (or undefined if this language was not analyzed)
interpret_results_python_duration_ms?: number;
// Time taken in ms to interpret results for ruby (or undefined if this language was not analyzed)
interpret_results_ruby_duration_ms?: number;
// Name of language that errored during analysis (or undefined if no language failed)
analyze_failure_language?: string;
}
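For illustration, a report produced by a run that analyzed only JavaScript might look roughly like the sketch below (the durations are made-up values; fields for languages that were not analyzed stay undefined):

const exampleReport: QueriesStatusReport = {
  // Only the analyzed language's fields are populated.
  analyze_builtin_queries_javascript_duration_ms: 84213,
  analyze_custom_queries_javascript_duration_ms: 10570,
  interpret_results_javascript_duration_ms: 6021,
};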
@@ -163,7 +176,7 @@ export async function runQueries(
);
for (const language of config.languages) {
logger.startGroup(`Analyzing ${language}`);
logger.startGroup(`Running queries for ${language}`);
const queries = config.queries[language];
const packsWithVersion = config.packs[language] || [];
@@ -188,82 +201,65 @@ export async function runQueries(
);
}
let analysisSummaryBuiltIn = "";
const customAnalysisSummaries: string[] = [];
const querySuitePaths: string[] = [];
if (queries["builtin"].length > 0) {
const startTimeBuiltIn = new Date().getTime();
const { sarifFile, stdout } = await runQueryGroup(
language,
"builtin",
createQuerySuiteContents(queries["builtin"]),
sarifFolder,
undefined
querySuitePaths.push(
await runQueryGroup(
language,
"builtin",
createQuerySuiteContents(queries["builtin"]),
undefined
)
);
analysisSummaryBuiltIn = stdout;
await injectLinesOfCode(sarifFile, language, locPromise);
statusReport[`analyze_builtin_queries_${language}_duration_ms`] =
new Date().getTime() - startTimeBuiltIn;
}
const startTimeCustom = new Date().getTime();
const temporarySarifDir = config.tempDir;
const temporarySarifFiles: string[] = [];
let ranCustom = false;
for (let i = 0; i < queries["custom"].length; ++i) {
if (queries["custom"][i].queries.length > 0) {
const { sarifFile, stdout } = await runQueryGroup(
language,
`custom-${i}`,
createQuerySuiteContents(queries["custom"][i].queries),
temporarySarifDir,
queries["custom"][i].searchPath
querySuitePaths.push(
await runQueryGroup(
language,
`custom-${i}`,
createQuerySuiteContents(queries["custom"][i].queries),
queries["custom"][i].searchPath
)
);
customAnalysisSummaries.push(stdout);
temporarySarifFiles.push(sarifFile);
ranCustom = true;
}
}
if (packsWithVersion.length > 0) {
const { sarifFile, stdout } = await runQueryGroup(
language,
"packs",
createPackSuiteContents(packsWithVersion),
temporarySarifDir,
undefined
querySuitePaths.push(
await runQueryGroup(
language,
"packs",
createPackSuiteContents(packsWithVersion),
undefined
)
);
customAnalysisSummaries.push(stdout);
temporarySarifFiles.push(sarifFile);
ranCustom = true;
}
if (temporarySarifFiles.length > 0) {
const sarifFile = path.join(sarifFolder, `${language}-custom.sarif`);
fs.writeFileSync(sarifFile, combineSarifFiles(temporarySarifFiles));
await injectLinesOfCode(sarifFile, language, locPromise);
if (ranCustom) {
statusReport[`analyze_custom_queries_${language}_duration_ms`] =
new Date().getTime() - startTimeCustom;
}
logger.endGroup();
// Print the LoC baseline and the summary results from database analyze for the standard
// query suite and (if appropriate) each custom query suite.
logger.startGroup(`Analysis summary for ${language}`);
printLinesOfCodeSummary(logger, language, await locPromise);
logger.info(analysisSummaryBuiltIn);
for (const [i, customSummary] of customAnalysisSummaries.entries()) {
if (customSummary.trim() === "") {
continue;
}
const description =
customAnalysisSummaries.length === 1
? "custom queries"
: `custom query suite ${i + 1}/${customAnalysisSummaries.length}`;
logger.info(`Analysis summary for ${description}:`);
logger.info("");
logger.info(customSummary);
logger.info("");
}
logger.startGroup(`Interpreting results for ${language}`);
const startTimeInterpretResults = new Date().getTime();
const sarifFile = path.join(sarifFolder, `${language}.sarif`);
const analysisSummary = await runInterpretResults(
language,
querySuitePaths,
sarifFile
);
await injectLinesOfCode(sarifFile, language, locPromise);
statusReport[`interpret_results_${language}_duration_ms`] =
new Date().getTime() - startTimeInterpretResults;
logger.endGroup();
logger.info(analysisSummary);
printLinesOfCodeSummary(logger, language, await locPromise);
} catch (e) {
logger.info(e);
logger.info(e.stack);
@@ -277,13 +273,29 @@ export async function runQueries(
return statusReport;
async function runInterpretResults(
language: Language,
queries: string[],
sarifFile: string
): Promise<string> {
const databasePath = util.getCodeQLDatabasePath(config, language);
const codeql = getCodeQL(config.codeQLCmd);
return await codeql.databaseInterpretResults(
databasePath,
queries,
sarifFile,
addSnippetsFlag,
threadsFlag,
automationDetailsId
);
}
async function runQueryGroup(
language: Language,
type: string,
querySuiteContents: string,
destinationFolder: string,
searchPath: string | undefined
): Promise<{ sarifFile: string; stdout: string }> {
): Promise<string> {
const databasePath = util.getCodeQLDatabasePath(config, language);
// Pass the queries to codeql using a file instead of using the command
// line to avoid command line length restrictions, particularly on windows.
@@ -293,24 +305,17 @@ export async function runQueries(
`Query suite file for ${language}-${type}...\n${querySuiteContents}`
);
const sarifFile = path.join(destinationFolder, `${language}-${type}.sarif`);
const codeql = getCodeQL(config.codeQLCmd);
const databaseAnalyzeStdout = await codeql.databaseAnalyze(
await codeql.databaseRunQueries(
databasePath,
sarifFile,
searchPath,
querySuitePath,
memoryFlag,
addSnippetsFlag,
threadsFlag,
automationDetailsId
threadsFlag
);
logger.debug(
`SARIF results for database ${language} created at "${sarifFile}"`
);
return { sarifFile, stdout: databaseAnalyzeStdout };
logger.debug(`BQRS results produced for ${language} (queries: ${type})`);
return querySuitePath;
}
}
@@ -365,6 +370,20 @@ export async function runAnalyze(
return { ...queriesStats };
}
export async function runCleanup(
config: configUtils.Config,
cleanupLevel: string,
logger: Logger
): Promise<void> {
logger.startGroup("Cleaning up databases");
for (const language of config.languages) {
const codeql = getCodeQL(config.codeQLCmd);
const databasePath = util.getCodeQLDatabasePath(config, language);
await codeql.databaseCleanup(databasePath, cleanupLevel);
}
logger.endGroup();
}
async function injectLinesOfCode(
sarifFile: string,
language: Language,

View file

@@ -11,10 +11,13 @@ const pkg = require("../package.json");
setupTests(test);
let pluginStub: sinon.SinonStub;
let githubStub: sinon.SinonStub;
test.beforeEach(() => {
githubStub = sinon.stub(githubUtils, "GitHub");
pluginStub = sinon.stub(githubUtils.GitHub, "plugin");
githubStub = sinon.stub();
pluginStub.returns(githubStub);
initializeEnvironment(Mode.actions, pkg.version);
});

View file

@@ -1,6 +1,7 @@
import * as path from "path";
import * as githubUtils from "@actions/github/lib/utils";
import * as retry from "@octokit/plugin-retry";
import consoleLogLevel from "console-log-level";
import { getRequiredInput } from "./actions-util";
@@ -33,7 +34,8 @@ export const getApiClient = function (
) {
const auth =
(allowExternal && apiDetails.externalRepoAuth) || apiDetails.auth;
return new githubUtils.GitHub(
const retryingOctokit = githubUtils.GitHub.plugin(retry.retry);
return new retryingOctokit(
githubUtils.getOctokitOptions(auth, {
baseUrl: getApiUrl(apiDetails.url),
userAgent: `CodeQL-${getMode()}/${pkg.version}`,
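The same retry-plugin pattern in isolation, as a rough sketch (the token and base URL below are placeholders, not values used by the action):

import { GitHub, getOctokitOptions } from "@actions/github/lib/utils";
import * as retry from "@octokit/plugin-retry";

// Wrap the Actions-flavoured Octokit class so transient request failures are retried.
const RetryingOctokit = GitHub.plugin(retry.retry);
const client = new RetryingOctokit(
  getOctokitOptions("<token>", { baseUrl: "https://api.github.com" })
);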

View file

@@ -96,14 +96,26 @@ export interface CodeQL {
packDownload(packs: PackWithVersion[]): Promise<PackDownloadOutput>;
/**
* Run 'codeql database analyze'.
* Run 'codeql database cleanup'.
*/
databaseAnalyze(
databaseCleanup(databasePath: string, cleanupLevel: string): Promise<void>;
/**
* Run 'codeql database run-queries'.
*/
databaseRunQueries(
databasePath: string,
sarifFile: string,
extraSearchPath: string | undefined,
querySuite: string,
querySuitePath: string,
memoryFlag: string,
threadsFlag: string
): Promise<void>;
/**
* Run 'codeql database interpret-results'.
*/
databaseInterpretResults(
databasePath: string,
querySuitePaths: string[],
sarifFile: string,
addSnippetsFlag: string,
threadsFlag: string,
automationDetailsId: string | undefined
@@ -498,8 +510,13 @@ export function setCodeQL(partialCodeql: Partial<CodeQL>): CodeQL {
finalizeDatabase: resolveFunction(partialCodeql, "finalizeDatabase"),
resolveLanguages: resolveFunction(partialCodeql, "resolveLanguages"),
resolveQueries: resolveFunction(partialCodeql, "resolveQueries"),
databaseAnalyze: resolveFunction(partialCodeql, "databaseAnalyze"),
packDownload: resolveFunction(partialCodeql, "packDownload"),
databaseCleanup: resolveFunction(partialCodeql, "databaseCleanup"),
databaseRunQueries: resolveFunction(partialCodeql, "databaseRunQueries"),
databaseInterpretResults: resolveFunction(
partialCodeql,
"databaseInterpretResults"
),
};
return cachedCodeQL;
}
@@ -667,6 +684,7 @@ function getCodeQLForCmd(cmd: string): CodeQL {
[
"database",
"finalize",
"--finalize-dataset",
threadsFlag,
...getExtraOptionsFromEnv(["database", "finalize"]),
databasePath,
@@ -722,39 +740,53 @@ function getCodeQLForCmd(cmd: string): CodeQL {
throw new Error(`Unexpected output from codeql resolve queries: ${e}`);
}
},
async databaseAnalyze(
async databaseRunQueries(
databasePath: string,
sarifFile: string,
extraSearchPath: string | undefined,
querySuite: string,
querySuitePath: string,
memoryFlag: string,
threadsFlag: string
): Promise<void> {
const args = [
"database",
"run-queries",
memoryFlag,
threadsFlag,
databasePath,
"--min-disk-free=1024", // Try to leave at least 1GB free
"-v",
...getExtraOptionsFromEnv(["database", "run-queries"]),
];
if (extraSearchPath !== undefined) {
args.push("--additional-packs", extraSearchPath);
}
args.push(querySuitePath);
await new toolrunner.ToolRunner(cmd, args).exec();
},
async databaseInterpretResults(
databasePath: string,
querySuitePaths: string[],
sarifFile: string,
addSnippetsFlag: string,
threadsFlag: string,
automationDetailsId: string | undefined
): Promise<string> {
const args = [
"database",
"analyze",
memoryFlag,
"interpret-results",
threadsFlag,
databasePath,
"--min-disk-free=1024", // Try to leave at least 1GB free
"--format=sarif-latest",
"--sarif-multicause-markdown",
"--print-metrics-summary",
"--sarif-group-rules-by-pack",
"-v",
`--output=${sarifFile}`,
addSnippetsFlag,
// Enable progress verbosity so we log each query as it's interpreted. This aids debugging
// when interpretation takes a while for one of the queries being analyzed.
"-v",
...getExtraOptionsFromEnv(["database", "analyze"]),
...getExtraOptionsFromEnv(["database", "interpret-results"]),
];
if (extraSearchPath !== undefined) {
args.push("--additional-packs", extraSearchPath);
}
if (automationDetailsId !== undefined) {
args.push("--sarif-category", automationDetailsId);
}
args.push(querySuite);
args.push(databasePath, ...querySuitePaths);
// capture stdout, which contains analysis summaries
let output = "";
await new toolrunner.ToolRunner(cmd, args, {
@@ -814,6 +846,18 @@ function getCodeQLForCmd(cmd: string): CodeQL {
);
}
},
async databaseCleanup(
databasePath: string,
cleanupLevel: string
): Promise<void> {
const args = [
"database",
"cleanup",
databasePath,
`--mode=${cleanupLevel}`,
];
await new toolrunner.ToolRunner(cmd, args).exec();
},
};
}
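Taken together, the old single `database analyze` step is now split into three CLI calls driven by these wrappers. A minimal sketch of how a caller might chain them, assuming this module's getCodeQL helper, with illustrative paths and flag values (not the action's real config):

import { getCodeQL } from "./codeql";

async function analyzeAndShrink(dbPath: string, suitePath: string, sarifFile: string) {
  const codeql = getCodeQL("codeql"); // assumes `codeql` is on PATH
  // 1. Produce BQRS results for the query suite.
  await codeql.databaseRunQueries(dbPath, undefined, suitePath, "--ram=4096", "--threads=2");
  // 2. Interpret all results into a single SARIF file; the returned string is the analysis summary.
  const summary = await codeql.databaseInterpretResults(
    dbPath,
    [suitePath],
    sarifFile,
    "--sarif-add-snippets",
    "--threads=2",
    undefined // no automation details ID
  );
  // 3. Shrink the database now that results have been extracted.
  await codeql.databaseCleanup(dbPath, "brutal");
  return summary;
}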

View file

@@ -7,32 +7,41 @@ import test from "ava";
import * as fingerprints from "./fingerprints";
import { getRunnerLogger } from "./logging";
import { setupTests } from "./testing-utils";
import * as util from "./util";
setupTests(test);
function testHash(t: ava.Assertions, input: string, expectedHashes: string[]) {
let index = 0;
const callback = function (lineNumber: number, hash: string) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
fingerprints.hash(callback, input);
t.is(index, input.split(/\r\n|\r|\n/).length);
async function testHash(
t: ava.Assertions,
input: string,
expectedHashes: string[]
) {
await util.withTmpDir(async (tmpDir) => {
const tmpFile = path.resolve(tmpDir, "testfile");
fs.writeFileSync(tmpFile, input);
let index = 0;
const callback = function (lineNumber: number, hash: string) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
await fingerprints.hash(callback, tmpFile);
t.is(index, input.split(/\r\n|\r|\n/).length);
});
}
test("hash", (t: ava.Assertions) => {
test("hash", async (t: ava.Assertions) => {
// Try empty file
testHash(t, "", ["c129715d7a2bc9a3:1"]);
await testHash(t, "", ["c129715d7a2bc9a3:1"]);
// Try various combinations of newline characters
testHash(t, " a\nb\n \t\tc\n d", [
await testHash(t, " a\nb\n \t\tc\n d", [
"271789c17abda88f:1",
"54703d4cd895b18:1",
"180aee12dab6264:1",
"a23a3dc5e078b07b:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
"8b7cf3e952e7aeb2:1",
"b1ae1287ec4718d9:1",
"bff680108adb0fcc:1",
@@ -40,7 +49,7 @@ test("hash", (t: ava.Assertions) => {
"b86d3392aea1be30:1",
"e6ceba753e1a442:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@@ -49,7 +58,7 @@ test("hash", (t: ava.Assertions) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
await testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@@ -58,16 +67,20 @@ test("hash", (t: ava.Assertions) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
"e20e36e16fcb0cc8:1",
"b3edc88f2938467e:1",
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
await testHash(
t,
" hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n",
[
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
"e20e36e16fcb0cc8:1",
"b3edc88f2938467e:1",
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]
);
await testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@@ -78,7 +91,7 @@ test("hash", (t: ava.Assertions) => {
]);
// Try repeating line that will generate identical hashes
testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
await testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
"a7f2ff13bc495cf2:1",
"a7f2ff13bc495cf2:2",
"a7f2ff13bc495cf2:3",
@@ -92,16 +105,20 @@ test("hash", (t: ava.Assertions) => {
"c129715d7a2bc9a3:1",
]);
testHash(t, "x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n", [
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
"5c482a0f8b35ea28:1",
"54517377da7028d2:1",
"2c644846cb18d53e:1",
"f1b89f20de0d133:1",
"c129715d7a2bc9a3:1",
]);
await testHash(
t,
"x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n",
[
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
"5c482a0f8b35ea28:1",
"54517377da7028d2:1",
"2c644846cb18d53e:1",
"f1b89f20de0d133:1",
"c129715d7a2bc9a3:1",
]
);
});
function testResolveUriToFile(uri: any, index: any, artifactsURIs: any[]) {
@@ -170,7 +187,7 @@ test("resolveUriToFile", (t) => {
t.is(testResolveUriToFile(`file://${dirpath}`, undefined, []), undefined);
});
test("addFingerprints", (t) => {
test("addFingerprints", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting.input.sarif`)
@@ -187,12 +204,16 @@ test("addFingerprints", (t) => {
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(
fingerprints.addFingerprints(input, checkoutPath, getRunnerLogger(true)),
await fingerprints.addFingerprints(
input,
checkoutPath,
getRunnerLogger(true)
),
expected
);
});
test("missingRegions", (t) => {
test("missingRegions", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting2.input.sarif`)
@@ -209,7 +230,11 @@ test("missingRegions", (t) => {
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(
fingerprints.addFingerprints(input, checkoutPath, getRunnerLogger(true)),
await fingerprints.addFingerprints(
input,
checkoutPath,
getRunnerLogger(true)
),
expected
);
});

View file

@@ -8,6 +8,7 @@ const tab = "\t".charCodeAt(0);
const space = " ".charCodeAt(0);
const lf = "\n".charCodeAt(0);
const cr = "\r".charCodeAt(0);
const EOF = 65535;
const BLOCK_SIZE = 100;
const MOD = Long.fromInt(37); // L
@@ -34,9 +35,9 @@ type hashCallback = (lineNumber: number, hash: string) => void;
* the hashes of the lines near the end of the file.
*
* @param callback function that is called with the line number (1-based) and hash for every line
* @param input The file's contents
* @param filepath The path to the file to hash
*/
export function hash(callback: hashCallback, input: string) {
export async function hash(callback: hashCallback, filepath: string) {
// A rolling view in to the input
const window = Array(BLOCK_SIZE).fill(0);
@@ -87,12 +88,11 @@ export function hash(callback: hashCallback, input: string) {
// as we go. Once we reach a point in the window again then we've processed
// BLOCK_SIZE characters and if the last character at this point in the window
// was the start of a line then we should output the hash for that line.
for (let i = 0, len = input.length; i <= len; i++) {
let current = i === len ? 65535 : input.charCodeAt(i);
const processCharacter = function (current: number) {
// skip tabs, spaces, and line feeds that come directly after a carriage return
if (current === space || current === tab || (prevCR && current === lf)) {
prevCR = false;
continue;
return;
}
// replace CR with LF
if (current === cr) {
@@ -113,7 +113,15 @@ export function hash(callback: hashCallback, input: string) {
lineStart = true;
}
updateHash(current);
};
const readStream = fs.createReadStream(filepath, "utf8");
for await (const data of readStream) {
for (let i = 0; i < data.length; ++i) {
processCharacter(data.charCodeAt(i));
}
}
processCharacter(EOF);
// Flush the remaining lines
for (let i = 0; i < BLOCK_SIZE; i++) {
@@ -237,11 +245,11 @@ export function resolveUriToFile(
// Compute fingerprints for results in the given sarif file
// and return an updated sarif file contents.
export function addFingerprints(
export async function addFingerprints(
sarifContents: string,
checkoutPath: string,
logger: Logger
): string {
): Promise<string> {
const sarif = JSON.parse(sarifContents);
// Gather together results for the same file and construct
@@ -263,6 +271,11 @@ export function addFingerprints(
continue;
}
if (primaryLocation?.physicalLocation?.region?.startLine === undefined) {
// Locations without a line number are unlikely to be source files
continue;
}
const filepath = resolveUriToFile(
primaryLocation.physicalLocation.artifactLocation,
artifacts,
@@ -289,8 +302,7 @@ export function addFingerprints(
c(lineNumber, hashValue);
}
};
const fileContents = fs.readFileSync(filepath).toString();
hash(teeCallback, fileContents);
await hash(teeCallback, filepath);
}
return JSON.stringify(sarif);
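For reference, a minimal standalone use of the new file-based hash API: it streams the file and invokes the callback once per line with a 1-based line number and that line's hash (the import path assumes a caller inside the same src directory):

import { hash } from "./fingerprints";

async function collectLineHashes(filepath: string): Promise<Map<number, string>> {
  const hashes = new Map<number, string>();
  await hash((lineNumber, lineHash) => {
    hashes.set(lineNumber, lineHash);
  }, filepath);
  return hashes;
}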

View file

@@ -30,9 +30,7 @@
"message": {
"text": "This header file should contain a header guard to prevent multiple inclusion."
},
"partialFingerprints": {
"primaryLocationLineHash": "599c824c91d0f75e:1"
},
"partialFingerprints": {},
"ruleId": "cpp/missing-header-guard",
"ruleIndex": 0
}

View file

@@ -356,7 +356,7 @@ async function uploadFiles(
}
let sarifPayload = combineSarifFiles(sarifFiles);
sarifPayload = fingerprints.addFingerprints(
sarifPayload = await fingerprints.addFingerprints(
sarifPayload,
checkoutPath,
logger