Merge remote-tracking branch 'upstream/main' into aeisenberg/pack-run

Andrew Eisenberg 2021-06-09 12:43:17 -07:00
commit 82388fd94a
36 changed files with 490 additions and 289 deletions

View file

@@ -8,6 +8,7 @@ import {
runAnalyze,
CodeQLAnalysisError,
QueriesStatusReport,
runCleanup,
} from "./analyze";
import { Config, getConfig } from "./config-utils";
import { getActionsLogger } from "./logging";
@@ -89,6 +90,20 @@ async function run() {
logger
);
if (actionsUtil.getOptionalInput("cleanup-level") !== "none") {
await runCleanup(
config,
actionsUtil.getOptionalInput("cleanup-level") || "brutal",
logger
);
}
const dbLocations: { [lang: string]: string } = {};
for (const language of config.languages) {
dbLocations[language] = util.getCodeQLDatabasePath(config, language);
}
core.setOutput("db-locations", dbLocations);
if (actionsUtil.getRequiredInput("upload") === "true") {
const uploadStats = await upload_lib.uploadFromActions(
outputDir,

View file

@@ -53,10 +53,16 @@ test("status report fields and search path setting", async (t) => {
for (const language of Object.values(Language)) {
setCodeQL({
packDownload: async () => ({ packs: [] }),
databaseAnalyze: async (
_,
sarifFile: string,
databaseRunQueries: async (
_db: string,
searchPath: string | undefined
) => {
searchPathsUsed.push(searchPath);
},
databaseInterpretResults: async (
_db: string,
_queriesRun: string[],
sarifFile: string
) => {
fs.writeFileSync(
sarifFile,
@@ -92,7 +98,6 @@ test("status report fields and search path setting", async (t) => {
],
})
);
searchPathsUsed.push(searchPath);
return "";
},
});
@@ -135,6 +140,9 @@ test("status report fields and search path setting", async (t) => {
t.true(
`analyze_builtin_queries_${language}_duration_ms` in builtinStatusReport
);
t.true(
`interpret_results_${language}_duration_ms` in builtinStatusReport
);
config.queries[language] = {
builtin: [],
@@ -158,7 +166,7 @@ test("status report fields and search path setting", async (t) => {
config,
getRunnerLogger(true)
);
t.deepEqual(Object.keys(customStatusReport).length, 1);
t.deepEqual(Object.keys(customStatusReport).length, 2);
t.true(
`analyze_custom_queries_${language}_duration_ms` in customStatusReport
);
@@ -166,6 +174,7 @@ test("status report fields and search path setting", async (t) => {
? [undefined, undefined, "/1", "/2", undefined]
: [undefined, "/1", "/2"];
t.deepEqual(searchPathsUsed, expectedSearchPathsUsed);
t.true(`interpret_results_${language}_duration_ms` in customStatusReport);
}
verifyLineCounts(tmpDir);
@@ -177,12 +186,7 @@ test("status report fields and search path setting", async (t) => {
Object.keys(Language).forEach((lang, i) => {
verifyLineCountForFile(
lang as Language,
path.join(tmpDir, `${lang}-builtin.sarif`),
i + 1
);
verifyLineCountForFile(
lang as Language,
path.join(tmpDir, `${lang}-custom.sarif`),
path.join(tmpDir, `${lang}.sarif`),
i + 1
);
});

View file

@@ -10,7 +10,6 @@ import { countLoc, getIdPrefix } from "./count-loc";
import { isScannedLanguage, Language } from "./languages";
import { Logger } from "./logging";
import * as sharedEnv from "./shared-environment";
import { combineSarifFiles } from "./upload-lib";
import * as util from "./util";
export class CodeQLAnalysisError extends Error {
@@ -25,34 +24,48 @@ export class CodeQLAnalysisError extends Error {
}
export interface QueriesStatusReport {
// Time taken in ms to analyze builtin queries for cpp (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for cpp (or undefined if this language was not analyzed)
analyze_builtin_queries_cpp_duration_ms?: number;
// Time taken in ms to analyze builtin queries for csharp (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for csharp (or undefined if this language was not analyzed)
analyze_builtin_queries_csharp_duration_ms?: number;
// Time taken in ms to analyze builtin queries for go (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for go (or undefined if this language was not analyzed)
analyze_builtin_queries_go_duration_ms?: number;
// Time taken in ms to analyze builtin queries for java (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for java (or undefined if this language was not analyzed)
analyze_builtin_queries_java_duration_ms?: number;
// Time taken in ms to analyze builtin queries for javascript (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for javascript (or undefined if this language was not analyzed)
analyze_builtin_queries_javascript_duration_ms?: number;
// Time taken in ms to analyze builtin queries for python (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for python (or undefined if this language was not analyzed)
analyze_builtin_queries_python_duration_ms?: number;
// Time taken in ms to analyze builtin queries for ruby (or undefined if this language was not analyzed)
// Time taken in ms to run builtin queries for ruby (or undefined if this language was not analyzed)
analyze_builtin_queries_ruby_duration_ms?: number;
// Time taken in ms to analyze custom queries for cpp (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for cpp (or undefined if this language was not analyzed)
analyze_custom_queries_cpp_duration_ms?: number;
// Time taken in ms to analyze custom queries for csharp (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for csharp (or undefined if this language was not analyzed)
analyze_custom_queries_csharp_duration_ms?: number;
// Time taken in ms to analyze custom queries for go (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for go (or undefined if this language was not analyzed)
analyze_custom_queries_go_duration_ms?: number;
// Time taken in ms to analyze custom queries for java (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for java (or undefined if this language was not analyzed)
analyze_custom_queries_java_duration_ms?: number;
// Time taken in ms to analyze custom queries for javascript (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for javascript (or undefined if this language was not analyzed)
analyze_custom_queries_javascript_duration_ms?: number;
// Time taken in ms to analyze custom queries for python (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for python (or undefined if this language was not analyzed)
analyze_custom_queries_python_duration_ms?: number;
// Time taken in ms to analyze custom queries for ruby (or undefined if this language was not analyzed)
// Time taken in ms to run custom queries for ruby (or undefined if this language was not analyzed)
analyze_custom_queries_ruby_duration_ms?: number;
// Time taken in ms to interpret results for cpp (or undefined if this language was not analyzed)
interpret_results_cpp_duration_ms?: number;
// Time taken in ms to interpret results for csharp (or undefined if this language was not analyzed)
interpret_results_csharp_duration_ms?: number;
// Time taken in ms to interpret results for go (or undefined if this language was not analyzed)
interpret_results_go_duration_ms?: number;
// Time taken in ms to interpret results for java (or undefined if this language was not analyzed)
interpret_results_java_duration_ms?: number;
// Time taken in ms to interpret results for javascript (or undefined if this language was not analyzed)
interpret_results_javascript_duration_ms?: number;
// Time taken in ms to interpret results for python (or undefined if this language was not analyzed)
interpret_results_python_duration_ms?: number;
// Time taken in ms to interpret results for ruby (or undefined if this language was not analyzed)
interpret_results_ruby_duration_ms?: number;
// Name of language that errored during analysis (or undefined if no language failed)
analyze_failure_language?: string;
}
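For illustration, a report produced by a run that analyzed only JavaScript might look roughly like the sketch below (the durations are made-up values; fields for languages that were not analyzed stay undefined):

const exampleReport: QueriesStatusReport = {
  // Only the analyzed language's fields are populated.
  analyze_builtin_queries_javascript_duration_ms: 84213,
  analyze_custom_queries_javascript_duration_ms: 10570,
  interpret_results_javascript_duration_ms: 6021,
};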
@@ -163,7 +176,7 @@ export async function runQueries(
);
for (const language of config.languages) {
logger.startGroup(`Analyzing ${language}`);
logger.startGroup(`Running queries for ${language}`);
const queries = config.queries[language];
const packsWithVersion = config.packs[language] || [];
@@ -188,82 +201,65 @@ export async function runQueries(
);
}
let analysisSummaryBuiltIn = "";
const customAnalysisSummaries: string[] = [];
const querySuitePaths: string[] = [];
if (queries["builtin"].length > 0) {
const startTimeBuiltIn = new Date().getTime();
const { sarifFile, stdout } = await runQueryGroup(
language,
"builtin",
createQuerySuiteContents(queries["builtin"]),
sarifFolder,
undefined
querySuitePaths.push(
await runQueryGroup(
language,
"builtin",
createQuerySuiteContents(queries["builtin"]),
undefined
)
);
analysisSummaryBuiltIn = stdout;
await injectLinesOfCode(sarifFile, language, locPromise);
statusReport[`analyze_builtin_queries_${language}_duration_ms`] =
new Date().getTime() - startTimeBuiltIn;
}
const startTimeCustom = new Date().getTime();
const temporarySarifDir = config.tempDir;
const temporarySarifFiles: string[] = [];
let ranCustom = false;
for (let i = 0; i < queries["custom"].length; ++i) {
if (queries["custom"][i].queries.length > 0) {
const { sarifFile, stdout } = await runQueryGroup(
language,
`custom-${i}`,
createQuerySuiteContents(queries["custom"][i].queries),
temporarySarifDir,
queries["custom"][i].searchPath
querySuitePaths.push(
await runQueryGroup(
language,
`custom-${i}`,
createQuerySuiteContents(queries["custom"][i].queries),
queries["custom"][i].searchPath
)
);
customAnalysisSummaries.push(stdout);
temporarySarifFiles.push(sarifFile);
ranCustom = true;
}
}
if (packsWithVersion.length > 0) {
const { sarifFile, stdout } = await runQueryGroup(
language,
"packs",
createPackSuiteContents(packsWithVersion),
temporarySarifDir,
undefined
querySuitePaths.push(
await runQueryGroup(
language,
"packs",
createPackSuiteContents(packsWithVersion),
undefined
)
);
customAnalysisSummaries.push(stdout);
temporarySarifFiles.push(sarifFile);
ranCustom = true;
}
if (temporarySarifFiles.length > 0) {
const sarifFile = path.join(sarifFolder, `${language}-custom.sarif`);
fs.writeFileSync(sarifFile, combineSarifFiles(temporarySarifFiles));
await injectLinesOfCode(sarifFile, language, locPromise);
if (ranCustom) {
statusReport[`analyze_custom_queries_${language}_duration_ms`] =
new Date().getTime() - startTimeCustom;
}
logger.endGroup();
// Print the LoC baseline and the summary results from database analyze for the standard
// query suite and (if appropriate) each custom query suite.
logger.startGroup(`Analysis summary for ${language}`);
printLinesOfCodeSummary(logger, language, await locPromise);
logger.info(analysisSummaryBuiltIn);
for (const [i, customSummary] of customAnalysisSummaries.entries()) {
if (customSummary.trim() === "") {
continue;
}
const description =
customAnalysisSummaries.length === 1
? "custom queries"
: `custom query suite ${i + 1}/${customAnalysisSummaries.length}`;
logger.info(`Analysis summary for ${description}:`);
logger.info("");
logger.info(customSummary);
logger.info("");
}
logger.startGroup(`Interpreting results for ${language}`);
const startTimeInterpretResults = new Date().getTime();
const sarifFile = path.join(sarifFolder, `${language}.sarif`);
const analysisSummary = await runInterpretResults(
language,
querySuitePaths,
sarifFile
);
await injectLinesOfCode(sarifFile, language, locPromise);
statusReport[`interpret_results_${language}_duration_ms`] =
new Date().getTime() - startTimeInterpretResults;
logger.endGroup();
logger.info(analysisSummary);
printLinesOfCodeSummary(logger, language, await locPromise);
} catch (e) {
logger.info(e);
logger.info(e.stack);
@@ -277,13 +273,29 @@ export async function runQueries(
return statusReport;
async function runInterpretResults(
language: Language,
queries: string[],
sarifFile: string
): Promise<string> {
const databasePath = util.getCodeQLDatabasePath(config, language);
const codeql = getCodeQL(config.codeQLCmd);
return await codeql.databaseInterpretResults(
databasePath,
queries,
sarifFile,
addSnippetsFlag,
threadsFlag,
automationDetailsId
);
}
async function runQueryGroup(
language: Language,
type: string,
querySuiteContents: string,
destinationFolder: string,
searchPath: string | undefined
): Promise<{ sarifFile: string; stdout: string }> {
): Promise<string> {
const databasePath = util.getCodeQLDatabasePath(config, language);
// Pass the queries to codeql using a file instead of using the command
// line to avoid command line length restrictions, particularly on windows.
@@ -293,24 +305,17 @@ export async function runQueries(
`Query suite file for ${language}-${type}...\n${querySuiteContents}`
);
const sarifFile = path.join(destinationFolder, `${language}-${type}.sarif`);
const codeql = getCodeQL(config.codeQLCmd);
const databaseAnalyzeStdout = await codeql.databaseAnalyze(
await codeql.databaseRunQueries(
databasePath,
sarifFile,
searchPath,
querySuitePath,
memoryFlag,
addSnippetsFlag,
threadsFlag,
automationDetailsId
threadsFlag
);
logger.debug(
`SARIF results for database ${language} created at "${sarifFile}"`
);
return { sarifFile, stdout: databaseAnalyzeStdout };
logger.debug(`BQRS results produced for ${language} (queries: ${type})`);
return querySuitePath;
}
}
@@ -365,6 +370,20 @@ export async function runAnalyze(
return { ...queriesStats };
}
export async function runCleanup(
config: configUtils.Config,
cleanupLevel: string,
logger: Logger
): Promise<void> {
logger.startGroup("Cleaning up databases");
for (const language of config.languages) {
const codeql = getCodeQL(config.codeQLCmd);
const databasePath = util.getCodeQLDatabasePath(config, language);
await codeql.databaseCleanup(databasePath, cleanupLevel);
}
logger.endGroup();
}
async function injectLinesOfCode(
sarifFile: string,
language: Language,

View file

@@ -11,10 +11,13 @@ const pkg = require("../package.json");
setupTests(test);
let pluginStub: sinon.SinonStub;
let githubStub: sinon.SinonStub;
test.beforeEach(() => {
githubStub = sinon.stub(githubUtils, "GitHub");
pluginStub = sinon.stub(githubUtils.GitHub, "plugin");
githubStub = sinon.stub();
pluginStub.returns(githubStub);
initializeEnvironment(Mode.actions, pkg.version);
});

View file

@@ -1,6 +1,7 @@
import * as path from "path";
import * as githubUtils from "@actions/github/lib/utils";
import * as retry from "@octokit/plugin-retry";
import consoleLogLevel from "console-log-level";
import { getRequiredInput } from "./actions-util";
@@ -33,7 +34,8 @@ export const getApiClient = function (
) {
const auth =
(allowExternal && apiDetails.externalRepoAuth) || apiDetails.auth;
return new githubUtils.GitHub(
const retryingOctokit = githubUtils.GitHub.plugin(retry.retry);
return new retryingOctokit(
githubUtils.getOctokitOptions(auth, {
baseUrl: getApiUrl(apiDetails.url),
userAgent: `CodeQL-${getMode()}/${pkg.version}`,
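The same retry-plugin pattern in isolation, as a rough sketch (the token and base URL below are placeholders, not values used by the action):

import { GitHub, getOctokitOptions } from "@actions/github/lib/utils";
import * as retry from "@octokit/plugin-retry";

// Wrap the Actions-flavoured Octokit class so transient request failures are retried.
const RetryingOctokit = GitHub.plugin(retry.retry);
const client = new RetryingOctokit(
  getOctokitOptions("<token>", { baseUrl: "https://api.github.com" })
);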

View file

@@ -96,14 +96,26 @@ export interface CodeQL {
packDownload(packs: PackWithVersion[]): Promise<PackDownloadOutput>;
/**
* Run 'codeql database analyze'.
* Run 'codeql database cleanup'.
*/
databaseAnalyze(
databaseCleanup(databasePath: string, cleanupLevel: string): Promise<void>;
/**
* Run 'codeql database run-queries'.
*/
databaseRunQueries(
databasePath: string,
sarifFile: string,
extraSearchPath: string | undefined,
querySuite: string,
querySuitePath: string,
memoryFlag: string,
threadsFlag: string
): Promise<void>;
/**
* Run 'codeql database interpret-results'.
*/
databaseInterpretResults(
databasePath: string,
querySuitePaths: string[],
sarifFile: string,
addSnippetsFlag: string,
threadsFlag: string,
automationDetailsId: string | undefined
@@ -498,8 +510,13 @@ export function setCodeQL(partialCodeql: Partial<CodeQL>): CodeQL {
finalizeDatabase: resolveFunction(partialCodeql, "finalizeDatabase"),
resolveLanguages: resolveFunction(partialCodeql, "resolveLanguages"),
resolveQueries: resolveFunction(partialCodeql, "resolveQueries"),
databaseAnalyze: resolveFunction(partialCodeql, "databaseAnalyze"),
packDownload: resolveFunction(partialCodeql, "packDownload"),
databaseCleanup: resolveFunction(partialCodeql, "databaseCleanup"),
databaseRunQueries: resolveFunction(partialCodeql, "databaseRunQueries"),
databaseInterpretResults: resolveFunction(
partialCodeql,
"databaseInterpretResults"
),
};
return cachedCodeQL;
}
@@ -667,6 +684,7 @@ function getCodeQLForCmd(cmd: string): CodeQL {
[
"database",
"finalize",
"--finalize-dataset",
threadsFlag,
...getExtraOptionsFromEnv(["database", "finalize"]),
databasePath,
@@ -722,39 +740,53 @@ function getCodeQLForCmd(cmd: string): CodeQL {
throw new Error(`Unexpected output from codeql resolve queries: ${e}`);
}
},
async databaseAnalyze(
async databaseRunQueries(
databasePath: string,
sarifFile: string,
extraSearchPath: string | undefined,
querySuite: string,
querySuitePath: string,
memoryFlag: string,
threadsFlag: string
): Promise<void> {
const args = [
"database",
"run-queries",
memoryFlag,
threadsFlag,
databasePath,
"--min-disk-free=1024", // Try to leave at least 1GB free
"-v",
...getExtraOptionsFromEnv(["database", "run-queries"]),
];
if (extraSearchPath !== undefined) {
args.push("--additional-packs", extraSearchPath);
}
args.push(querySuitePath);
await new toolrunner.ToolRunner(cmd, args).exec();
},
async databaseInterpretResults(
databasePath: string,
querySuitePaths: string[],
sarifFile: string,
addSnippetsFlag: string,
threadsFlag: string,
automationDetailsId: string | undefined
): Promise<string> {
const args = [
"database",
"analyze",
memoryFlag,
"interpret-results",
threadsFlag,
databasePath,
"--min-disk-free=1024", // Try to leave at least 1GB free
"--format=sarif-latest",
"--sarif-multicause-markdown",
"--print-metrics-summary",
"--sarif-group-rules-by-pack",
"-v",
`--output=${sarifFile}`,
addSnippetsFlag,
// Enable progress verbosity so we log each query as it's interpreted. This aids debugging
// when interpretation takes a while for one of the queries being analyzed.
"-v",
...getExtraOptionsFromEnv(["database", "analyze"]),
...getExtraOptionsFromEnv(["database", "interpret-results"]),
];
if (extraSearchPath !== undefined) {
args.push("--additional-packs", extraSearchPath);
}
if (automationDetailsId !== undefined) {
args.push("--sarif-category", automationDetailsId);
}
args.push(querySuite);
args.push(databasePath, ...querySuitePaths);
// capture stdout, which contains analysis summaries
let output = "";
await new toolrunner.ToolRunner(cmd, args, {
@@ -814,6 +846,18 @@ function getCodeQLForCmd(cmd: string): CodeQL {
);
}
},
async databaseCleanup(
databasePath: string,
cleanupLevel: string
): Promise<void> {
const args = [
"database",
"cleanup",
databasePath,
`--mode=${cleanupLevel}`,
];
await new toolrunner.ToolRunner(cmd, args).exec();
},
};
}
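Taken together, the old single `database analyze` step is now split into three CLI calls driven by these wrappers. A minimal sketch of how a caller might chain them, assuming this module's getCodeQL helper, with illustrative paths and flag values (not the action's real config):

import { getCodeQL } from "./codeql";

async function analyzeAndShrink(dbPath: string, suitePath: string, sarifFile: string) {
  const codeql = getCodeQL("codeql"); // assumes `codeql` is on PATH
  // 1. Produce BQRS results for the query suite.
  await codeql.databaseRunQueries(dbPath, undefined, suitePath, "--ram=4096", "--threads=2");
  // 2. Interpret all results into a single SARIF file; the returned string is the analysis summary.
  const summary = await codeql.databaseInterpretResults(
    dbPath,
    [suitePath],
    sarifFile,
    "--sarif-add-snippets",
    "--threads=2",
    undefined // no automation details ID
  );
  // 3. Shrink the database now that results have been extracted.
  await codeql.databaseCleanup(dbPath, "brutal");
  return summary;
}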

View file

@@ -7,32 +7,41 @@ import test from "ava";
import * as fingerprints from "./fingerprints";
import { getRunnerLogger } from "./logging";
import { setupTests } from "./testing-utils";
import * as util from "./util";
setupTests(test);
function testHash(t: ava.Assertions, input: string, expectedHashes: string[]) {
let index = 0;
const callback = function (lineNumber: number, hash: string) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
fingerprints.hash(callback, input);
t.is(index, input.split(/\r\n|\r|\n/).length);
async function testHash(
t: ava.Assertions,
input: string,
expectedHashes: string[]
) {
await util.withTmpDir(async (tmpDir) => {
const tmpFile = path.resolve(tmpDir, "testfile");
fs.writeFileSync(tmpFile, input);
let index = 0;
const callback = function (lineNumber: number, hash: string) {
t.is(lineNumber, index + 1);
t.is(hash, expectedHashes[index]);
index++;
};
await fingerprints.hash(callback, tmpFile);
t.is(index, input.split(/\r\n|\r|\n/).length);
});
}
test("hash", (t: ava.Assertions) => {
test("hash", async (t: ava.Assertions) => {
// Try empty file
testHash(t, "", ["c129715d7a2bc9a3:1"]);
await testHash(t, "", ["c129715d7a2bc9a3:1"]);
// Try various combinations of newline characters
testHash(t, " a\nb\n \t\tc\n d", [
await testHash(t, " a\nb\n \t\tc\n d", [
"271789c17abda88f:1",
"54703d4cd895b18:1",
"180aee12dab6264:1",
"a23a3dc5e078b07b:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End", [
"8b7cf3e952e7aeb2:1",
"b1ae1287ec4718d9:1",
"bff680108adb0fcc:1",
@@ -40,7 +49,7 @@ test("hash", (t: ava.Assertions) => {
"b86d3392aea1be30:1",
"e6ceba753e1a442:1",
]);
testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
await testHash(t, " hello; \t\nworld!!!\n\n\n \t\tGreetings\n End\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@@ -49,7 +58,7 @@ test("hash", (t: ava.Assertions) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
await testHash(t, " hello; \t\nworld!!!\r\r\r \t\tGreetings\r End\r", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@@ -58,16 +67,20 @@ test("hash", (t: ava.Assertions) => {
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
"e20e36e16fcb0cc8:1",
"b3edc88f2938467e:1",
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]);
testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
await testHash(
t,
" hello; \t\r\nworld!!!\r\n\r\n\r\n \t\tGreetings\r\n End\r\n",
[
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
"e20e36e16fcb0cc8:1",
"b3edc88f2938467e:1",
"c8e28b0b4002a3a0:1",
"c129715d7a2bc9a3:1",
]
);
await testHash(t, " hello; \t\nworld!!!\r\n\n\r \t\tGreetings\r End\r\n", [
"e9496ae3ebfced30:1",
"fb7c023a8b9ccb3f:1",
"ce8ba1a563dcdaca:1",
@@ -78,7 +91,7 @@ test("hash", (t: ava.Assertions) => {
]);
// Try repeating line that will generate identical hashes
testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
await testHash(t, "Lorem ipsum dolor sit amet.\n".repeat(10), [
"a7f2ff13bc495cf2:1",
"a7f2ff13bc495cf2:2",
"a7f2ff13bc495cf2:3",
@@ -92,16 +105,20 @@ test("hash", (t: ava.Assertions) => {
"c129715d7a2bc9a3:1",
]);
testHash(t, "x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n", [
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
"5c482a0f8b35ea28:1",
"54517377da7028d2:1",
"2c644846cb18d53e:1",
"f1b89f20de0d133:1",
"c129715d7a2bc9a3:1",
]);
await testHash(
t,
"x = 2\nx = 1\nprint(x)\nx = 3\nprint(x)\nx = 4\nprint(x)\n",
[
"e54938cc54b302f1:1",
"bb609acbe9138d60:1",
"1131fd5871777f34:1",
"5c482a0f8b35ea28:1",
"54517377da7028d2:1",
"2c644846cb18d53e:1",
"f1b89f20de0d133:1",
"c129715d7a2bc9a3:1",
]
);
});
function testResolveUriToFile(uri: any, index: any, artifactsURIs: any[]) {
@@ -170,7 +187,7 @@ test("resolveUriToFile", (t) => {
t.is(testResolveUriToFile(`file://${dirpath}`, undefined, []), undefined);
});
test("addFingerprints", (t) => {
test("addFingerprints", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting.input.sarif`)
@@ -187,12 +204,16 @@ test("addFingerprints", (t) => {
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(
fingerprints.addFingerprints(input, checkoutPath, getRunnerLogger(true)),
await fingerprints.addFingerprints(
input,
checkoutPath,
getRunnerLogger(true)
),
expected
);
});
test("missingRegions", (t) => {
test("missingRegions", async (t) => {
// Run an end-to-end test on a test file
let input = fs
.readFileSync(`${__dirname}/../src/testdata/fingerprinting2.input.sarif`)
@@ -209,7 +230,11 @@ test("missingRegions", (t) => {
const checkoutPath = path.normalize(`${__dirname}/../src/testdata`);
t.deepEqual(
fingerprints.addFingerprints(input, checkoutPath, getRunnerLogger(true)),
await fingerprints.addFingerprints(
input,
checkoutPath,
getRunnerLogger(true)
),
expected
);
});

View file

@@ -8,6 +8,7 @@ const tab = "\t".charCodeAt(0);
const space = " ".charCodeAt(0);
const lf = "\n".charCodeAt(0);
const cr = "\r".charCodeAt(0);
const EOF = 65535;
const BLOCK_SIZE = 100;
const MOD = Long.fromInt(37); // L
@@ -34,9 +35,9 @@ type hashCallback = (lineNumber: number, hash: string) => void;
* the hashes of the lines near the end of the file.
*
* @param callback function that is called with the line number (1-based) and hash for every line
* @param input The file's contents
* @param filepath The path to the file to hash
*/
export function hash(callback: hashCallback, input: string) {
export async function hash(callback: hashCallback, filepath: string) {
// A rolling view in to the input
const window = Array(BLOCK_SIZE).fill(0);
@@ -87,12 +88,11 @@ export function hash(callback: hashCallback, input: string) {
// as we go. Once we reach a point in the window again then we've processed
// BLOCK_SIZE characters and if the last character at this point in the window
// was the start of a line then we should output the hash for that line.
for (let i = 0, len = input.length; i <= len; i++) {
let current = i === len ? 65535 : input.charCodeAt(i);
const processCharacter = function (current: number) {
// skip tabs, spaces, and line feeds that come directly after a carriage return
if (current === space || current === tab || (prevCR && current === lf)) {
prevCR = false;
continue;
return;
}
// replace CR with LF
if (current === cr) {
@@ -113,7 +113,15 @@ export function hash(callback: hashCallback, input: string) {
lineStart = true;
}
updateHash(current);
};
const readStream = fs.createReadStream(filepath, "utf8");
for await (const data of readStream) {
for (let i = 0; i < data.length; ++i) {
processCharacter(data.charCodeAt(i));
}
}
processCharacter(EOF);
// Flush the remaining lines
for (let i = 0; i < BLOCK_SIZE; i++) {
@@ -237,11 +245,11 @@ export function resolveUriToFile(
// Compute fingerprints for results in the given sarif file
// and return an updated sarif file contents.
export function addFingerprints(
export async function addFingerprints(
sarifContents: string,
checkoutPath: string,
logger: Logger
): string {
): Promise<string> {
const sarif = JSON.parse(sarifContents);
// Gather together results for the same file and construct
@@ -263,6 +271,11 @@ export function addFingerprints(
continue;
}
if (primaryLocation?.physicalLocation?.region?.startLine === undefined) {
// Locations without a line number are unlikely to be source files
continue;
}
const filepath = resolveUriToFile(
primaryLocation.physicalLocation.artifactLocation,
artifacts,
@@ -289,8 +302,7 @@ export function addFingerprints(
c(lineNumber, hashValue);
}
};
const fileContents = fs.readFileSync(filepath).toString();
hash(teeCallback, fileContents);
await hash(teeCallback, filepath);
}
return JSON.stringify(sarif);
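For reference, a minimal standalone use of the new file-based hash API: it streams the file and invokes the callback once per line with a 1-based line number and that line's hash (the import path assumes a caller inside the same src directory):

import { hash } from "./fingerprints";

async function collectLineHashes(filepath: string): Promise<Map<number, string>> {
  const hashes = new Map<number, string>();
  await hash((lineNumber, lineHash) => {
    hashes.set(lineNumber, lineHash);
  }, filepath);
  return hashes;
}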

View file

@@ -30,9 +30,7 @@
"message": {
"text": "This header file should contain a header guard to prevent multiple inclusion."
},
"partialFingerprints": {
"primaryLocationLineHash": "599c824c91d0f75e:1"
},
"partialFingerprints": {},
"ruleId": "cpp/missing-header-guard",
"ruleIndex": 0
}

View file

@@ -356,7 +356,7 @@ async function uploadFiles(
}
let sarifPayload = combineSarifFiles(sarifFiles);
sarifPayload = fingerprints.addFingerprints(
sarifPayload = await fingerprints.addFingerprints(
sarifPayload,
checkoutPath,
logger