Merge pull request #2514 from github/henrymercer/zstd-better-failure-logging

Capture stderr from extracting Zstandard bundles
2024-10-02 22:14:53 +01:00 · 2024-10-02 22:14:53 +01:00 · e85017e674
commit e85017e674
parent a60db1d7ff 28db28fc03
24 changed files with 487 additions and 263 deletions
--- a/src/actions-util.ts
+++ b/src/actions-util.ts
@ -480,3 +480,89 @@ export const getFileType = async (filePath: string): Promise<string> => {
 export function isSelfHostedRunner() {
  return process.env.RUNNER_ENVIRONMENT === "self-hosted";
 }
+
+export function prettyPrintInvocation(cmd: string, args: string[]): string {
+  return [cmd, ...args].map((x) => (x.includes(" ") ? `'${x}'` : x)).join(" ");
+}
+
+/**
+ * An error from a tool invocation, with associated exit code, stderr, etc.
+ */
+export class CommandInvocationError extends Error {
+  constructor(
+    public cmd: string,
+    public args: string[],
+    public exitCode: number,
+    public stderr: string,
+    public stdout: string,
+  ) {
+    const prettyCommand = prettyPrintInvocation(cmd, args);
+    const lastLine = ensureEndsInPeriod(
+      stderr.trim().split("\n").pop()?.trim() || "n/a",
+    );
+    super(
+      `Failed to run "${prettyCommand}". ` +
+        `Exit code was ${exitCode} and last log line was: ${lastLine} See the logs for more details.`,
+    );
+  }
+}
+
+export function ensureEndsInPeriod(text: string): string {
+  return text[text.length - 1] === "." ? text : `${text}.`;
+}
+
+/**
+ * A constant defining the maximum number of characters we will keep from
+ * the programs stderr for logging.
+ *
+ * This serves two purposes:
+ * 1. It avoids an OOM if a program fails in a way that results it
+ *    printing many log lines.
+ * 2. It avoids us hitting the limit of how much data we can send in our
+ *    status reports on GitHub.com.
+ */
+const MAX_STDERR_BUFFER_SIZE = 20000;
+
+/**
+ * Runs a CLI tool.
+ *
+ * @returns Standard output produced by the tool.
+ * @throws A `CommandInvocationError` if the tool exits with a non-zero status code.
+ */
+export async function runTool(
+  cmd: string,
+  args: string[] = [],
+  opts: { stdin?: string; noStreamStdout?: boolean } = {},
+): Promise<string> {
+  let stdout = "";
+  let stderr = "";
+  process.stdout.write(`[command]${cmd} ${args.join(" ")}\n`);
+  const exitCode = await new toolrunner.ToolRunner(cmd, args, {
+    ignoreReturnCode: true,
+    listeners: {
+      stdout: (data: Buffer) => {
+        stdout += data.toString("utf8");
+        if (!opts.noStreamStdout) {
+          process.stdout.write(data);
+        }
+      },
+      stderr: (data: Buffer) => {
+        let readStartIndex = 0;
+        // If the error is too large, then we only take the last MAX_STDERR_BUFFER_SIZE characters
+        if (data.length - MAX_STDERR_BUFFER_SIZE > 0) {
+          // Eg: if we have MAX_STDERR_BUFFER_SIZE the start index should be 2.
+          readStartIndex = data.length - MAX_STDERR_BUFFER_SIZE + 1;
+        }
+        stderr += data.toString("utf8", readStartIndex);
+        // Mimic the standard behavior of the toolrunner by writing stderr to stdout
+        process.stdout.write(data);
+      },
+    },
+    silent: true,
+    ...(opts.stdin ? { input: Buffer.from(opts.stdin || "") } : {}),
+  }).exec();
+  if (exitCode !== 0) {
+    throw new CommandInvocationError(cmd, args, exitCode, stderr, stdout);
+  }
+  return stdout;
+}
--- a/src/cli-errors.ts
+++ b/src/cli-errors.ts
@ -1,21 +1,20 @@
+import {
+  CommandInvocationError,
+  ensureEndsInPeriod,
+  prettyPrintInvocation,
+} from "./actions-util";
 import { DocUrl } from "./doc-url";
 import { ConfigurationError } from "./util";

 /**
- * A class of Error that we can classify as an error stemming from a CLI
- * invocation, with associated exit code, stderr,etc.
+ * An error from a CodeQL CLI invocation, with associated exit code, stderr, etc.
 */
-export class CommandInvocationError extends Error {
-  constructor(
-    cmd: string,
-    args: string[],
-    public exitCode: number,
-    public stderr: string,
-    public stdout: string,
-  ) {
-    const prettyCommand = [cmd, ...args]
-      .map((x) => (x.includes(" ") ? `'${x}'` : x))
-      .join(" ");
+export class CliError extends Error {
+  public readonly exitCode: number;
+  public readonly stderr: string;
+
+  constructor({ cmd, args, exitCode, stderr }: CommandInvocationError) {
+    const prettyCommand = prettyPrintInvocation(cmd, args);

    const fatalErrors = extractFatalErrors(stderr);
    const autobuildErrors = extractAutobuildErrors(stderr);
@ -42,6 +41,8 @@ export class CommandInvocationError extends Error {
    }

    super(message);
+    this.exitCode = exitCode;
+    this.stderr = stderr;
  }
 }

@ -115,10 +116,6 @@ function extractAutobuildErrors(error: string): string | undefined {
  return errorLines.join("\n") || undefined;
 }

-function ensureEndsInPeriod(text: string): string {
-  return text[text.length - 1] === "." ? text : `${text}.`;
-}
-
 /** Error messages from the CLI that we consider configuration errors and handle specially. */
 export enum CliConfigErrorCategory {
  AutobuildError = "AutobuildError",
@ -282,7 +279,7 @@ export const cliErrorsConfig: Record<
 * if not, return undefined.
 */
 export function getCliConfigCategoryIfExists(
-  cliError: CommandInvocationError,
+  cliError: CliError,
 ): CliConfigErrorCategory | undefined {
  for (const [category, configuration] of Object.entries(cliErrorsConfig)) {
    if (
@ -308,11 +305,7 @@ export function getCliConfigCategoryIfExists(
 * error message appended, if it exists in a known set of configuration errors. Otherwise,
 * simply returns the original error.
 */
-export function wrapCliConfigurationError(cliError: Error): Error {
-  if (!(cliError instanceof CommandInvocationError)) {
-    return cliError;
-  }
-
+export function wrapCliConfigurationError(cliError: CliError): Error {
  const cliConfigErrorCategory = getCliConfigCategoryIfExists(cliError);
  if (cliConfigErrorCategory === undefined) {
    return cliError;
--- a/src/codeql.test.ts
+++ b/src/codeql.test.ts
@ -12,7 +12,7 @@ import * as sinon from "sinon";

 import * as actionsUtil from "./actions-util";
 import { GitHubApiDetails } from "./api-client";
-import { CommandInvocationError } from "./cli-errors";
+import { CliError } from "./cli-errors";
 import * as codeql from "./codeql";
 import { AugmentationProperties, Config } from "./config-utils";
 import * as defaults from "./defaults.json";
@ -961,7 +961,7 @@ test("runTool recognizes fatal internal errors", async (t) => {
    async () =>
      await codeqlObject.databaseRunQueries(stubConfig.dbLocation, []),
    {
-      instanceOf: CommandInvocationError,
+      instanceOf: CliError,
      message: `Encountered a fatal error while running "codeql-for-testing database run-queries  --expect-discarded-cache --min-disk-free=1024 -v --intra-layer-parallelism". Exit code was 1 and error was: Oops! A fatal internal error occurred. Details:
    com.semmle.util.exception.CatastrophicError: An error occurred while evaluating ControlFlowGraph::ControlFlow::Root.isRootOf/1#dispred#f610e6ed/2@86282cc8
    Severe disk cache trouble (corruption or out of space) at /home/runner/work/_temp/codeql_databases/go/db-go/default/cache/pages/28/33.pack: Failed to write item to disk. See the logs for more details.`,
--- a/src/codeql.ts
+++ b/src/codeql.ts
@ -7,15 +7,14 @@ import * as yaml from "js-yaml";
 import * as semver from "semver";

 import {
+  CommandInvocationError,
  getActionVersion,
  getOptionalInput,
  isAnalyzingDefaultBranch,
+  runTool,
 } from "./actions-util";
 import * as api from "./api-client";
-import {
-  CommandInvocationError,
-  wrapCliConfigurationError,
-} from "./cli-errors";
+import { CliError, wrapCliConfigurationError } from "./cli-errors";
 import { type Config } from "./config-utils";
 import { DocUrl } from "./doc-url";
 import { EnvVar } from "./environment";
@ -544,7 +543,7 @@ export async function getCodeQLForCmd(
    async getVersion() {
      let result = util.getCachedCodeQlVersion();
      if (result === undefined) {
-        const output = await runTool(cmd, ["version", "--format=json"]);
+        const output = await runCli(cmd, ["version", "--format=json"]);
        try {
          result = JSON.parse(output) as VersionInfo;
        } catch {
@ -557,7 +556,7 @@ export async function getCodeQLForCmd(
      return result;
    },
    async printVersion() {
-      await runTool(cmd, ["version", "--format=json"]);
+      await runCli(cmd, ["version", "--format=json"]);
    },
    async supportsFeature(feature: ToolsFeature) {
      return isSupportedToolsFeature(await this.getVersion(), feature);
@ -627,7 +626,7 @@ export async function getCodeQLForCmd(
        ? "--force-overwrite"
        : "--overwrite";

-      await runTool(
+      await runCli(
        cmd,
        [
          "database",
@ -674,10 +673,10 @@ export async function getCodeQLForCmd(
      // When `DYLD_INSERT_LIBRARIES` is set in the environment for a step,
      // the Actions runtime introduces its own workaround for SIP
      // (https://github.com/actions/runner/pull/416).
-      await runTool(autobuildCmd);
+      await runCli(autobuildCmd);
    },
    async extractScannedLanguage(config: Config, language: Language) {
-      await runTool(cmd, [
+      await runCli(cmd, [
        "database",
        "trace-command",
        "--index-traceless-dbs",
@ -692,7 +691,7 @@ export async function getCodeQLForCmd(
        applyAutobuildAzurePipelinesTimeoutFix();
      }
      try {
-        await runTool(cmd, [
+        await runCli(cmd, [
          "database",
          "trace-command",
          "--use-build-mode",
@ -731,7 +730,7 @@ export async function getCodeQLForCmd(
        ...getExtraOptionsFromEnv(["database", "finalize"]),
        databasePath,
      ];
-      await runTool(cmd, args);
+      await runCli(cmd, args);
    },
    async resolveLanguages() {
      const codeqlArgs = [
@ -740,7 +739,7 @@ export async function getCodeQLForCmd(
        "--format=json",
        ...getExtraOptionsFromEnv(["resolve", "languages"]),
      ];
-      const output = await runTool(cmd, codeqlArgs);
+      const output = await runCli(cmd, codeqlArgs);

      try {
        return JSON.parse(output) as ResolveLanguagesOutput;
@ -759,7 +758,7 @@ export async function getCodeQLForCmd(
        ...(await getLanguageAliasingArguments(this)),
        ...getExtraOptionsFromEnv(["resolve", "languages"]),
      ];
-      const output = await runTool(cmd, codeqlArgs);
+      const output = await runCli(cmd, codeqlArgs);

      try {
        return JSON.parse(output) as BetterResolveLanguagesOutput;
@ -783,7 +782,7 @@ export async function getCodeQLForCmd(
      if (extraSearchPath !== undefined) {
        codeqlArgs.push("--additional-packs", extraSearchPath);
      }
-      const output = await runTool(cmd, codeqlArgs);
+      const output = await runCli(cmd, codeqlArgs);

      try {
        return JSON.parse(output) as ResolveQueriesOutput;
@ -805,7 +804,7 @@ export async function getCodeQLForCmd(
      if (workingDir !== undefined) {
        codeqlArgs.push("--working-dir", workingDir);
      }
-      const output = await runTool(cmd, codeqlArgs);
+      const output = await runCli(cmd, codeqlArgs);

      try {
        return JSON.parse(output) as ResolveBuildEnvironmentOutput;
@ -839,7 +838,7 @@ export async function getCodeQLForCmd(
      ) {
        codeqlArgs.push("--intra-layer-parallelism");
      }
-      await runTool(cmd, codeqlArgs);
+      await runCli(cmd, codeqlArgs);
    },
    async databaseInterpretResults(
      databasePath: string,
@ -911,7 +910,7 @@ export async function getCodeQLForCmd(
      }
      // Capture the stdout, which contains the analysis summary. Don't stream it to the Actions
      // logs to avoid printing it twice.
-      return await runTool(cmd, codeqlArgs, {
+      return await runCli(cmd, codeqlArgs, {
        noStreamStdout: true,
      });
    },
@ -922,7 +921,7 @@ export async function getCodeQLForCmd(
        ...getExtraOptionsFromEnv(["database", "print-baseline"]),
        databasePath,
      ];
-      return await runTool(cmd, codeqlArgs);
+      return await runCli(cmd, codeqlArgs);
    },

    /**
@ -956,7 +955,7 @@ export async function getCodeQLForCmd(
        ...packs,
      ];

-      const output = await runTool(cmd, codeqlArgs);
+      const output = await runCli(cmd, codeqlArgs);

      try {
        const parsedOutput: PackDownloadOutput = JSON.parse(output);
@ -994,7 +993,7 @@ export async function getCodeQLForCmd(
        `${cacheCleanupFlag}=${cleanupLevel}`,
        ...getExtraOptionsFromEnv(["database", "cleanup"]),
      ];
-      await runTool(cmd, codeqlArgs);
+      await runCli(cmd, codeqlArgs);
    },
    async databaseBundle(
      databasePath: string,
@ -1103,7 +1102,7 @@ export async function getCodeQLForCmd(
        args.push("--sarif-merge-runs-from-equal-category");
      }

-      await runTool(cmd, args);
+      await runCli(cmd, args);
    },
  };
  // To ensure that status reports include the CodeQL CLI version wherever
@ -1216,53 +1215,19 @@ export function getExtraOptions(
  return all.concat(specific);
 }

-/*
- * A constant defining the maximum number of characters we will keep from
- * the programs stderr for logging. This serves two purposes:
- * (1) It avoids an OOM if a program fails in a way that results it
- *     printing many log lines.
- * (2) It avoids us hitting the limit of how much data we can send in our
- *     status reports on GitHub.com.
- */
-const maxErrorSize = 20_000;
-
-async function runTool(
+async function runCli(
  cmd: string,
  args: string[] = [],
  opts: { stdin?: string; noStreamStdout?: boolean } = {},
-) {
-  let stdout = "";
-  let stderr = "";
-  process.stdout.write(`[command]${cmd} ${args.join(" ")}\n`);
-  const exitCode = await new toolrunner.ToolRunner(cmd, args, {
-    ignoreReturnCode: true,
-    listeners: {
-      stdout: (data: Buffer) => {
-        stdout += data.toString("utf8");
-        if (!opts.noStreamStdout) {
-          process.stdout.write(data);
-        }
-      },
-      stderr: (data: Buffer) => {
-        let readStartIndex = 0;
-        // If the error is too large, then we only take the last 20,000 characters
-        if (data.length - maxErrorSize > 0) {
-          // Eg: if we have 20,000 the start index should be 2.
-          readStartIndex = data.length - maxErrorSize + 1;
-        }
-        stderr += data.toString("utf8", readStartIndex);
-        // Mimic the standard behavior of the toolrunner by writing stderr to stdout
-        process.stdout.write(data);
-      },
-    },
-    silent: true,
-    ...(opts.stdin ? { input: Buffer.from(opts.stdin || "") } : {}),
-  }).exec();
-  if (exitCode !== 0) {
-    const e = new CommandInvocationError(cmd, args, exitCode, stderr, stdout);
-    throw wrapCliConfigurationError(e);
+): Promise<string> {
+  try {
+    return await runTool(cmd, args, opts);
+  } catch (e) {
+    if (e instanceof CommandInvocationError) {
+      throw wrapCliConfigurationError(new CliError(e));
+    }
+    throw e;
  }
-  return stdout;
 }

 /**
--- a/src/resolve-environment-action.ts
+++ b/src/resolve-environment-action.ts
@ -7,7 +7,7 @@ import {
  getTemporaryDirectory,
 } from "./actions-util";
 import { getGitHubVersion } from "./api-client";
-import { CommandInvocationError } from "./cli-errors";
+import { CliError } from "./cli-errors";
 import { Config, getConfig } from "./config-utils";
 import { getActionsLogger } from "./logging";
 import { runResolveBuildEnvironment } from "./resolve-environment";
@ -69,7 +69,7 @@ async function run() {
  } catch (unwrappedError) {
    const error = wrapError(unwrappedError);

-    if (error instanceof CommandInvocationError) {
+    if (error instanceof CliError) {
      // If the CLI failed to run successfully for whatever reason,
      // we just return an empty JSON object and proceed with the workflow.
      core.setOutput(ENVIRONMENT_OUTPUT_NAME, {});
--- a/src/setup-codeql.ts
+++ b/src/setup-codeql.ts
@ -4,12 +4,11 @@ import * as path from "path";
 import { performance } from "perf_hooks";

 import * as toolcache from "@actions/tool-cache";
-import del from "del";
 import { default as deepEqual } from "fast-deep-equal";
 import * as semver from "semver";
 import { v4 as uuidV4 } from "uuid";

-import { isRunningLocalAction } from "./actions-util";
+import { CommandInvocationError, isRunningLocalAction } from "./actions-util";
 import * as api from "./api-client";
 // Note: defaults.json is referenced from the CodeQL Action sync tool and the Actions runner image
 // creation scripts. Ensure that any changes to the format of this file are compatible with both of
@ -24,7 +23,7 @@ import {
 import { Logger } from "./logging";
 import * as tar from "./tar";
 import * as util from "./util";
-import { isGoodVersion } from "./util";
+import { cleanUpGlob, isGoodVersion } from "./util";

 export enum ToolsSource {
  Unknown = "UNKNOWN",
@ -497,6 +496,7 @@ export const downloadCodeQL = async function (
  maybeBundleVersion: string | undefined,
  maybeCliVersion: string | undefined,
  apiDetails: api.GitHubApiDetails,
+  tarVersion: tar.TarVersion | undefined,
  tempDir: string,
  logger: Logger,
 ): Promise<{
@ -549,17 +549,25 @@ export const downloadCodeQL = async function (
    `Finished downloading CodeQL bundle to ${archivedBundlePath} (${downloadDurationMs} ms).`,
  );

-  logger.debug("Extracting CodeQL bundle.");
-  const extractionStart = performance.now();
-  const extractedBundlePath = await tar.extract(
-    archivedBundlePath,
-    compressionMethod,
-  );
-  const extractionDurationMs = Math.round(performance.now() - extractionStart);
-  logger.debug(
-    `Finished extracting CodeQL bundle to ${extractedBundlePath} (${extractionDurationMs} ms).`,
-  );
-  await cleanUpGlob(archivedBundlePath, "CodeQL bundle archive", logger);
+  let extractedBundlePath: string;
+  let extractionDurationMs: number;
+
+  try {
+    logger.debug("Extracting CodeQL bundle.");
+    const extractionStart = performance.now();
+    extractedBundlePath = await tar.extract(
+      archivedBundlePath,
+      compressionMethod,
+      tarVersion,
+      logger,
+    );
+    extractionDurationMs = Math.round(performance.now() - extractionStart);
+    logger.debug(
+      `Finished extracting CodeQL bundle to ${extractedBundlePath} (${extractionDurationMs} ms).`,
+    );
+  } finally {
+    await cleanUpGlob(archivedBundlePath, "CodeQL bundle archive", logger);
+  }

  const bundleVersion =
    maybeBundleVersion ?? tryGetBundleVersionFromUrl(codeqlURL, logger);
@ -700,6 +708,10 @@ export async function setupCodeQLBundle(
      );
    } catch (e) {
      zstdFailureReason = util.getErrorMessage(e) || "unknown error";
+      if (e instanceof CommandInvocationError) {
+        zstdFailureReason += ` Full error: ${e.stderr}`;
+        logger.debug(`Invocation output the following to stderr: ${e.stderr}`);
+      }
      logger.warning(
        `Failed to set up CodeQL tools with zstd. Falling back to gzipped version. Error: ${util.getErrorMessage(
          e,
@ -755,7 +767,12 @@ async function setupCodeQLBundleWithCompressionMethod(
      const compressionMethod = tar.inferCompressionMethod(
        source.codeqlTarPath,
      );
-      codeqlFolder = await tar.extract(source.codeqlTarPath, compressionMethod);
+      codeqlFolder = await tar.extract(
+        source.codeqlTarPath,
+        compressionMethod,
+        zstdAvailability.version,
+        logger,
+      );
      toolsSource = ToolsSource.Local;
      break;
    }
@ -770,6 +787,7 @@ async function setupCodeQLBundleWithCompressionMethod(
        source.bundleVersion,
        source.cliVersion,
        apiDetails,
+        zstdAvailability.version,
        tempDir,
        logger,
      );
@ -791,24 +809,6 @@ async function setupCodeQLBundleWithCompressionMethod(
  };
 }

-async function cleanUpGlob(glob: string, name: string, logger: Logger) {
-  logger.debug(`Cleaning up ${name}.`);
-  try {
-    const deletedPaths = await del(glob, { force: true });
-    if (deletedPaths.length === 0) {
-      logger.warning(
-        `Failed to clean up ${name}: no files found matching ${glob}.`,
-      );
-    } else if (deletedPaths.length === 1) {
-      logger.debug(`Cleaned up ${name}.`);
-    } else {
-      logger.debug(`Cleaned up ${name} (${deletedPaths.length} files).`);
-    }
-  } catch (e) {
-    logger.warning(`Failed to clean up ${name}: ${e}.`);
-  }
-}
-
 function sanitizeUrlForStatusReport(url: string): string {
  return ["github/codeql-action", "dsp-testing/codeql-cli-nightlies"].some(
    (repo) => url.startsWith(`https://github.com/${repo}/releases/download/`),
--- a/src/tar.ts
+++ b/src/tar.ts
@ -1,9 +1,14 @@
+import * as fs from "fs";
+import path from "path";
+
 import { ToolRunner } from "@actions/exec/lib/toolrunner";
 import * as toolcache from "@actions/tool-cache";
 import { safeWhich } from "@chrisgavin/safe-which";
+import { v4 as uuidV4 } from "uuid";

+import { getTemporaryDirectory, runTool } from "./actions-util";
 import { Logger } from "./logging";
-import { assertNever } from "./util";
+import { assertNever, cleanUpGlob } from "./util";

 const MIN_REQUIRED_BSD_TAR_VERSION = "3.4.3";
 const MIN_REQUIRED_GNU_TAR_VERSION = "1.31";
@ -84,24 +89,84 @@ export async function isZstdAvailable(
 export type CompressionMethod = "gzip" | "zstd";

 export async function extract(
-  path: string,
+  tarPath: string,
  compressionMethod: CompressionMethod,
+  tarVersion: TarVersion | undefined,
+  logger: Logger,
 ): Promise<string> {
  switch (compressionMethod) {
    case "gzip":
-      // While we could also ask tar to autodetect the compression method,
-      // we defensively keep the gzip call identical as requesting a gzipped
-      // bundle will soon be a fallback option.
-      return await toolcache.extractTar(path);
+      // Defensively continue to call the toolcache API as requesting a gzipped
+      // bundle may be a fallback option.
+      return await toolcache.extractTar(tarPath);
    case "zstd":
-      // By specifying only the "x" flag, we ask tar to autodetect the
-      // compression method.
-      return await toolcache.extractTar(path, undefined, "x");
+      if (!tarVersion) {
+        throw new Error(
+          "Could not determine tar version, which is required to extract a Zstandard archive.",
+        );
+      }
+      return await extractTarZst(tarPath, tarVersion, logger);
  }
 }

-export function inferCompressionMethod(path: string): CompressionMethod {
-  if (path.endsWith(".tar.gz")) {
+/**
+ * Extract a compressed tar archive
+ *
+ * @param file     path to the tar
+ * @param dest     destination directory. Optional.
+ * @returns        path to the destination directory
+ */
+export async function extractTarZst(
+  file: string,
+  tarVersion: TarVersion,
+  logger: Logger,
+): Promise<string> {
+  if (!file) {
+    throw new Error("parameter 'file' is required");
+  }
+
+  // Create dest
+  const dest = await createExtractFolder();
+
+  try {
+    // Initialize args
+    const args = ["-x", "-v"];
+
+    let destArg = dest;
+    let fileArg = file;
+    if (process.platform === "win32" && tarVersion.type === "gnu") {
+      args.push("--force-local");
+      destArg = dest.replace(/\\/g, "/");
+
+      // Technically only the dest needs to have `/` but for aesthetic consistency
+      // convert slashes in the file arg too.
+      fileArg = file.replace(/\\/g, "/");
+    }
+
+    if (tarVersion.type === "gnu") {
+      // Suppress warnings when using GNU tar to extract archives created by BSD tar
+      args.push("--warning=no-unknown-keyword");
+      args.push("--overwrite");
+    }
+
+    args.push("-C", destArg, "-f", fileArg);
+    await runTool(`tar`, args);
+  } catch (e) {
+    await cleanUpGlob(dest, "extraction destination directory", logger);
+    throw e;
+  }
+
+  return dest;
+}
+
+async function createExtractFolder(): Promise<string> {
+  const dest = path.join(getTemporaryDirectory(), uuidV4());
+  fs.mkdirSync(dest, { recursive: true });
+  return dest;
+}
+
+export function inferCompressionMethod(tarPath: string): CompressionMethod {
+  if (tarPath.endsWith(".tar.gz")) {
    return "gzip";
  }
  return "zstd";
--- a/src/util.ts
+++ b/src/util.ts
@ -1161,3 +1161,21 @@ export async function checkSipEnablement(
    return undefined;
  }
 }
+
+export async function cleanUpGlob(glob: string, name: string, logger: Logger) {
+  logger.debug(`Cleaning up ${name}.`);
+  try {
+    const deletedPaths = await del(glob, { force: true });
+    if (deletedPaths.length === 0) {
+      logger.warning(
+        `Failed to clean up ${name}: no files found matching ${glob}.`,
+      );
+    } else if (deletedPaths.length === 1) {
+      logger.debug(`Cleaned up ${name}.`);
+    } else {
+      logger.debug(`Cleaned up ${name} (${deletedPaths.length} files).`);
+    }
+  } catch (e) {
+    logger.warning(`Failed to clean up ${name}: ${e}.`);
+  }
+}