Merge pull request #2514 from github/henrymercer/zstd-better-failure-logging

Capture stderr from extracting Zstandard bundles
This commit is contained in:
Henry Mercer 2024-10-02 22:14:53 +01:00 committed by GitHub
commit e85017e674
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 487 additions and 263 deletions

View file

@ -480,3 +480,89 @@ export const getFileType = async (filePath: string): Promise<string> => {
export function isSelfHostedRunner() {
return process.env.RUNNER_ENVIRONMENT === "self-hosted";
}
export function prettyPrintInvocation(cmd: string, args: string[]): string {
return [cmd, ...args].map((x) => (x.includes(" ") ? `'${x}'` : x)).join(" ");
}
/**
* An error from a tool invocation, with associated exit code, stderr, etc.
*/
export class CommandInvocationError extends Error {
constructor(
public cmd: string,
public args: string[],
public exitCode: number,
public stderr: string,
public stdout: string,
) {
const prettyCommand = prettyPrintInvocation(cmd, args);
const lastLine = ensureEndsInPeriod(
stderr.trim().split("\n").pop()?.trim() || "n/a",
);
super(
`Failed to run "${prettyCommand}". ` +
`Exit code was ${exitCode} and last log line was: ${lastLine} See the logs for more details.`,
);
}
}
export function ensureEndsInPeriod(text: string): string {
return text[text.length - 1] === "." ? text : `${text}.`;
}
/**
* A constant defining the maximum number of characters we will keep from
* the programs stderr for logging.
*
* This serves two purposes:
* 1. It avoids an OOM if a program fails in a way that results it
* printing many log lines.
* 2. It avoids us hitting the limit of how much data we can send in our
* status reports on GitHub.com.
*/
const MAX_STDERR_BUFFER_SIZE = 20000;
/**
* Runs a CLI tool.
*
* @returns Standard output produced by the tool.
* @throws A `CommandInvocationError` if the tool exits with a non-zero status code.
*/
export async function runTool(
cmd: string,
args: string[] = [],
opts: { stdin?: string; noStreamStdout?: boolean } = {},
): Promise<string> {
let stdout = "";
let stderr = "";
process.stdout.write(`[command]${cmd} ${args.join(" ")}\n`);
const exitCode = await new toolrunner.ToolRunner(cmd, args, {
ignoreReturnCode: true,
listeners: {
stdout: (data: Buffer) => {
stdout += data.toString("utf8");
if (!opts.noStreamStdout) {
process.stdout.write(data);
}
},
stderr: (data: Buffer) => {
let readStartIndex = 0;
// If the error is too large, then we only take the last MAX_STDERR_BUFFER_SIZE characters
if (data.length - MAX_STDERR_BUFFER_SIZE > 0) {
// Eg: if we have MAX_STDERR_BUFFER_SIZE the start index should be 2.
readStartIndex = data.length - MAX_STDERR_BUFFER_SIZE + 1;
}
stderr += data.toString("utf8", readStartIndex);
// Mimic the standard behavior of the toolrunner by writing stderr to stdout
process.stdout.write(data);
},
},
silent: true,
...(opts.stdin ? { input: Buffer.from(opts.stdin || "") } : {}),
}).exec();
if (exitCode !== 0) {
throw new CommandInvocationError(cmd, args, exitCode, stderr, stdout);
}
return stdout;
}

View file

@ -1,21 +1,20 @@
import {
CommandInvocationError,
ensureEndsInPeriod,
prettyPrintInvocation,
} from "./actions-util";
import { DocUrl } from "./doc-url";
import { ConfigurationError } from "./util";
/**
* A class of Error that we can classify as an error stemming from a CLI
* invocation, with associated exit code, stderr,etc.
* An error from a CodeQL CLI invocation, with associated exit code, stderr, etc.
*/
export class CommandInvocationError extends Error {
constructor(
cmd: string,
args: string[],
public exitCode: number,
public stderr: string,
public stdout: string,
) {
const prettyCommand = [cmd, ...args]
.map((x) => (x.includes(" ") ? `'${x}'` : x))
.join(" ");
export class CliError extends Error {
public readonly exitCode: number;
public readonly stderr: string;
constructor({ cmd, args, exitCode, stderr }: CommandInvocationError) {
const prettyCommand = prettyPrintInvocation(cmd, args);
const fatalErrors = extractFatalErrors(stderr);
const autobuildErrors = extractAutobuildErrors(stderr);
@ -42,6 +41,8 @@ export class CommandInvocationError extends Error {
}
super(message);
this.exitCode = exitCode;
this.stderr = stderr;
}
}
@ -115,10 +116,6 @@ function extractAutobuildErrors(error: string): string | undefined {
return errorLines.join("\n") || undefined;
}
function ensureEndsInPeriod(text: string): string {
return text[text.length - 1] === "." ? text : `${text}.`;
}
/** Error messages from the CLI that we consider configuration errors and handle specially. */
export enum CliConfigErrorCategory {
AutobuildError = "AutobuildError",
@ -282,7 +279,7 @@ export const cliErrorsConfig: Record<
* if not, return undefined.
*/
export function getCliConfigCategoryIfExists(
cliError: CommandInvocationError,
cliError: CliError,
): CliConfigErrorCategory | undefined {
for (const [category, configuration] of Object.entries(cliErrorsConfig)) {
if (
@ -308,11 +305,7 @@ export function getCliConfigCategoryIfExists(
* error message appended, if it exists in a known set of configuration errors. Otherwise,
* simply returns the original error.
*/
export function wrapCliConfigurationError(cliError: Error): Error {
if (!(cliError instanceof CommandInvocationError)) {
return cliError;
}
export function wrapCliConfigurationError(cliError: CliError): Error {
const cliConfigErrorCategory = getCliConfigCategoryIfExists(cliError);
if (cliConfigErrorCategory === undefined) {
return cliError;

View file

@ -12,7 +12,7 @@ import * as sinon from "sinon";
import * as actionsUtil from "./actions-util";
import { GitHubApiDetails } from "./api-client";
import { CommandInvocationError } from "./cli-errors";
import { CliError } from "./cli-errors";
import * as codeql from "./codeql";
import { AugmentationProperties, Config } from "./config-utils";
import * as defaults from "./defaults.json";
@ -961,7 +961,7 @@ test("runTool recognizes fatal internal errors", async (t) => {
async () =>
await codeqlObject.databaseRunQueries(stubConfig.dbLocation, []),
{
instanceOf: CommandInvocationError,
instanceOf: CliError,
message: `Encountered a fatal error while running "codeql-for-testing database run-queries --expect-discarded-cache --min-disk-free=1024 -v --intra-layer-parallelism". Exit code was 1 and error was: Oops! A fatal internal error occurred. Details:
com.semmle.util.exception.CatastrophicError: An error occurred while evaluating ControlFlowGraph::ControlFlow::Root.isRootOf/1#dispred#f610e6ed/2@86282cc8
Severe disk cache trouble (corruption or out of space) at /home/runner/work/_temp/codeql_databases/go/db-go/default/cache/pages/28/33.pack: Failed to write item to disk. See the logs for more details.`,

View file

@ -7,15 +7,14 @@ import * as yaml from "js-yaml";
import * as semver from "semver";
import {
CommandInvocationError,
getActionVersion,
getOptionalInput,
isAnalyzingDefaultBranch,
runTool,
} from "./actions-util";
import * as api from "./api-client";
import {
CommandInvocationError,
wrapCliConfigurationError,
} from "./cli-errors";
import { CliError, wrapCliConfigurationError } from "./cli-errors";
import { type Config } from "./config-utils";
import { DocUrl } from "./doc-url";
import { EnvVar } from "./environment";
@ -544,7 +543,7 @@ export async function getCodeQLForCmd(
async getVersion() {
let result = util.getCachedCodeQlVersion();
if (result === undefined) {
const output = await runTool(cmd, ["version", "--format=json"]);
const output = await runCli(cmd, ["version", "--format=json"]);
try {
result = JSON.parse(output) as VersionInfo;
} catch {
@ -557,7 +556,7 @@ export async function getCodeQLForCmd(
return result;
},
async printVersion() {
await runTool(cmd, ["version", "--format=json"]);
await runCli(cmd, ["version", "--format=json"]);
},
async supportsFeature(feature: ToolsFeature) {
return isSupportedToolsFeature(await this.getVersion(), feature);
@ -627,7 +626,7 @@ export async function getCodeQLForCmd(
? "--force-overwrite"
: "--overwrite";
await runTool(
await runCli(
cmd,
[
"database",
@ -674,10 +673,10 @@ export async function getCodeQLForCmd(
// When `DYLD_INSERT_LIBRARIES` is set in the environment for a step,
// the Actions runtime introduces its own workaround for SIP
// (https://github.com/actions/runner/pull/416).
await runTool(autobuildCmd);
await runCli(autobuildCmd);
},
async extractScannedLanguage(config: Config, language: Language) {
await runTool(cmd, [
await runCli(cmd, [
"database",
"trace-command",
"--index-traceless-dbs",
@ -692,7 +691,7 @@ export async function getCodeQLForCmd(
applyAutobuildAzurePipelinesTimeoutFix();
}
try {
await runTool(cmd, [
await runCli(cmd, [
"database",
"trace-command",
"--use-build-mode",
@ -731,7 +730,7 @@ export async function getCodeQLForCmd(
...getExtraOptionsFromEnv(["database", "finalize"]),
databasePath,
];
await runTool(cmd, args);
await runCli(cmd, args);
},
async resolveLanguages() {
const codeqlArgs = [
@ -740,7 +739,7 @@ export async function getCodeQLForCmd(
"--format=json",
...getExtraOptionsFromEnv(["resolve", "languages"]),
];
const output = await runTool(cmd, codeqlArgs);
const output = await runCli(cmd, codeqlArgs);
try {
return JSON.parse(output) as ResolveLanguagesOutput;
@ -759,7 +758,7 @@ export async function getCodeQLForCmd(
...(await getLanguageAliasingArguments(this)),
...getExtraOptionsFromEnv(["resolve", "languages"]),
];
const output = await runTool(cmd, codeqlArgs);
const output = await runCli(cmd, codeqlArgs);
try {
return JSON.parse(output) as BetterResolveLanguagesOutput;
@ -783,7 +782,7 @@ export async function getCodeQLForCmd(
if (extraSearchPath !== undefined) {
codeqlArgs.push("--additional-packs", extraSearchPath);
}
const output = await runTool(cmd, codeqlArgs);
const output = await runCli(cmd, codeqlArgs);
try {
return JSON.parse(output) as ResolveQueriesOutput;
@ -805,7 +804,7 @@ export async function getCodeQLForCmd(
if (workingDir !== undefined) {
codeqlArgs.push("--working-dir", workingDir);
}
const output = await runTool(cmd, codeqlArgs);
const output = await runCli(cmd, codeqlArgs);
try {
return JSON.parse(output) as ResolveBuildEnvironmentOutput;
@ -839,7 +838,7 @@ export async function getCodeQLForCmd(
) {
codeqlArgs.push("--intra-layer-parallelism");
}
await runTool(cmd, codeqlArgs);
await runCli(cmd, codeqlArgs);
},
async databaseInterpretResults(
databasePath: string,
@ -911,7 +910,7 @@ export async function getCodeQLForCmd(
}
// Capture the stdout, which contains the analysis summary. Don't stream it to the Actions
// logs to avoid printing it twice.
return await runTool(cmd, codeqlArgs, {
return await runCli(cmd, codeqlArgs, {
noStreamStdout: true,
});
},
@ -922,7 +921,7 @@ export async function getCodeQLForCmd(
...getExtraOptionsFromEnv(["database", "print-baseline"]),
databasePath,
];
return await runTool(cmd, codeqlArgs);
return await runCli(cmd, codeqlArgs);
},
/**
@ -956,7 +955,7 @@ export async function getCodeQLForCmd(
...packs,
];
const output = await runTool(cmd, codeqlArgs);
const output = await runCli(cmd, codeqlArgs);
try {
const parsedOutput: PackDownloadOutput = JSON.parse(output);
@ -994,7 +993,7 @@ export async function getCodeQLForCmd(
`${cacheCleanupFlag}=${cleanupLevel}`,
...getExtraOptionsFromEnv(["database", "cleanup"]),
];
await runTool(cmd, codeqlArgs);
await runCli(cmd, codeqlArgs);
},
async databaseBundle(
databasePath: string,
@ -1103,7 +1102,7 @@ export async function getCodeQLForCmd(
args.push("--sarif-merge-runs-from-equal-category");
}
await runTool(cmd, args);
await runCli(cmd, args);
},
};
// To ensure that status reports include the CodeQL CLI version wherever
@ -1216,53 +1215,19 @@ export function getExtraOptions(
return all.concat(specific);
}
/*
* A constant defining the maximum number of characters we will keep from
* the programs stderr for logging. This serves two purposes:
* (1) It avoids an OOM if a program fails in a way that results it
* printing many log lines.
* (2) It avoids us hitting the limit of how much data we can send in our
* status reports on GitHub.com.
*/
const maxErrorSize = 20_000;
async function runTool(
async function runCli(
cmd: string,
args: string[] = [],
opts: { stdin?: string; noStreamStdout?: boolean } = {},
) {
let stdout = "";
let stderr = "";
process.stdout.write(`[command]${cmd} ${args.join(" ")}\n`);
const exitCode = await new toolrunner.ToolRunner(cmd, args, {
ignoreReturnCode: true,
listeners: {
stdout: (data: Buffer) => {
stdout += data.toString("utf8");
if (!opts.noStreamStdout) {
process.stdout.write(data);
}
},
stderr: (data: Buffer) => {
let readStartIndex = 0;
// If the error is too large, then we only take the last 20,000 characters
if (data.length - maxErrorSize > 0) {
// Eg: if we have 20,000 the start index should be 2.
readStartIndex = data.length - maxErrorSize + 1;
}
stderr += data.toString("utf8", readStartIndex);
// Mimic the standard behavior of the toolrunner by writing stderr to stdout
process.stdout.write(data);
},
},
silent: true,
...(opts.stdin ? { input: Buffer.from(opts.stdin || "") } : {}),
}).exec();
if (exitCode !== 0) {
const e = new CommandInvocationError(cmd, args, exitCode, stderr, stdout);
throw wrapCliConfigurationError(e);
): Promise<string> {
try {
return await runTool(cmd, args, opts);
} catch (e) {
if (e instanceof CommandInvocationError) {
throw wrapCliConfigurationError(new CliError(e));
}
throw e;
}
return stdout;
}
/**

View file

@ -7,7 +7,7 @@ import {
getTemporaryDirectory,
} from "./actions-util";
import { getGitHubVersion } from "./api-client";
import { CommandInvocationError } from "./cli-errors";
import { CliError } from "./cli-errors";
import { Config, getConfig } from "./config-utils";
import { getActionsLogger } from "./logging";
import { runResolveBuildEnvironment } from "./resolve-environment";
@ -69,7 +69,7 @@ async function run() {
} catch (unwrappedError) {
const error = wrapError(unwrappedError);
if (error instanceof CommandInvocationError) {
if (error instanceof CliError) {
// If the CLI failed to run successfully for whatever reason,
// we just return an empty JSON object and proceed with the workflow.
core.setOutput(ENVIRONMENT_OUTPUT_NAME, {});

View file

@ -4,12 +4,11 @@ import * as path from "path";
import { performance } from "perf_hooks";
import * as toolcache from "@actions/tool-cache";
import del from "del";
import { default as deepEqual } from "fast-deep-equal";
import * as semver from "semver";
import { v4 as uuidV4 } from "uuid";
import { isRunningLocalAction } from "./actions-util";
import { CommandInvocationError, isRunningLocalAction } from "./actions-util";
import * as api from "./api-client";
// Note: defaults.json is referenced from the CodeQL Action sync tool and the Actions runner image
// creation scripts. Ensure that any changes to the format of this file are compatible with both of
@ -24,7 +23,7 @@ import {
import { Logger } from "./logging";
import * as tar from "./tar";
import * as util from "./util";
import { isGoodVersion } from "./util";
import { cleanUpGlob, isGoodVersion } from "./util";
export enum ToolsSource {
Unknown = "UNKNOWN",
@ -497,6 +496,7 @@ export const downloadCodeQL = async function (
maybeBundleVersion: string | undefined,
maybeCliVersion: string | undefined,
apiDetails: api.GitHubApiDetails,
tarVersion: tar.TarVersion | undefined,
tempDir: string,
logger: Logger,
): Promise<{
@ -549,17 +549,25 @@ export const downloadCodeQL = async function (
`Finished downloading CodeQL bundle to ${archivedBundlePath} (${downloadDurationMs} ms).`,
);
logger.debug("Extracting CodeQL bundle.");
const extractionStart = performance.now();
const extractedBundlePath = await tar.extract(
archivedBundlePath,
compressionMethod,
);
const extractionDurationMs = Math.round(performance.now() - extractionStart);
logger.debug(
`Finished extracting CodeQL bundle to ${extractedBundlePath} (${extractionDurationMs} ms).`,
);
await cleanUpGlob(archivedBundlePath, "CodeQL bundle archive", logger);
let extractedBundlePath: string;
let extractionDurationMs: number;
try {
logger.debug("Extracting CodeQL bundle.");
const extractionStart = performance.now();
extractedBundlePath = await tar.extract(
archivedBundlePath,
compressionMethod,
tarVersion,
logger,
);
extractionDurationMs = Math.round(performance.now() - extractionStart);
logger.debug(
`Finished extracting CodeQL bundle to ${extractedBundlePath} (${extractionDurationMs} ms).`,
);
} finally {
await cleanUpGlob(archivedBundlePath, "CodeQL bundle archive", logger);
}
const bundleVersion =
maybeBundleVersion ?? tryGetBundleVersionFromUrl(codeqlURL, logger);
@ -700,6 +708,10 @@ export async function setupCodeQLBundle(
);
} catch (e) {
zstdFailureReason = util.getErrorMessage(e) || "unknown error";
if (e instanceof CommandInvocationError) {
zstdFailureReason += ` Full error: ${e.stderr}`;
logger.debug(`Invocation output the following to stderr: ${e.stderr}`);
}
logger.warning(
`Failed to set up CodeQL tools with zstd. Falling back to gzipped version. Error: ${util.getErrorMessage(
e,
@ -755,7 +767,12 @@ async function setupCodeQLBundleWithCompressionMethod(
const compressionMethod = tar.inferCompressionMethod(
source.codeqlTarPath,
);
codeqlFolder = await tar.extract(source.codeqlTarPath, compressionMethod);
codeqlFolder = await tar.extract(
source.codeqlTarPath,
compressionMethod,
zstdAvailability.version,
logger,
);
toolsSource = ToolsSource.Local;
break;
}
@ -770,6 +787,7 @@ async function setupCodeQLBundleWithCompressionMethod(
source.bundleVersion,
source.cliVersion,
apiDetails,
zstdAvailability.version,
tempDir,
logger,
);
@ -791,24 +809,6 @@ async function setupCodeQLBundleWithCompressionMethod(
};
}
async function cleanUpGlob(glob: string, name: string, logger: Logger) {
logger.debug(`Cleaning up ${name}.`);
try {
const deletedPaths = await del(glob, { force: true });
if (deletedPaths.length === 0) {
logger.warning(
`Failed to clean up ${name}: no files found matching ${glob}.`,
);
} else if (deletedPaths.length === 1) {
logger.debug(`Cleaned up ${name}.`);
} else {
logger.debug(`Cleaned up ${name} (${deletedPaths.length} files).`);
}
} catch (e) {
logger.warning(`Failed to clean up ${name}: ${e}.`);
}
}
function sanitizeUrlForStatusReport(url: string): string {
return ["github/codeql-action", "dsp-testing/codeql-cli-nightlies"].some(
(repo) => url.startsWith(`https://github.com/${repo}/releases/download/`),

View file

@ -1,9 +1,14 @@
import * as fs from "fs";
import path from "path";
import { ToolRunner } from "@actions/exec/lib/toolrunner";
import * as toolcache from "@actions/tool-cache";
import { safeWhich } from "@chrisgavin/safe-which";
import { v4 as uuidV4 } from "uuid";
import { getTemporaryDirectory, runTool } from "./actions-util";
import { Logger } from "./logging";
import { assertNever } from "./util";
import { assertNever, cleanUpGlob } from "./util";
const MIN_REQUIRED_BSD_TAR_VERSION = "3.4.3";
const MIN_REQUIRED_GNU_TAR_VERSION = "1.31";
@ -84,24 +89,84 @@ export async function isZstdAvailable(
export type CompressionMethod = "gzip" | "zstd";
export async function extract(
path: string,
tarPath: string,
compressionMethod: CompressionMethod,
tarVersion: TarVersion | undefined,
logger: Logger,
): Promise<string> {
switch (compressionMethod) {
case "gzip":
// While we could also ask tar to autodetect the compression method,
// we defensively keep the gzip call identical as requesting a gzipped
// bundle will soon be a fallback option.
return await toolcache.extractTar(path);
// Defensively continue to call the toolcache API as requesting a gzipped
// bundle may be a fallback option.
return await toolcache.extractTar(tarPath);
case "zstd":
// By specifying only the "x" flag, we ask tar to autodetect the
// compression method.
return await toolcache.extractTar(path, undefined, "x");
if (!tarVersion) {
throw new Error(
"Could not determine tar version, which is required to extract a Zstandard archive.",
);
}
return await extractTarZst(tarPath, tarVersion, logger);
}
}
export function inferCompressionMethod(path: string): CompressionMethod {
if (path.endsWith(".tar.gz")) {
/**
* Extract a compressed tar archive
*
* @param file path to the tar
* @param dest destination directory. Optional.
* @returns path to the destination directory
*/
export async function extractTarZst(
file: string,
tarVersion: TarVersion,
logger: Logger,
): Promise<string> {
if (!file) {
throw new Error("parameter 'file' is required");
}
// Create dest
const dest = await createExtractFolder();
try {
// Initialize args
const args = ["-x", "-v"];
let destArg = dest;
let fileArg = file;
if (process.platform === "win32" && tarVersion.type === "gnu") {
args.push("--force-local");
destArg = dest.replace(/\\/g, "/");
// Technically only the dest needs to have `/` but for aesthetic consistency
// convert slashes in the file arg too.
fileArg = file.replace(/\\/g, "/");
}
if (tarVersion.type === "gnu") {
// Suppress warnings when using GNU tar to extract archives created by BSD tar
args.push("--warning=no-unknown-keyword");
args.push("--overwrite");
}
args.push("-C", destArg, "-f", fileArg);
await runTool(`tar`, args);
} catch (e) {
await cleanUpGlob(dest, "extraction destination directory", logger);
throw e;
}
return dest;
}
async function createExtractFolder(): Promise<string> {
const dest = path.join(getTemporaryDirectory(), uuidV4());
fs.mkdirSync(dest, { recursive: true });
return dest;
}
export function inferCompressionMethod(tarPath: string): CompressionMethod {
if (tarPath.endsWith(".tar.gz")) {
return "gzip";
}
return "zstd";

View file

@ -1161,3 +1161,21 @@ export async function checkSipEnablement(
return undefined;
}
}
export async function cleanUpGlob(glob: string, name: string, logger: Logger) {
logger.debug(`Cleaning up ${name}.`);
try {
const deletedPaths = await del(glob, { force: true });
if (deletedPaths.length === 0) {
logger.warning(
`Failed to clean up ${name}: no files found matching ${glob}.`,
);
} else if (deletedPaths.length === 1) {
logger.debug(`Cleaned up ${name}.`);
} else {
logger.debug(`Cleaned up ${name} (${deletedPaths.length} files).`);
}
} catch (e) {
logger.warning(`Failed to clean up ${name}: ${e}.`);
}
}