split up builtin and custom queries

This commit is contained in:
Robert Brignull 2020-09-10 18:02:33 +01:00
parent 75af0bf309
commit 0539269665
12 changed files with 322 additions and 117 deletions

65
src/analyze.test.ts Normal file
View file

@ -0,0 +1,65 @@
import test from 'ava';
import * as fs from 'fs';
import { runQueries } from './analyze';
import { setCodeQL } from './codeql';
import { Config } from './config-utils';
import { Language } from './languages';
import { getRunnerLogger } from './logging';
import { setupTests } from './testing-utils';
import * as util from './util';
setupTests(test);
// Ensures that every runQueries call reports exactly one duration field, and
// that the field is named after the language analysed and after whether the
// queries that ran were builtin or custom.
test('status report fields', async t => {
  return await util.withTmpDir(async tmpDir => {
    // Register a CodeQL object whose databaseAnalyze does nothing.
    setCodeQL({
      databaseAnalyze: async () => undefined
    });

    const memoryFlag = '';
    const threadsFlag = '';

    for (const language of Object.values(Language)) {
      const config: Config = {
        languages: [language],
        queries: {},
        pathsIgnore: [],
        paths: [],
        originalUserInput: {},
        tempDir: tmpDir,
        toolCacheDir: tmpDir,
        codeQLCmd: '',
      };
      const databaseDir = util.getCodeQLDatabasePath(config.tempDir, language);
      fs.mkdirSync(databaseDir, { recursive: true });

      // Sets the queries of the current language to the given split and
      // returns the status report of the resulting run.
      const analyzeWith = async (builtin: string[], custom: string[]) => {
        config.queries[language] = { builtin, custom };
        return await runQueries(
          tmpDir,
          memoryFlag,
          threadsFlag,
          config,
          getRunnerLogger(true));
      };

      // Only builtin queries: expect just the builtin duration field.
      const builtinStatusReport = await analyzeWith(['foo.ql'], []);
      t.deepEqual(Object.keys(builtinStatusReport).length, 1);
      t.true(`analyze_builtin_queries_${language}_duration_ms` in builtinStatusReport);

      // Only custom queries: expect just the custom duration field.
      const customStatusReport = await analyzeWith([], ['foo.ql']);
      t.deepEqual(Object.keys(customStatusReport).length, 1);
      t.true(`analyze_custom_queries_${language}_duration_ms` in customStatusReport);
    }
  });
});

View file

@ -75,47 +75,58 @@ async function finalizeDatabaseCreation(
}
/**
 * Runs the builtin and the custom queries of every configured language and
 * creates SARIF files in the given folder, one per language and query type.
 *
 * @param sarifFolder Folder in which the `<language>-<type>.sarif` files are created.
 * @param memoryFlag Memory flag passed through to `databaseAnalyze`.
 * @param threadsFlag Threads flag passed through to `databaseAnalyze`.
 * @param config Parsed configuration; supplies the languages, their queries,
 *   the temp dir and the CodeQL command.
 * @param logger Logger used for the per-language log group and debug output.
 * @returns A report with an `analyze_<type>_queries_<language>_duration_ms`
 *   entry for each query type that ran. If analysing a language fails, the
 *   report gathered so far is returned with `analyze_failure_language` set and
 *   the remaining languages are not analysed.
 * @throws Error if a language has neither builtin nor custom queries selected.
 */
export async function runQueries(
  sarifFolder: string,
  memoryFlag: string,
  threadsFlag: string,
  config: configUtils.Config,
  logger: Logger): Promise<QueriesStatusReport> {

  const statusReport: QueriesStatusReport = {};

  for (const language of config.languages) {
    // Validate before opening the log group, so that a language without any
    // queries (or without an entry at all) fails with the intended message
    // and does not leave a group open.
    const queries = config.queries[language];
    if (queries === undefined ||
        (queries.builtin.length === 0 && queries.custom.length === 0)) {
      throw new Error('Unable to analyse ' + language + ' as no queries were selected for this language');
    }

    logger.startGroup('Analyzing ' + language);
    try {
      const databasePath = util.getCodeQLDatabasePath(config.tempDir, language);
      const codeql = getCodeQL(config.codeQLCmd);

      for (const type of ['builtin', 'custom'] as const) {
        if (queries[type].length === 0) {
          continue;
        }
        const startTime = Date.now();

        // Pass the queries to codeql using a file instead of using the command
        // line to avoid command line length restrictions, particularly on windows.
        const querySuitePath = `${databasePath}-queries-${type}.qls`;
        const querySuiteContents = queries[type].map(q => '- query: ' + q).join('\n');
        fs.writeFileSync(querySuitePath, querySuiteContents);
        logger.debug('Query suite file for ' + language + '...\n' + querySuiteContents);

        const sarifFile = path.join(sarifFolder, `${language}-${type}.sarif`);
        await codeql.databaseAnalyze(databasePath, sarifFile, querySuitePath, memoryFlag, threadsFlag);
        logger.debug('SARIF results for database ' + language + ' created at "' + sarifFile + '"');

        // Record the performance
        statusReport[`analyze_${type}_queries_${language}_duration_ms`] = Date.now() - startTime;
      }
    } catch (e) {
      logger.error(`Error running analysis for ${language}: ${e}`);
      logger.info(e);
      statusReport.analyze_failure_language = language;
      return statusReport;
    } finally {
      // Close the group exactly once per language: not once per query type,
      // and also when the analysis failed.
      logger.endGroup();
    }
  }

  return statusReport;
}
export async function runAnalyze(

View file

@ -239,7 +239,10 @@ test("load non-empty input", async t => {
// And the config we expect it to parse to
const expectedConfig: configUtils.Config = {
languages: [Language.javascript],
queries: {'javascript': ['/foo/a.ql', '/bar/b.ql']},
queries: {'javascript': {
builtin: [],
custom: ['/foo/a.ql', '/bar/b.ql']
}},
pathsIgnore: ['a', 'b'],
paths: ['c/d'],
originalUserInput: {
@ -390,9 +393,10 @@ test("Queries can be specified in config file", async t => {
t.regex(resolveQueriesArgs[1].queries[0], /.*\/foo$/);
// Now check that the end result contains the default queries and the query from config
t.deepEqual(config.queries['javascript'].length, 2);
t.regex(config.queries['javascript'][0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'][1], /.*\/foo$/);
t.deepEqual(config.queries['javascript'].builtin.length, 1);
t.deepEqual(config.queries['javascript'].custom.length, 1);
t.regex(config.queries['javascript'].builtin[0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'].custom[0], /.*\/foo$/);
});
});
@ -442,9 +446,10 @@ test("Queries from config file can be overridden in workflow file", async t => {
t.regex(resolveQueriesArgs[1].queries[0], /.*\/override$/);
// Now check that the end result contains only the default queries and the override query
t.deepEqual(config.queries['javascript'].length, 2);
t.regex(config.queries['javascript'][0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'][1], /.*\/override$/);
t.deepEqual(config.queries['javascript'].builtin.length, 1);
t.deepEqual(config.queries['javascript'].custom.length, 1);
t.regex(config.queries['javascript'].builtin[0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'].custom[0], /.*\/override$/);
});
});
@ -492,8 +497,9 @@ test("Queries in workflow file can be used in tandem with the 'disable default q
t.regex(resolveQueriesArgs[0].queries[0], /.*\/workflow-query$/);
// Now check that the end result contains only the workflow query, and not the default one
t.deepEqual(config.queries['javascript'].length, 1);
t.regex(config.queries['javascript'][0], /.*\/workflow-query$/);
t.deepEqual(config.queries['javascript'].builtin.length, 0);
t.deepEqual(config.queries['javascript'].custom.length, 1);
t.regex(config.queries['javascript'].custom[0], /.*\/workflow-query$/);
});
});
@ -537,10 +543,11 @@ test("Multiple queries can be specified in workflow file, no config file require
t.regex(resolveQueriesArgs[2].queries[0], /.*\/override2$/);
// Now check that the end result contains both the queries from the workflow, as well as the defaults
t.deepEqual(config.queries['javascript'].length, 3);
t.regex(config.queries['javascript'][0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'][1], /.*\/override1$/);
t.regex(config.queries['javascript'][2], /.*\/override2$/);
t.deepEqual(config.queries['javascript'].builtin.length, 1);
t.deepEqual(config.queries['javascript'].custom.length, 2);
t.regex(config.queries['javascript'].builtin[0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'].custom[0], /.*\/override1$/);
t.regex(config.queries['javascript'].custom[1], /.*\/override2$/);
});
});
@ -598,11 +605,12 @@ test("Queries in workflow file can be added to the set of queries without overri
t.regex(resolveQueriesArgs[3].queries[0], /.*\/foo$/);
// Now check that the end result contains all the queries
t.deepEqual(config.queries['javascript'].length, 4);
t.regex(config.queries['javascript'][0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'][1], /.*\/additional1$/);
t.regex(config.queries['javascript'][2], /.*\/additional2$/);
t.regex(config.queries['javascript'][3], /.*\/foo$/);
t.deepEqual(config.queries['javascript'].builtin.length, 1);
t.deepEqual(config.queries['javascript'].custom.length, 3);
t.regex(config.queries['javascript'].builtin[0], /javascript-code-scanning.qls$/);
t.regex(config.queries['javascript'].custom[0], /.*\/additional1$/);
t.regex(config.queries['javascript'].custom[1], /.*\/additional2$/);
t.regex(config.queries['javascript'].custom[2], /.*\/foo$/);
});
});

View file

@ -31,6 +31,22 @@ export interface UserConfig {
paths?: string[];
}
/**
 * The query files to run, keyed by language.
 * Only .ql files are listed, never other kinds of files,
 * and every file path is absolute.
 *
 * For each language the queries are kept in two lists: the ones that come
 * from a builtin suite, and custom queries from unknown locations. Keeping
 * them apart allows us to treat them separately if we want to, for example
 * to measure performance.
 */
type Queries = Record<string, {
  /** Queries that belong to one of the builtin suites */
  builtin: string[];
  /** Custom queries, i.e. ones from a non-standard location */
  custom: string[];
}>;
/**
* Format of the parsed config file.
*/
@ -41,10 +57,8 @@ export interface Config {
languages: Language[];
/**
* Map from language to query files.
* Will only contain .ql files and not other kinds of files,
* and all file paths will be absolute.
*/
queries: { [language: string]: string[] };
queries: Queries;
/**
* List of paths to ignore from analysis.
*/
@ -122,34 +136,46 @@ function validateQueries(resolvedQueries: ResolveQueriesOutput) {
/**
* Run 'codeql resolve queries' on `toResolve` and add the results to resultMap.
* The entry for a language is created, with empty `builtin` and `custom`
* lists, the first time that language is seen. Queries for which
* `queryIsDisabled` returns true are left out.
*
* If a checkout path is given (as `extraSearchPath`) then the queries are
* assumed to be custom queries and an error will be thrown if there is anything
* invalid about the queries.
* If a checkout path is not given then the queries are assumed to be builtin
* queries, and error checking will be suppressed.
*
* NOTE(review): this span is a rendered diff, so removed and added lines of the
* change both appear below; this description covers the added lines.
*/
async function runResolveQueries(
codeQL: CodeQL,
resultMap: { [language: string]: string[] },
resultMap: Queries,
toResolve: string[],
extraSearchPath: string | undefined,
errorOnInvalidQueries: boolean) {
extraSearchPath: string | undefined) {
const resolvedQueries = await codeQL.resolveQueries(toResolve, extraSearchPath);
for (const [language, queries] of Object.entries(resolvedQueries.byLanguage)) {
if (resultMap[language] === undefined) {
resultMap[language] = [];
}
resultMap[language].push(...Object.keys(queries).filter(q => !queryIsDisabled(language, q)));
// Validation only happens for queries resolved with an extra search path.
if (extraSearchPath !== undefined) {
validateQueries(resolvedQueries);
}
if (errorOnInvalidQueries) {
validateQueries(resolvedQueries);
for (const [language, queryPaths] of Object.entries(resolvedQueries.byLanguage)) {
if (resultMap[language] === undefined) {
resultMap[language] = {
builtin: [],
custom: [],
};
}
const queries = Object.keys(queryPaths).filter(q => !queryIsDisabled(language, q));
// The presence of an extra search path is what marks the queries as custom;
// without one they are recorded as builtin.
if (extraSearchPath !== undefined) {
resultMap[language].custom.push(...queries);
} else {
resultMap[language].builtin.push(...queries);
}
}
}
/**
* Get the set of queries included by default.
*
* Resolves the `<language>-code-scanning.qls` suite of every given language
* and adds the results to resultMap. No extra search path is passed to
* runResolveQueries (the argument is `undefined`).
*
* NOTE(review): this span is a rendered diff, so removed and added lines of the
* change both appear below.
*/
async function addDefaultQueries(codeQL: CodeQL, languages: string[], resultMap: { [language: string]: string[] }) {
async function addDefaultQueries(codeQL: CodeQL, languages: string[], resultMap: Queries) {
const suites = languages.map(l => l + '-code-scanning.qls');
await runResolveQueries(codeQL, resultMap, suites, undefined, false);
await runResolveQueries(codeQL, resultMap, suites, undefined);
}
// The set of acceptable values for built-in suites from the codeql bundle
@ -162,7 +188,7 @@ const builtinSuites = ['security-extended', 'security-and-quality'] as const;
async function addBuiltinSuiteQueries(
languages: string[],
codeQL: CodeQL,
resultMap: { [language: string]: string[] },
resultMap: Queries,
suiteName: string,
configFile?: string) {
@ -172,7 +198,7 @@ async function addBuiltinSuiteQueries(
}
const suites = languages.map(l => l + '-' + suiteName + '.qls');
await runResolveQueries(codeQL, resultMap, suites, undefined, false);
await runResolveQueries(codeQL, resultMap, suites, undefined);
}
/**
@ -180,7 +206,7 @@ async function addBuiltinSuiteQueries(
*/
async function addLocalQueries(
codeQL: CodeQL,
resultMap: { [language: string]: string[] },
resultMap: Queries,
localQueryPath: string,
checkoutPath: string,
configFile?: string) {
@ -202,7 +228,7 @@ async function addLocalQueries(
throw new Error(getLocalPathOutsideOfRepository(configFile, localQueryPath));
}
await runResolveQueries(codeQL, resultMap, [absoluteQueryPath], checkoutPath, true);
await runResolveQueries(codeQL, resultMap, [absoluteQueryPath], checkoutPath);
}
/**
@ -210,7 +236,7 @@ async function addLocalQueries(
*/
async function addRemoteQueries(
codeQL: CodeQL,
resultMap: { [language: string]: string[] },
resultMap: Queries,
queryUses: string,
tempDir: string,
githubUrl: string,
@ -249,7 +275,7 @@ async function addRemoteQueries(
? path.join(checkoutPath, tok.slice(2).join('/'))
: checkoutPath;
await runResolveQueries(codeQL, resultMap, [queryPath], checkoutPath, true);
await runResolveQueries(codeQL, resultMap, [queryPath], checkoutPath);
}
/**
@ -263,7 +289,7 @@ async function addRemoteQueries(
async function parseQueryUses(
languages: string[],
codeQL: CodeQL,
resultMap: { [language: string]: string[] },
resultMap: Queries,
queryUses: string,
tempDir: string,
checkoutPath: string,
@ -543,7 +569,7 @@ async function addQueriesFromWorkflow(
codeQL: CodeQL,
queriesInput: string,
languages: string[],
resultMap: { [language: string]: string[] },
resultMap: Queries,
tempDir: string,
checkoutPath: string,
githubUrl: string,
@ -599,7 +625,7 @@ export async function getDefaultConfig(
githubAuth,
githubUrl,
logger);
const queries = {};
const queries: Queries = {};
await addDefaultQueries(codeQL, languages, queries);
if (queriesInput) {
await addQueriesFromWorkflow(
@ -672,7 +698,7 @@ async function loadConfig(
githubUrl,
logger);
const queries = {};
const queries: Queries = {};
const pathsIgnore: string[] = [];
const paths: string[] = [];
@ -750,7 +776,8 @@ async function loadConfig(
// The list of queries should not be empty for any language. If it is then
// it is a user configuration error.
for (const language of languages) {
if (queries[language] === undefined || queries[language].length === 0) {
if (queries[language] === undefined ||
(queries[language].builtin.length === 0 && queries[language].custom.length === 0)) {
throw new Error(`Did not detect any queries to run for ${language}. ` +
"Please make sure that the default queries are enabled, or you are specifying queries to run.");
}