Make URL parsing more robust

This commit is contained in:
Robert Brignull 2020-09-28 18:28:46 +01:00
parent 2841489ddf
commit c4dc1b0438
9 changed files with 179 additions and 47 deletions

View file

@ -165,3 +165,46 @@ export function getCodeQLDatabasesDir(tempDir: string) {
export function getCodeQLDatabasePath(tempDir: string, language: Language) {
return path.resolve(getCodeQLDatabasesDir(tempDir), language);
}
/**
* Parses user input of a github.com or GHES URL to a canonical form.
* Removes any API prefix or suffix if one is present.
*/
export function parseGithubUrl(inputUrl: string): string {
const originalUrl = inputUrl;
if (inputUrl.indexOf("://") === -1) {
inputUrl = `https://${inputUrl}`;
}
if (!inputUrl.startsWith("http://") && !inputUrl.startsWith("https://")) {
throw new Error(`"${originalUrl}" is not a http or https URL`);
}
let url: URL;
try {
url = new URL(inputUrl);
} catch (e) {
throw new Error(`"${originalUrl}" is not a valid URL`);
}
// If we detect this is trying to be to github.com
// then return with a fixed canonical URL.
if (url.hostname === "github.com" || url.hostname === "api.github.com") {
return "https://github.com/";
}
// Remove the API prefix if it's present
if (url.pathname.indexOf("/api/v3") !== -1) {
url.pathname = url.pathname.substring(0, url.pathname.indexOf("/api/v3"));
}
// Also consider subdomain isolation on GHES
if (url.hostname.startsWith("api.")) {
url.hostname = url.hostname.substring(4);
}
// Normalise path to having a trailing slash for consistency
if (!url.pathname.endsWith("/")) {
url.pathname = `${url.pathname}/`;
}
return url.toString();
}