Add the github-linguist package

This commit only adds a single package and all of its transitive
dependencies. The github-linguist package will be used for counting
lines of code as a baseline for databases we are analyzing.
This commit is contained in:
Andrew Eisenberg 2021-04-22 15:33:01 -07:00
parent 896b4ff181
commit c4a84a93d4
152 changed files with 17057 additions and 24 deletions

69
node_modules/github-linguist/src/cli.ts generated vendored Normal file
View file

@ -0,0 +1,69 @@
#!/usr/bin/env node
import chalk from 'chalk';
import program from 'commander';
// @ts-ignore
import slash from 'slash2';
import path from 'path';
import { LocDir } from './directory';
import { LocFile, LineInfo } from './file';
import { getVersion } from './utils';
// `file <path>` sub-command: prints the detected language and the line counts
// of a single file. Failures are reported on stderr rather than rethrown.
program
  .version(getVersion(), '-v')
  .command('file <path>')
  .description('count lines of code in a file')
  .action(async (pathPattern) => {
    try {
      const info = await (new LocFile(pathPattern).getFileInfo());
      // The template literal below is deliberately flush-left: any indentation
      // placed inside it would be printed verbatim.
      // eslint-disable-next-line no-console
      console.log(
        chalk.cyan(`
path: \t\t${pathPattern}
language: \t${info.languages}
total lines: \t${String(info.lines.total)}
code lines: \t${String(info.lines.code)}
comment lines: \t${String(info.lines.comment)}
`),
      );
    } catch (e) {
      console.error(e);
      // Print the stack trace when the thrown value actually carries one.
      if (e instanceof Error && e.stack) {
        console.error(e.stack);
      }
    }
  });
/**
 * Render the overall totals followed by one row per language.
 *
 * @param info aggregated line counts across all files
 * @param languages per-language line counts plus the number of files (`sum`)
 * @returns a tab-aligned, multi-line report that starts with a newline
 */
const formatInfo = (
  info: LineInfo,
  languages: {
    [key: string]: LineInfo & {
      sum: number;
    };
  },
): string => {
  // Header section: a leading blank line, the three totals and a separator.
  const header = [
    '',
    `\ttotal lines: \t${String(info.total)}`,
    `\tcode lines: \t${String(info.code)}`,
    `\tcomment lines: \t${String(info.comment)}`,
    '\t--------------------',
  ].join('\n');

  // One row per language, in the key order of `languages`.
  let rows = '';
  for (const name of Object.keys(languages)) {
    const stats = languages[name];
    rows +=
      `\n\t${name.padEnd(10)} \t file count:${String(stats.sum)}` +
      ` \ttotal:${String(stats.total)}` +
      ` \tcomment:${String(stats.comment)}` +
      ` \tcode:${String(stats.code)}`;
  }
  return header + rows;
};
// Default command: the first positional argument is used as the include
// pattern for a directory scan and the aggregated report is printed.
// The optional `[env]` argument is declared but not read by the handler.
program.arguments('<cmd> [env]').action(async (cmd) => {
  try {
    const { info, languages } = await (new LocDir({
      include: cmd
    }).loadInfo());
    // eslint-disable-next-line no-console
    console.log(chalk.cyan(formatInfo(info, languages)));
  } catch (e) {
    console.error(e);
    // Print the stack trace when the thrown value actually carries one.
    if (e instanceof Error && e.stack) {
      console.error(e.stack);
    }
  }
});
// Kick off argument parsing; this dispatches to the handlers registered above.
program.parse(process.argv);

127
node_modules/github-linguist/src/directory.ts generated vendored Normal file
View file

@ -0,0 +1,127 @@
import globby from 'globby';
import fs from 'fs-extra';
import path from 'path';
// @ts-ignore
import slash from 'slash2';
import { LineInfo, LocFile } from './file';
// Zeroed line counts. LocDir.loadInfo copies this object (via spread) as the
// starting value of its running totals, so the constant itself is never mutated.
const defaultInfo: LineInfo = {
total: 0,
code: 0,
comment: 0,
};
/**
 * Options accepted by the LocDir constructor.
 */
export interface LocDirOptions {
// directory the glob patterns are resolved against; LocDir falls back to process.cwd()
cwd?: string;
// glob pattern(s) to scan; LocDir falls back to '**' when omitted
include?: string[] | string;
// extra glob pattern(s) to ignore, combined with the built-in default excludes
exclude?: string[] | string;
}
/**
 * Aggregated result of counting lines over one or more files.
 */
export interface LocResult {
// paths of the files that were counted
files: string[];
// line counts summed over all counted files
info: LineInfo;
// line counts per detected language name; `sum` is the number of files in that language
languages: {
[key: string]: LineInfo & {
sum: number;
};
};
}
// Glob patterns that are always ignored. The LocDir constructor appends these
// to any caller-supplied excludes and then makes sure each ends in a globstar.
const defaultExclude = [
// javascript
'**/*.map',
'**/yarn**',
'**/.github',
'**/node_modules/**',
'**/dist/**',
'**/*.snap',
// java
'**/target'
];
/**
 * Scans a directory with glob patterns and aggregates line counts for every
 * matching file, both overall and per detected language.
 */
export class LocDir {
  private cwd: string;
  private include: string[];
  private exclude: string[];

  /**
   * @param options working directory plus include/exclude glob patterns
   */
  constructor(options: LocDirOptions) {
    // Every pattern must end in a globstar. Note that '**/*.ts/**' still
    // matches files ending in .ts, because a globstar stands for zero or
    // more directory segments.
    const withGlobstar = (pattern: string): string =>
      pattern.endsWith('**') ? pattern : `${pattern}/**`;
    // A leading './' interferes with globstar matching in the excludes,
    // so it is stripped from the includes.
    const withoutDotSlash = (pattern: string): string =>
      pattern.startsWith('./') ? pattern.substring(2) : pattern;

    this.exclude = [...ensureArray(options.exclude), ...defaultExclude]
      .map((pattern) => withGlobstar(pattern));
    this.include = ensureArray(options.include, '**')
      .map((pattern) => withoutDotSlash(pattern))
      .map((pattern) => withGlobstar(pattern));
    this.cwd = options.cwd || process.cwd();
  }

  /**
   * Count lines for every file matched by the include patterns.
   *
   * Files are processed concurrently, so the order of `files` in the result
   * follows completion order rather than glob order.
   *
   * @returns the counted files, the overall totals and the per-language totals
   */
  async loadInfo(): Promise<LocResult> {
    const matches = await globby(this.include, {
      cwd: this.cwd,
      ignore: this.exclude,
      nodir: true
    });

    const files: string[] = [];
    const info: LineInfo = { ...defaultInfo };
    let languages: LocResult['languages'] = {};

    // Counts a single glob match and folds it into the shared accumulators.
    const countOne = async (pathItem: string): Promise<void> => {
      const fullPath = slash(path.join(this.cwd, pathItem));
      if (!pathItem) {
        return;
      }
      if (!(await fs.pathExists(fullPath))) {
        return;
      }
      const stat = await fs.stat(fullPath);
      if (stat.isDirectory()) {
        return;
      }

      const fileInfo = await new LocFile(fullPath).getFileInfo();
      const languageName = fileInfo.languages;
      const { lines } = fileInfo;

      info.total += lines.total;
      info.code += lines.code;
      info.comment += lines.comment;

      // Start from the language's previous tally, or from nothing on first sight.
      const seen: Partial<LineInfo & { sum: number }> = languages[languageName] || {};
      const updated = {
        code: lines.code + (seen.code || 0),
        sum: (seen.sum || 0) + 1,
        comment: lines.comment + (seen.comment || 0),
        total: lines.total + (seen.total || 0),
      };
      languages = { ...languages, [languageName]: updated };

      files.push(fullPath);
    };

    await Promise.all(matches.map((pathItem) => countOne(pathItem)));

    return { files, info, languages };
  }
}
/**
 * Normalise an optional "string or list of strings" option to a list.
 *
 * @param arr the option value; arrays are returned as-is (not copied)
 * @param dfault value used when `arr` is missing or empty-string
 * @returns `arr` as an array, `[dfault]` when `arr` is falsy and a default
 *          is given, otherwise an empty array
 */
function ensureArray(arr?: string[] | string, dfault?: string) {
  if (Array.isArray(arr)) {
    return arr;
  }
  if (arr) {
    return [arr];
  }
  return dfault ? [dfault] : [];
}

179
node_modules/github-linguist/src/file.ts generated vendored Normal file
View file

@ -0,0 +1,179 @@
/**
* detect file info
*/
import * as fs from 'fs-extra';
import * as Path from 'path';
// @ts-ignore
import slash from 'slash2';
import { Languages, Regexes } from './languages';
/**
 * Line counts for one file, or running totals across several files.
 */
export interface LineInfo {
// number of lines in the content, blank lines included
total: number;
// lines classified as code (neither blank nor comment)
code: number;
// lines classified as comment
comment: number;
}
/**
 * Everything LocFile reports about a single file.
 */
export interface FileInfo {
// file name (last segment of the path)
name: string;
// detected language name, or '' when the extension is not recognised
languages: string;
// file size as reported by fs.stat, 0 when not determined
size: number;
// line counts for the file's content
lines: LineInfo;
}
// Zeroed line counts, used as the base value for computed results and as the
// `lines` member of DefaultFileInfo.
const DefaultLine: LineInfo = {
total: 0,
code: 0,
comment: 0,
};
// Template for a FileInfo with nothing detected. LocFile shallow-copies it with
// Object.assign, so every copy shares the same `lines` object (DefaultLine)
// until `lines` is reassigned; that shared object must not be mutated in place.
const DefaultFileInfo: FileInfo = {
name: '',
languages: '',
size: 0,
lines: DefaultLine,
};
/**
 * Collects language and line-count information for a single file.
 */
export class LocFile {
  public path: string;
  private rawPath: string;
  private language = new Languages();

  /**
   * Creates an instance of LocFile.
   *
   * @param rawPath path to the file exactly as supplied by the caller
   * @param debug when true, every classified line is printed with console.log
   */
  constructor(rawPath: string, private debug = false) {
    // `path` is the slash-normalised form; `rawPath` is kept for the
    // existence check and for error messages.
    this.path = slash(rawPath);
    this.rawPath = rawPath;
  }

  /**
   * Look up the language for a path from its extension.
   *
   * The extension map is keyed by lower-cased extensions, so the extension is
   * lower-cased before the lookup; otherwise files such as `script.R` or
   * `INDEX.JS` could never match.
   *
   * @returns the language name, or '' when the extension is unknown
   */
  private getType(path: string): string {
    const fileExtension = `.${path.split('.').pop()}`.toLowerCase();
    return this.language.extensionMap[fileExtension] || '';
  }

  /**
   * Classify every line of `data` as code, comment or empty.
   *
   * @param data full text content to analyse
   * @param regexes comment-detection patterns for the content's language
   * @returns total line count plus the number of code and comment lines
   */
  private filterData = (data: string, regexes: Regexes): LineInfo => {
    const lines = data.split(/\n/);
    let commentLength = 0;
    // Start by assuming every line is code, then subtract as lines are
    // recognised as comments or blanks.
    let codeLength = lines.length;
    const total = codeLength;
    let inMultiLineComment = false;

    lines.forEach((line) => {
      let lineType = 'code';
      line = line.trim();

      if (inMultiLineComment) {
        let noCode = true;
        if (regexes.multiLineCommentClose.test(line)) {
          // line contains the end of a multi-line comment
          inMultiLineComment = false;
          if (!regexes.multiLineCommentCloseEnd.test(line)) {
            // the comment terminator is not the last thing on the line,
            // so there is real code after it.
            noCode = false;
          }
        }
        if (noCode) {
          lineType = 'comm';
          commentLength += 1;
          codeLength -= 1;
        }
      } else if (line) {
        // non-empty line
        if (regexes.multiLineCommentOpen.test(line)) {
          // line contains the start of a multi-line comment
          // might contain some real code, but we'll let that slide
          if (!regexes.multiLineCommentOpenAndClose.test(line)) {
            // comment is not also closed on this line
            inMultiLineComment = true;
          }
          if (regexes.multiLineCommentOpenStart.test(line)) {
            // The comment starts the line. There is no other code on this line
            commentLength += 1;
            codeLength -= 1;
            lineType = 'comm';
          }
        } else if (regexes.singleLineComment.test(line)) {
          // line contains only a single line comment
          commentLength += 1;
          codeLength -= 1;
          lineType = 'comm';
        }
      } else {
        // empty line
        codeLength -= 1;
        lineType = 'empt';
      }

      if (this.debug) {
        console.log(lineType, line)
      }
    });

    return {
      ...DefaultLine,
      total,
      code: codeLength,
      comment: commentLength,
    };
  };

  /**
   * Get the file info for the path this instance was created with.
   *
   * @param data optional content to analyse instead of reading the file;
   *             an empty string is treated as "not provided"
   * @returns the file info. A path that is not a regular file yields the
   *          default (empty) info; an unrecognised extension yields name and
   *          size with zeroed line counts.
   * @throws Error when the path does not exist, and `Error('read file failed.')`
   *         when stat, read or classification fails
   */
  public async getFileInfo(data?: string): Promise<FileInfo> {
    if (!(await fs.pathExists(this.rawPath))) {
      throw new Error(`Error: file ${this.rawPath} does not exist.`);
    }

    let newData = data;
    const info: FileInfo = Object.assign({}, DefaultFileInfo);
    // `this.path` is slash-normalised, so it has to be split on '/' and not
    // on the platform separator: on Windows `Path.sep` is '\\', which would
    // leave the whole path as the "name".
    const name = this.path.split('/').pop() || '';

    try {
      const stat = await fs.stat(this.path);
      if (!stat.isFile()) {
        return info;
      }

      newData = data || await fs.readFile(this.path, 'utf-8');
      info.name = name;
      info.size = (stat && stat.size) || 0;
      info.languages = this.getType(this.path);

      if (!info.languages) {
        return info;
      }

      if (newData) {
        const regexes = this.language.getRegexes(info.languages);
        info.lines = this.filterData(newData, regexes);
      }
    } catch (err) {
      // NOTE(review): the underlying error is discarded here; callers only
      // ever see this generic message.
      throw new Error('read file failed.');
    }
    return info;
  }

  /**
   * Get the file info for in-memory content, without touching the file system.
   *
   * @param name file name (or path) used for language detection
   * @param data content to analyse
   * @returns the file info; `size` is left at its default of 0
   */
  public getFileInfoByContent(name: string, data: string): FileInfo {
    const info: FileInfo = Object.assign({}, DefaultFileInfo);
    info.name = name;
    info.languages = this.getType(name);
    info.lines = this.filterData(data, this.language.getRegexes(info.languages));
    return info;
  }
}

29
node_modules/github-linguist/src/index.ts generated vendored Normal file
View file

@ -0,0 +1,29 @@
// @ts-ignore
import slash from 'slash2';
import fs from 'fs-extra';
import { LocDir, LocResult } from './directory';
import { LocFile } from './file';
export { LocDir, LocDirOptions } from './directory';
export { LocFile, LineInfo } from './file';
/**
 * Count lines of code for a single file or for everything under a directory.
 *
 * @param fileOrDir path to a file or a directory
 * @returns the counted files with overall and per-language line counts
 */
async function loc(fileOrDir: string): Promise<LocResult> {
  const target = slash(fileOrDir);
  const stat = await fs.stat(target);

  if (!stat.isFile()) {
    // Anything that is not a regular file is scanned as a directory root.
    return new LocDir({ cwd: target }).loadInfo();
  }

  const locFile = new LocFile(target);
  const fileInfo = await locFile.getFileInfo();
  return {
    info: fileInfo.lines,
    files: [locFile.path],
    // A single file contributes exactly one entry with a file count of 1.
    languages: { [fileInfo.languages]: { ...fileInfo.lines, sum: 1 } },
  };
}
export default loc;

4
node_modules/github-linguist/src/justify.json generated vendored Normal file
View file

@ -0,0 +1,4 @@
{
".ts": "typescript",
".tsx": "tsx"
}

176
node_modules/github-linguist/src/languages.ts generated vendored Normal file
View file

@ -0,0 +1,176 @@
import { ExtensionJustify } from './utils';
// tslint:disable-next-line
const languageMap = require('language-map');
// tslint:disable-next-line
// const lang = require('language-classifier');
// Maps a file extension (with its leading dot) to a language name.
interface ExtensionsTypes {
[key: string]: string;
}
// Placeholder options type; it declares no members.
// NOTE(review): nothing in this file references it — confirm it is still needed.
export interface DetectorOptions {}
/**
 * Detects a programming language from a file extension and supplies the
 * comment-detection regular expressions for it.
 *
 * @export
 * @class Languages
 */
export class Languages {
  extensionMap: {
    [key: string]: string;
  } = {};

  /**
   * Creates an instance of Languages and builds the extension lookup table.
   */
  constructor() {
    this.extensionMap = this.loadExtensionMap();
  }

  /**
   * Build the extension -> language table from `language-map`, then apply the
   * ExtensionJustify overrides on top of it.
   */
  private loadExtensionMap = () => {
    const byExtension: ExtensionsTypes = {};
    for (const languageName of Object.keys(languageMap)) {
      const entry = languageMap[languageName];
      const knownExtensions: string[] = (entry && entry.extensions) || [];
      for (const extension of knownExtensions) {
        // Both sides are lower-cased, so the table is keyed case-insensitively.
        byExtension[extension.toLowerCase()] = languageName.toLowerCase();
      }
    }
    return { ...byExtension, ...ExtensionJustify };
  }

  /**
   * Retrieve the regular expressions for a given language.
   * The table is incomplete, but covers most of the languages we
   * see in the wild.
   *
   * @param language the language to retrieve regexes for
   * @returns the matching regexes; C-style regexes for any language that has
   *          no dedicated entry
   */
  public getRegexes(language: string): Regexes {
    if (language === 'html' || language === 'xml') {
      return ALL_REGEXES.html;
    }
    if (language === 'ruby') {
      return ALL_REGEXES.ruby;
    }
    if (language === 'python') {
      return ALL_REGEXES.python;
    }
    // not exact, but likely the best guess for any other unspecified language.
    return ALL_REGEXES.c;
  }

  /**
   * Return the extension -> language table.
   */
  public getExtensionMap() {
    return this.extensionMap;
  }
}
/**
 * Comment-detection patterns for one language family.
 */
export interface Regexes {
// the line is a single-line comment
singleLineComment: RegExp;
// the line contains a multi-line comment opener
multiLineCommentOpen: RegExp;
// the line begins with a multi-line comment opener
multiLineCommentOpenStart: RegExp;
// the line contains a multi-line comment terminator
multiLineCommentClose: RegExp;
// the line ends with a multi-line comment terminator
multiLineCommentCloseEnd: RegExp;
// a multi-line comment is both opened and closed on the line
multiLineCommentOpenAndClose: RegExp;
}
// Comment-detection patterns per language family, selected by
// Languages.getRegexes.
const ALL_REGEXES: Record<string, Regexes> = {
c: {
// matches when // are the first two characters of a line
singleLineComment: /^\/\//,
// matches when /* exists anywhere in a line
multiLineCommentOpen: /\/\*/,
// matches when /* starts a line
multiLineCommentOpenStart: /^\/\*/,
// matches when */ exists anywhere in a line
multiLineCommentClose: /\*\//,
// matches when */ ends a line
multiLineCommentCloseEnd: /\*\/$/,
// matches /* ... */ on a single line
multiLineCommentOpenAndClose: /\/\*.*\*\//
},
python: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// matches when """ exists anywhere in a line. This is not right, since
// a multiline string is not always a comment, but for the
// sake of simplicity, we will do that here.
multiLineCommentOpen: /"""/,
// matches when """ starts a line
multiLineCommentOpenStart: /^"""/,
// matches when """ exists anywhere in a line
multiLineCommentClose: /"""/,
// matches when """ ends a line
multiLineCommentCloseEnd: /"""$/,
// matches """ ... """ on a single line
multiLineCommentOpenAndClose: /""".*"""/
},
ruby: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// For ruby multiline comments, =begin and =end must be
// on their own lines
// matches when =begin starts a line
multiLineCommentOpen: /^=begin/,
// matches when =begin starts a line
multiLineCommentOpenStart: /^=begin/,
// matches when =end starts a line
multiLineCommentClose: /^=end/,
// matches when =end is the whole line
multiLineCommentCloseEnd: /^=end$/,
// not possible in ruby; this pattern only matches a line that is a single NUL character
multiLineCommentOpenAndClose: /^\0$/
},
html: {
// There is no single line comment; this pattern only matches a line that is a single NUL character
singleLineComment: /^\0$/,
// matches when <!-- exists anywhere in a line
multiLineCommentOpen: /<!--/,
// matches when <!-- starts a line
multiLineCommentOpenStart: /^<!--/,
// matches when --> exists anywhere in a line
multiLineCommentClose: /-->/,
// matches when --> ends a line
multiLineCommentCloseEnd: /-->$/,
// matches <!-- ... --> on a single line
multiLineCommentOpenAndClose: /<!--.*-->/
}
};

25
node_modules/github-linguist/src/utils.ts generated vendored Normal file
View file

@ -0,0 +1,25 @@
import fs from 'fs';
import path from 'path';
// @ts-ignore
import slash from 'slash2';
// Location of this package's package.json: one directory above this file.
const packagePath = slash(path.join(__dirname, '../', 'package.json'));
/**
 * Read the `version` field from this package's package.json.
 *
 * @returns the version string, or 'invalid version!' when the manifest has
 *          no usable `version` field
 */
export function getVersion(): string {
  const manifestText = fs.readFileSync(packagePath, 'utf8');
  const manifest = JSON.parse(manifestText);
  if (manifest && manifest.version) {
    return manifest.version;
  }
  return 'invalid version!';
}
// Hand-picked extension -> language overrides. Languages.loadExtensionMap
// merges these last, so they win over entries derived from `language-map`.
export const ExtensionJustify = {
'.ts': 'typescript',
'.jsx': 'javascript',
'.js': 'javascript',
'.tsx': 'typescript',
};