Upgrade linguist dependency

This version changes how Python heredocs (triple-quoted strings) are
counted: every heredoc is now counted as code.
This commit is contained in:
Andrew Eisenberg 2021-08-25 10:45:44 -07:00
parent a44b61d961
commit b29bf7b05a
32 changed files with 410 additions and 339 deletions

View file

@ -1,16 +1,10 @@
import languageMap from 'language-map/languages.json';
import { ExtensionJustify } from './utils';
// tslint:disable-next-line
const languageMap = require('language-map');
// tslint:disable-next-line
// const lang = require('language-classifier');
/**
* Dictionary type with arbitrary string keys and string values.
* NOTE(review): presumably maps a file extension to a language name;
* confirm against the code that uses it.
*/
interface ExtensionsTypes {
[key: string]: string;
}
/**
* Empty options interface; it currently declares no members.
*/
export interface DetectorOptions {}
/**
* The extension map can contain multiple languages with the same extension,
* but we only want a single one. For the moment, these clashes are resolved
@ -19,8 +13,116 @@ export interface DetectorOptions {}
* where the extension is ambiguous. The ordering of the list matters and
* languages earlier on will get a higher priority when resolving clashes.
*/
const importantLanguages = ["javascript", "typescript", "ruby", "python", "java", "c", "c++", "c#", "rust", "scala", "perl", "go"];
// Position in this list is the priority: a language earlier in the list
// wins when several languages claim the same extension. Names are lowercase
// because they are compared against lowercased language names via indexOf.
const importantLanguages = [
'javascript',
'typescript',
'ruby',
'python',
'java',
'c',
'c++',
'c#',
'rust',
'scala',
'perl',
'go',
];
/**
* Set of patterns used to recognise comments in one language family.
*
* Only singleLineComment is required; the multi-line patterns are optional
* because an entry may omit them (the python entry of ALL_REGEXES defines
* only singleLineComment).
*/
export interface Regexes {
// matches a line that starts with the single-line comment marker
singleLineComment: RegExp;
// matches a line that contains the multi-line comment opener
multiLineCommentOpen?: RegExp;
// matches a line that starts with the multi-line comment opener
multiLineCommentOpenStart?: RegExp;
// matches a line that contains the multi-line comment closer
multiLineCommentClose?: RegExp;
// matches a line that ends with the multi-line comment closer
multiLineCommentCloseEnd?: RegExp;
// matches a line where the opener is followed by the closer
multiLineCommentOpenAndClose?: RegExp;
}
// Comment-detection patterns, keyed by language family name.
// An entry may be shared by several languages: getRegexes returns the html
// entry for both 'html' and 'xml'.
const ALL_REGEXES: Record<string, Regexes> = {
c: {
// matches when // are the first two characters of a line
singleLineComment: /^\/\//,
// matches when /* exists in a line
multiLineCommentOpen: /\/\*/,
// matches when /* starts a line
multiLineCommentOpenStart: /^\/\*/,
// matches when */ exists in a line
multiLineCommentClose: /\*\//,
// matches when */ ends a line
multiLineCommentCloseEnd: /\*\/$/,
// matches /* ... */
multiLineCommentOpenAndClose: /\/\*.*\*\//,
},
python: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// The """ patterns below are intentionally disabled, so this entry has
// no multi-line comment patterns: a multiline string is not always a
// comment, and treating every """ block as one was not right.
// (disabled) matches when """ exists in a line
// multiLineCommentOpen: /"""/,
// (disabled) matches when """ starts a line
// multiLineCommentOpenStart: /^"""/,
// (disabled) matches when """ exists in a line
// multiLineCommentClose: /"""/,
// (disabled) matches when """ ends a line
// multiLineCommentCloseEnd: /"""$/,
// (disabled) matches """ ... """
// multiLineCommentOpenAndClose: /""".*"""/,
},
ruby: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// For ruby multiline comments, =begin and =end must be
// on their own lines
// matches when =begin starts a line
multiLineCommentOpen: /^=begin/,
// matches when =begin starts a line
multiLineCommentOpenStart: /^=begin/,
// matches when =end starts a line
multiLineCommentClose: /^=end/,
// matches when =end is the entire line
multiLineCommentCloseEnd: /^=end$/,
// not possible in ruby; this placeholder only matches a line that is
// exactly one NUL character, i.e. effectively never
multiLineCommentOpenAndClose: /^\0$/,
},
html: {
// There is no single line comment; this placeholder only matches a line
// that is exactly one NUL character, i.e. effectively never
singleLineComment: /^\0$/,
// matches when <!-- exists in a line
multiLineCommentOpen: /<!--/,
// matches when <!-- starts a line
multiLineCommentOpenStart: /^<!--/,
// matches when --> exists in a line
multiLineCommentClose: /-->/,
// matches when --> ends a line
multiLineCommentCloseEnd: /-->$/,
// matches <!-- ... -->
multiLineCommentOpenAndClose: /<!--.*-->/,
},
};
/**
* Detects the programming language from the file extension
@ -51,11 +153,13 @@ export class Languages {
const languageExtensions = (languageMode && languageMode.extensions) || [];
languageExtensions.forEach((extension: string) => {
const lowerCaseExtension = extension.toLowerCase();
const lowerCaseLanguage = language.toLowerCase()
const lowerCaseLanguage = language.toLowerCase();
if (!extensions[lowerCaseExtension]) {
extensions[lowerCaseExtension] = lowerCaseLanguage;
} else {
const currentLanguagePriority = importantLanguages.indexOf(extensions[lowerCaseExtension]);
const currentLanguagePriority = importantLanguages.indexOf(
extensions[lowerCaseExtension],
);
if (currentLanguagePriority === -1) {
extensions[lowerCaseExtension] = lowerCaseLanguage;
} else {
@ -67,8 +171,8 @@ export class Languages {
});
});
return Object.assign({}, extensions, ExtensionJustify);
}
return { ...extensions, ...ExtensionJustify };
};
/**
* Retrieve the regular expressions for a given language.
@ -78,7 +182,7 @@ export class Languages {
* @param language the language to retrieve regexes for
*/
public getRegexes(language: string): Regexes {
switch(language) {
switch (language) {
case 'html':
case 'xml':
return ALL_REGEXES.html;
@ -105,104 +209,8 @@ export class Languages {
/**
* get file type through a path
*/
public getType(path: string): string {
public getType(path: string): string {
  // The text after the last '.' is used as the extension; when the path has
  // no '.', the whole path ends up in that position. The lookup key always
  // carries a leading dot.
  const pieces = path.split('.');
  const lastPiece = pieces[pieces.length - 1];
  const lookupKey = '.' + lastPiece;
  const language = this.extensionMap[lookupKey];
  // Missing (or otherwise falsy) map entries resolve to the empty string.
  return language ? language : '';
}
}
/**
* Set of patterns used to recognise comments in one language family.
* Every pattern is required in this form of the interface.
*/
export interface Regexes {
// matches a line that starts with the single-line comment marker
singleLineComment: RegExp;
// matches a line that contains the multi-line comment opener
multiLineCommentOpen: RegExp;
// matches a line that starts with the multi-line comment opener
multiLineCommentOpenStart: RegExp;
// matches a line that contains the multi-line comment closer
multiLineCommentClose: RegExp;
// matches a line that ends with the multi-line comment closer
multiLineCommentCloseEnd: RegExp;
// matches a line where the opener is followed by the closer
multiLineCommentOpenAndClose: RegExp;
}
// Comment-detection patterns, keyed by language family name.
// An entry may be shared by several languages: getRegexes returns the html
// entry for both 'html' and 'xml'.
const ALL_REGEXES: Record<string, Regexes> = {
c: {
// matches when // are the first two characters of a line
singleLineComment: /^\/\//,
// matches when /* exists in a line
multiLineCommentOpen: /\/\*/,
// matches when /* starts a line
multiLineCommentOpenStart: /^\/\*/,
// matches when */ exists in a line
multiLineCommentClose: /\*\//,
// matches when */ ends a line
multiLineCommentCloseEnd: /\*\/$/,
// matches /* ... */
multiLineCommentOpenAndClose: /\/\*.*\*\//
},
python: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// matches when """ exists in a line. Treating it as a comment is not
// strictly right, since a multiline string is not always a comment,
// but for the sake of simplicity, we will do that here.
multiLineCommentOpen: /"""/,
// matches when """ starts a line
multiLineCommentOpenStart: /^"""/,
// matches when """ exists in a line
multiLineCommentClose: /"""/,
// matches when """ ends a line
multiLineCommentCloseEnd: /"""$/,
// matches """ ... """
multiLineCommentOpenAndClose: /""".*"""/
},
ruby: {
// matches when # is the first character of a line
singleLineComment: /^#/,
// For ruby multiline comments, =begin and =end must be
// on their own lines
// matches when =begin starts a line
multiLineCommentOpen: /^=begin/,
// matches when =begin starts a line
multiLineCommentOpenStart: /^=begin/,
// matches when =end starts a line
multiLineCommentClose: /^=end/,
// matches when =end is the entire line
multiLineCommentCloseEnd: /^=end$/,
// not possible in ruby; this placeholder only matches a line that is
// exactly one NUL character, i.e. effectively never
multiLineCommentOpenAndClose: /^\0$/
},
html: {
// There is no single line comment; this placeholder only matches a line
// that is exactly one NUL character, i.e. effectively never
singleLineComment: /^\0$/,
// matches when <!-- exists in a line
multiLineCommentOpen: /<!--/,
// matches when <!-- starts a line
multiLineCommentOpenStart: /^<!--/,
// matches when --> exists in a line
multiLineCommentClose: /-->/,
// matches when --> ends a line
multiLineCommentCloseEnd: /-->$/,
// matches <!-- ... -->
multiLineCommentOpenAndClose: /<!--.*-->/
}
};