Use new .NET regular expression source generators
This commit is contained in:
parent
5b4a4341ad
commit
ee8c0ae27b
5 changed files with 107 additions and 92 deletions
|
|
@ -20,7 +20,7 @@ namespace DataLayer
|
|||
PartialDownload = 0x1000
|
||||
}
|
||||
|
||||
public class UserDefinedItem
|
||||
public partial class UserDefinedItem
|
||||
{
|
||||
internal int BookId { get; private set; }
|
||||
public Book Book { get; private set; }
|
||||
|
|
@ -51,18 +51,23 @@ namespace DataLayer
|
|||
public IEnumerable<string> TagsEnumerated => Tags == "" ? new string[0] : Tags.Split(null as char[], StringSplitOptions.RemoveEmptyEntries);
|
||||
|
||||
#region sanitize tags: space delimited. Inline/denormalized. Lower case. Alpha numeric and hyphen
|
||||
// only legal chars are letters numbers underscores and separating whitespace
|
||||
//
|
||||
// technically, the only char.s which aren't easily supported are \ [ ]
|
||||
// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
|
||||
// it's easy to expand whitelist as needed
|
||||
// for lucene, ToLower() isn't needed because search is case-inspecific. for here, it prevents duplicates
|
||||
//
|
||||
// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
|
||||
// full list of characters which must be escaped:
|
||||
// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
||||
static Regex regex { get; } = new Regex(@"[^\w\d\s_]", RegexOptions.Compiled);
|
||||
private static string sanitize(string input)
|
||||
|
||||
/// <summary>
|
||||
/// only legal chars are letters numbers underscores and separating whitespace
|
||||
///
|
||||
/// technically, the only char.s which aren't easily supported are \ [ ]
|
||||
/// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
|
||||
/// it's easy to expand whitelist as needed
|
||||
/// for lucene, ToLower() isn't needed because search is case-inspecific. for here, it prevents duplicates
|
||||
///
|
||||
/// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
|
||||
/// full list of characters which must be escaped:
|
||||
/// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
||||
/// </summary>
|
||||
|
||||
[GeneratedRegex(@"[^\w\d\s_]")]
|
||||
private static partial Regex IllegalCharacterRegex();
|
||||
private static string sanitize(string input)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(input))
|
||||
return "";
|
||||
|
|
@ -73,9 +78,9 @@ namespace DataLayer
|
|||
// assume a hyphen is supposed to be an underscore
|
||||
.Replace("-", "_");
|
||||
|
||||
var unique = regex
|
||||
// turn illegal characters into a space. this will also take care of turning new lines into spaces
|
||||
.Replace(str, " ")
|
||||
var unique = IllegalCharacterRegex()
|
||||
// turn illegal characters into a space. this will also take care of turning new lines into spaces
|
||||
.Replace(str, " ")
|
||||
// split and remove excess spaces
|
||||
.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
|
||||
// de-dup
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue