Use new .NET regular expression source generators

This commit is contained in:
Michael Bucari-Tovo 2023-02-10 18:20:27 -07:00
parent 5b4a4341ad
commit ee8c0ae27b
5 changed files with 107 additions and 92 deletions

View file

@ -20,7 +20,7 @@ namespace DataLayer
PartialDownload = 0x1000
}
public class UserDefinedItem
public partial class UserDefinedItem
{
internal int BookId { get; private set; }
public Book Book { get; private set; }
@ -51,18 +51,23 @@ namespace DataLayer
public IEnumerable<string> TagsEnumerated => Tags == "" ? new string[0] : Tags.Split(null as char[], StringSplitOptions.RemoveEmptyEntries);
#region sanitize tags: space delimited. Inline/denormalized. Lower case. Alpha numeric and hyphen
// only legal chars are letters numbers underscores and separating whitespace
//
// technically, the only char.s which aren't easily supported are \ [ ]
// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
// it's easy to expand whitelist as needed
// for lucene, ToLower() isn't needed because search is case-inspecific. for here, it prevents duplicates
//
// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
// full list of characters which must be escaped:
// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
static Regex regex { get; } = new Regex(@"[^\w\d\s_]", RegexOptions.Compiled);
private static string sanitize(string input)
/// <summary>
/// only legal chars are letters numbers underscores and separating whitespace
///
/// technically, the only char.s which aren't easily supported are \ [ ]
/// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
/// it's easy to expand whitelist as needed
/// for lucene, ToLower() isn't needed because search is case-inspecific. for here, it prevents duplicates
///
/// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
/// full list of characters which must be escaped:
/// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
/// </summary>
[GeneratedRegex(@"[^\w\d\s_]")]
private static partial Regex IllegalCharacterRegex();
private static string sanitize(string input)
{
if (string.IsNullOrWhiteSpace(input))
return "";
@ -73,9 +78,9 @@ namespace DataLayer
// assume a hyphen is supposed to be an underscore
.Replace("-", "_");
var unique = regex
// turn illegal characters into a space. this will also take care of turning new lines into spaces
.Replace(str, " ")
var unique = IllegalCharacterRegex()
// turn illegal characters into a space. this will also take care of turning new lines into spaces
.Replace(str, " ")
// split and remove excess spaces
.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
// de-dup