Refactor LibationSearchEngine
This commit is contained in:
parent
4ab6da132b
commit
b4aa220051
9 changed files with 235 additions and 266 deletions
|
|
@ -1,13 +1,10 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.ObjectModel;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using DataLayer;
|
||||
using Dinah.Core;
|
||||
using LibationFileManager;
|
||||
using Lucene.Net.Analysis.Standard;
|
||||
using Lucene.Net.Analysis.Tokenattributes;
|
||||
using Lucene.Net.Documents;
|
||||
using Lucene.Net.Index;
|
||||
using Lucene.Net.Search;
|
||||
|
|
@ -25,164 +22,47 @@ namespace LibationSearchEngine
|
|||
public const string ALL = "all";
|
||||
|
||||
#region index rules
|
||||
// common fields used in the "all" default search field
|
||||
public const string ALL_AUDIBLE_PRODUCT_ID = nameof(Book.AudibleProductId);
|
||||
public const string ALL_TITLE = nameof(Book.Title);
|
||||
public const string ALL_AUTHOR_NAMES = "AuthorNames";
|
||||
public const string ALL_NARRATOR_NAMES = "NarratorNames";
|
||||
public const string ALL_SERIES_NAMES = "SeriesNames";
|
||||
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
[nameof(Book.AudibleProductId)] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||
["ProductId"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||
["Id"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||
["ASIN"] = lb => lb.Book.AudibleProductId.ToLowerInvariant()
|
||||
}
|
||||
);
|
||||
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
[nameof(Book.Title)] = lb => lb.Book.Title,
|
||||
[ALL_AUTHOR_NAMES] = lb => lb.Book.AuthorNames(),
|
||||
["Author"] = lb => lb.Book.AuthorNames(),
|
||||
["Authors"] = lb => lb.Book.AuthorNames(),
|
||||
[ALL_NARRATOR_NAMES] = lb => lb.Book.NarratorNames(),
|
||||
["Narrator"] = lb => lb.Book.NarratorNames(),
|
||||
["Narrators"] = lb => lb.Book.NarratorNames(),
|
||||
[nameof(Book.Publisher)] = lb => lb.Book.Publisher,
|
||||
|
||||
[ALL_SERIES_NAMES] = lb => lb.Book.SeriesNames(),
|
||||
["Series"] = lb => lb.Book.SeriesNames(),
|
||||
["SeriesId"] = lb => string.Join(", ", lb.Book.SeriesLink.Select(s => s.Series.AudibleSeriesId)),
|
||||
|
||||
["CategoriesNames"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()),
|
||||
[nameof(Book.Category)] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()),
|
||||
["Categories"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()),
|
||||
["CategoriesId"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()),
|
||||
["CategoryId"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()),
|
||||
|
||||
[TAGS.FirstCharToUpper()] = lb => lb.Book.UserDefinedItem.Tags,
|
||||
|
||||
["Locale"] = lb => lb.Book.Locale,
|
||||
["Region"] = lb => lb.Book.Locale,
|
||||
["Account"] = lb => lb.Account,
|
||||
["Email"] = lb => lb.Account
|
||||
}
|
||||
);
|
||||
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> numberIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
// for now, all numbers are padded to 8 char.s
|
||||
// This will allow a single method to auto-pad numbers. The method will match these as well as date: yyyymmdd
|
||||
[nameof(Book.LengthInMinutes)] = lb => lb.Book.LengthInMinutes.ToLuceneString(),
|
||||
["Length"] = lb => lb.Book.LengthInMinutes.ToLuceneString(),
|
||||
["Minutes"] = lb => lb.Book.LengthInMinutes.ToLuceneString(),
|
||||
["Hours"] = lb => (lb.Book.LengthInMinutes / 60).ToLuceneString(),
|
||||
|
||||
["ProductRating"] = lb => lb.Book.Rating.OverallRating.ToLuceneString(),
|
||||
["Rating"] = lb => lb.Book.Rating.OverallRating.ToLuceneString(),
|
||||
["UserRating"] = lb => userOverallRating(lb.Book),
|
||||
["MyRating"] = lb => userOverallRating(lb.Book),
|
||||
|
||||
[nameof(LibraryBook.DateAdded)] = lb => lb.DateAdded.ToLuceneString(),
|
||||
[nameof(Book.DatePublished)] = lb => lb.Book.DatePublished?.ToLuceneString() ?? "",
|
||||
|
||||
["LastDownload"] = lb => lb.Book.UserDefinedItem.LastDownloaded.ToLuceneString(),
|
||||
["LastDownloaded"] = lb => lb.Book.UserDefinedItem.LastDownloaded.ToLuceneString()
|
||||
}
|
||||
);
|
||||
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, bool>>(
|
||||
new Dictionary<string, Func<LibraryBook, bool>>
|
||||
{
|
||||
["HasDownloads"] = lb => lb.Book.HasPdf(),
|
||||
["HasDownload"] = lb => lb.Book.HasPdf(),
|
||||
["Downloads"] = lb => lb.Book.HasPdf(),
|
||||
["Download"] = lb => lb.Book.HasPdf(),
|
||||
["HasPDFs"] = lb => lb.Book.HasPdf(),
|
||||
["HasPDF"] = lb => lb.Book.HasPdf(),
|
||||
["PDFs"] = lb => lb.Book.HasPdf(),
|
||||
["PDF"] = lb => lb.Book.HasPdf(),
|
||||
|
||||
["IsRated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f,
|
||||
["Rated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f,
|
||||
|
||||
["IsAuthorNarrated"] = isAuthorNarrated,
|
||||
["AuthorNarrated"] = isAuthorNarrated,
|
||||
|
||||
[nameof(Book.IsAbridged)] = lb => lb.Book.IsAbridged,
|
||||
["Abridged"] = lb => lb.Book.IsAbridged,
|
||||
|
||||
["IsLiberated"] = lb => isLiberated(lb.Book),
|
||||
["Liberated"] = lb => isLiberated(lb.Book),
|
||||
["LiberatedError"] = lb => liberatedError(lb.Book),
|
||||
|
||||
["Podcast"] = lb => lb.Book.IsEpisodeChild(),
|
||||
["Podcasts"] = lb => lb.Book.IsEpisodeChild(),
|
||||
["IsPodcast"] = lb => lb.Book.IsEpisodeChild(),
|
||||
["Episode"] = lb => lb.Book.IsEpisodeChild(),
|
||||
["Episodes"] = lb => lb.Book.IsEpisodeChild(),
|
||||
["IsEpisode"] = lb => lb.Book.IsEpisodeChild(),
|
||||
|
||||
["Absent"] = lb => lb.AbsentFromLastScan,
|
||||
["AbsentFromLastScan"] = lb => lb.AbsentFromLastScan,
|
||||
}
|
||||
);
|
||||
|
||||
private static bool isAuthorNarrated(LibraryBook lb)
|
||||
private static bool isAuthorNarrated(Book book)
|
||||
{
|
||||
var authors = lb.Book.Authors.Select(a => a.Name).ToArray();
|
||||
var narrators = lb.Book.Narrators.Select(a => a.Name).ToArray();
|
||||
var authors = book.Authors.Select(a => a.Name).ToArray();
|
||||
var narrators = book.Narrators.Select(a => a.Name).ToArray();
|
||||
return authors.Intersect(narrators).Any();
|
||||
}
|
||||
private static string userOverallRating(Book book) => book.UserDefinedItem.Rating.OverallRating.ToLuceneString();
|
||||
private static bool isLiberated(Book book) => book.UserDefinedItem.BookStatus == LiberatedStatus.Liberated;
|
||||
private static bool liberatedError(Book book) => book.UserDefinedItem.BookStatus == LiberatedStatus.Error;
|
||||
|
||||
// use these common fields in the "all" default search field
|
||||
private static IEnumerable<Func<LibraryBook, string>> allFieldIndexRules { get; }
|
||||
= new List<Func<LibraryBook, string>>
|
||||
{
|
||||
idIndexRules[ALL_AUDIBLE_PRODUCT_ID],
|
||||
stringIndexRules[ALL_TITLE],
|
||||
stringIndexRules[ALL_AUTHOR_NAMES],
|
||||
stringIndexRules[ALL_NARRATOR_NAMES],
|
||||
stringIndexRules[ALL_SERIES_NAMES]
|
||||
};
|
||||
#endregion
|
||||
|
||||
#region get search fields. used for display in help
|
||||
public static IEnumerable<string> GetSearchIdFields()
|
||||
// use these common fields in the "all" default search field
|
||||
public static IndexRuleCollection FieldIndexRules { get; } = new IndexRuleCollection
|
||||
{
|
||||
foreach (var key in idIndexRules.Keys)
|
||||
yield return key;
|
||||
}
|
||||
|
||||
public static IEnumerable<string> GetSearchStringFields()
|
||||
{
|
||||
foreach (var key in stringIndexRules.Keys)
|
||||
yield return key;
|
||||
}
|
||||
|
||||
public static IEnumerable<string> GetSearchBoolFields()
|
||||
{
|
||||
foreach (var key in boolIndexRules.Keys)
|
||||
yield return key;
|
||||
}
|
||||
|
||||
public static IEnumerable<string> GetSearchNumberFields()
|
||||
{
|
||||
foreach (var key in numberIndexRules.Keys)
|
||||
yield return key;
|
||||
}
|
||||
{ FieldType.ID, Book => Book.AudibleProductId.ToLowerInvariant(), nameof(Book.AudibleProductId), "ProductId", "Id", "ASIN" },
|
||||
{ FieldType.Raw, Book => Book.AudibleProductId, _ID_ },
|
||||
{ FieldType.String, Book => Book.Title, nameof(Book.Title), "ProductId", "Id", "ASIN" },
|
||||
{ FieldType.String, Book => Book.AuthorNames(), "AuthorNames", "Author", "Authors" },
|
||||
{ FieldType.String, Book => Book.NarratorNames(), "NarratorNames", "Narrator", "Narrators" },
|
||||
{ FieldType.String, Book => Book.Publisher, nameof(Book.Publisher) },
|
||||
{ FieldType.String, Book => Book.SeriesNames(), "SeriesNames", "Narrator", "Series" },
|
||||
{ FieldType.String, Book => string.Join(", ", Book.SeriesLink.Select(s => s.Series.AudibleSeriesId)), "SeriesId" },
|
||||
{ FieldType.String, Book => Book.CategoriesIds() is null ? null : string.Join(", ", Book.CategoriesIds()), nameof(Book.Category), "Categories", "CategoriesId", "CategoryId", "CategoriesNames" },
|
||||
{ FieldType.String, Book => Book.UserDefinedItem.Tags, TAGS.FirstCharToUpper() },
|
||||
{ FieldType.String, Book => Book.Locale, "Locale", "Region" },
|
||||
{ FieldType.String, lb => lb.Account, "Account", "Email" },
|
||||
{ FieldType.Bool, Book => Book.HasPdf().ToString(), "HasDownloads", "HasDownload", "Downloads" , "Download", "HasPDFs", "HasPDF" , "PDFs", "PDF" },
|
||||
{ FieldType.Bool, Book => (Book.UserDefinedItem.Rating.OverallRating > 0f).ToString(), "IsRated", "Rated" },
|
||||
{ FieldType.Bool, Book => isAuthorNarrated(Book).ToString(), "IsAuthorNarrated", "AuthorNarrated" },
|
||||
{ FieldType.Bool, Book => Book.IsAbridged.ToString(), nameof(Book.IsAbridged), "Abridged" },
|
||||
{ FieldType.Bool, Book => (Book.UserDefinedItem.BookStatus == LiberatedStatus.Liberated).ToString(), "IsLiberated", "Liberated" },
|
||||
{ FieldType.Bool, Book => (Book.UserDefinedItem.BookStatus == LiberatedStatus.Error).ToString(), "LiberatedError" },
|
||||
{ FieldType.Bool, Book => Book.IsEpisodeChild().ToString(), "Podcast", "Podcasts", "IsPodcast", "Episode", "Episodes", "IsEpisode" },
|
||||
{ FieldType.Bool, lb => lb.AbsentFromLastScan.ToString(), "AbsentFromLastScan", "Absent" },
|
||||
// all numbers are padded to 8 char.s
|
||||
// This will allow a single method to auto-pad numbers. The method will match these as well as date: yyyymmdd
|
||||
{ FieldType.Number, Book => Book.LengthInMinutes.ToLuceneString(), nameof(Book.LengthInMinutes), "Length", "Minutes" },
|
||||
{ FieldType.Number, Book => (Book.LengthInMinutes / 60).ToLuceneString(), "Hours" },
|
||||
{ FieldType.Number, Book => Book.Rating.OverallRating.ToLuceneString(), "ProductRating", "Rating" },
|
||||
{ FieldType.Number, Book => Book.UserDefinedItem.Rating.OverallRating.ToLuceneString(), "UserRating", "MyRating" },
|
||||
{ FieldType.Number, Book => Book.DatePublished?.ToLuceneString() ?? "", nameof(Book.DatePublished) },
|
||||
{ FieldType.Number, Book => Book.UserDefinedItem.LastDownloaded.ToLuceneString(), nameof(UserDefinedItem.LastDownloaded), "LastDownload" },
|
||||
{ FieldType.Number, lb => lb.DateAdded.ToLuceneString(), nameof(LibraryBook.DateAdded) }
|
||||
};
|
||||
#endregion
|
||||
|
||||
#region create and update index
|
||||
|
|
@ -224,35 +104,15 @@ namespace LibationSearchEngine
|
|||
{
|
||||
var doc = new Document();
|
||||
|
||||
// refine with
|
||||
// http://codeclimber.net.nz/archive/2009/09/10/how-subtext-lucenenet-index-is-structured/
|
||||
|
||||
// fields are key value pairs and MULTIPLE FIELDS CAN HAVE THE SAME KEY.
|
||||
// splitting authors and narrators and/or tags into multiple fields could be interesting research.
|
||||
// it could allow for more advanced searches, or maybe it could break broad searches.
|
||||
|
||||
// all searching should be lowercase
|
||||
// external callers have the reasonable expectation that product id will be returned CASE SPECIFIC
|
||||
doc.AddRaw(_ID_, libraryBook.Book.AudibleProductId);
|
||||
|
||||
// concat all common fields for the default 'all' field
|
||||
var allConcat =
|
||||
allFieldIndexRules
|
||||
.Select(rule => rule(libraryBook))
|
||||
FieldIndexRules
|
||||
.Select(rule => rule.GetValue(libraryBook))
|
||||
.Aggregate((a, b) => $"{a} {b}");
|
||||
doc.AddAnalyzed(ALL, allConcat);
|
||||
|
||||
foreach (var kvp in idIndexRules)
|
||||
doc.AddNotAnalyzed(kvp.Key, kvp.Value(libraryBook));
|
||||
|
||||
foreach (var kvp in stringIndexRules)
|
||||
doc.AddAnalyzed(kvp.Key, kvp.Value(libraryBook));
|
||||
|
||||
foreach (var kvp in boolIndexRules)
|
||||
doc.AddBool(kvp.Key, kvp.Value(libraryBook));
|
||||
|
||||
foreach (var kvp in numberIndexRules)
|
||||
doc.AddNotAnalyzed(kvp.Key, kvp.Value(libraryBook));
|
||||
foreach (var rule in FieldIndexRules)
|
||||
doc.AddIndexRule(rule, libraryBook);
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
|
@ -267,58 +127,39 @@ namespace LibationSearchEngine
|
|||
productId,
|
||||
d =>
|
||||
{
|
||||
// fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
|
||||
// ie: must remove old before adding new else will create unwanted duplicates.
|
||||
d.RemoveField(fieldName.ToLower());
|
||||
d.RemoveField(fieldName.ToLower());
|
||||
d.AddAnalyzed(fieldName, newValue);
|
||||
});
|
||||
});
|
||||
|
||||
// update single document entry
|
||||
// update single document entry
|
||||
public void UpdateLiberatedStatus(Book book)
|
||||
=> updateDocument(
|
||||
book.AudibleProductId,
|
||||
d =>
|
||||
{
|
||||
//
|
||||
// TODO: better synonym handling. This is too easy to mess up
|
||||
//
|
||||
var lib = FieldIndexRules.GetRuleByFieldName<BookRule>("IsLiberated");
|
||||
var libError = FieldIndexRules.GetRuleByFieldName<BookRule>("LiberatedError");
|
||||
var lastDl = FieldIndexRules.GetRuleByFieldName<BookRule>(nameof(UserDefinedItem.LastDownloaded));
|
||||
|
||||
// fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
|
||||
// ie: must remove old before adding new else will create unwanted duplicates.
|
||||
var v1 = isLiberated(book);
|
||||
d.RemoveField("isliberated");
|
||||
d.AddBool("IsLiberated", v1);
|
||||
d.RemoveField("liberated");
|
||||
d.AddBool("Liberated", v1);
|
||||
d.RemoveRule(lib);
|
||||
d.RemoveRule(libError);
|
||||
d.RemoveRule(lastDl);
|
||||
|
||||
var v2 = liberatedError(book);
|
||||
d.RemoveField("liberatederror");
|
||||
d.AddBool("LiberatedError", v2);
|
||||
|
||||
var v3 = book.UserDefinedItem.LastDownloaded?.ToLuceneString() ?? "";
|
||||
d.RemoveField("LastDownload");
|
||||
d.AddNotAnalyzed("LastDownload", v3);
|
||||
d.RemoveField("LastDownloaded");
|
||||
d.AddNotAnalyzed("LastDownloaded", v3);
|
||||
d.AddIndexRule(lib, book);
|
||||
d.AddIndexRule(libError, book);
|
||||
d.AddIndexRule(lastDl, book);
|
||||
});
|
||||
|
||||
public void UpdateUserRatings(Book book)
|
||||
=>updateDocument(
|
||||
book.AudibleProductId,
|
||||
d =>
|
||||
{
|
||||
//
|
||||
// TODO: better synonym handling. This is too easy to mess up
|
||||
//
|
||||
{
|
||||
var rating = FieldIndexRules.GetRuleByFieldName<BookRule>("UserRating");
|
||||
|
||||
// fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
|
||||
// ie: must remove old before adding new else will create unwanted duplicates.
|
||||
var v1 = userOverallRating(book);
|
||||
d.RemoveField("userrating");
|
||||
d.AddNotAnalyzed("UserRating", v1);
|
||||
d.RemoveField("myrating");
|
||||
d.AddNotAnalyzed("MyRating", v1);
|
||||
});
|
||||
d.RemoveRule(rating);
|
||||
d.AddIndexRule(rating, book);
|
||||
});
|
||||
|
||||
private static void updateDocument(string productId, Action<Document> action)
|
||||
{
|
||||
|
|
@ -335,11 +176,9 @@ namespace LibationSearchEngine
|
|||
return;
|
||||
var document = searcher.Doc(scoreDoc.Doc);
|
||||
|
||||
|
||||
// perform update
|
||||
action(document);
|
||||
|
||||
|
||||
// update index
|
||||
var createNewIndex = false;
|
||||
using var analyzer = new StandardAnalyzer(Version);
|
||||
|
|
@ -412,24 +251,24 @@ namespace LibationSearchEngine
|
|||
return returnList;
|
||||
}
|
||||
|
||||
private void displayResults(SearchResultSet docs)
|
||||
{
|
||||
//for (int i = 0; i < docs.Docs.Count(); i++)
|
||||
//{
|
||||
// var sde = docs.Docs.First();
|
||||
private void displayResults(SearchResultSet docs)
|
||||
{
|
||||
//for (int i = 0; i < docs.Docs.Count(); i++)
|
||||
//{
|
||||
// var sde = docs.Docs.First();
|
||||
|
||||
// Document doc = sde.Doc;
|
||||
// float score = sde.Score;
|
||||
// Document doc = sde.Doc;
|
||||
// float score = sde.Score;
|
||||
|
||||
// Serilog.Log.Logger.Debug($"{(i + 1)}) score={score}. Fields:");
|
||||
// var allFields = doc.GetFields();
|
||||
// foreach (var f in allFields)
|
||||
// Serilog.Log.Logger.Debug($" [{f.Name}]={f.StringValue}");
|
||||
//}
|
||||
}
|
||||
#endregion
|
||||
// Serilog.Log.Logger.Debug($"{(i + 1)}) score={score}. Fields:");
|
||||
// var allFields = doc.GetFields();
|
||||
// foreach (var f in allFields)
|
||||
// Serilog.Log.Logger.Debug($" [{f.Name}]={f.StringValue}");
|
||||
//}
|
||||
}
|
||||
#endregion
|
||||
|
||||
private static Directory getIndex() => FSDirectory.Open(SearchEngineDirectory);
|
||||
private static Directory getIndex() => FSDirectory.Open(SearchEngineDirectory);
|
||||
|
||||
// not customizable. don't move to config
|
||||
private static string SearchEngineDirectory { get; }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue