From 079f389193f7ec3ba607fe5d3c1f9024e3e167e2 Mon Sep 17 00:00:00 2001 From: kreeben Date: Wed, 30 Dec 2020 22:32:08 +0100 Subject: [PATCH] improve graph building api with DDD (graphbuilder extension methods) --- src/Sir.Cmd/Program.cs | 2 +- src/Sir.Search/Models/BagOfCharsModel.cs | 6 +- .../Models/LinearClassifierImageModel.cs | 2 +- .../Session/DocumentStreamSession.cs | 2 +- src/Sir.Store.Tests/ImageModelTests.cs | 2 +- src/Sir.Store.Tests/TextModelTests.cs | 4 +- src/Sir.StringCompare/Program.cs | 2 +- src/Sir.VectorSpace/ColumnReader.cs | 2 +- src/Sir.VectorSpace/ColumnWriter.cs | 4 +- src/Sir.VectorSpace/GraphBuilder.cs | 114 ++---------------- src/Sir.VectorSpace/VectorNode.cs | 11 +- 11 files changed, 31 insertions(+), 120 deletions(-) diff --git a/src/Sir.Cmd/Program.cs b/src/Sir.Cmd/Program.cs index c559067d..2d3794ab 100644 --- a/src/Sir.Cmd/Program.cs +++ b/src/Sir.Cmd/Program.cs @@ -202,7 +202,7 @@ public void Run(IDictionary args, ILogger logger) foreach (var token in tokens) { - GraphBuilder.MergeOrAdd(tree, new VectorNode(token), model); + tree.MergeOrAdd(new VectorNode(token), model); } Console.WriteLine(field.Name); diff --git a/src/Sir.Search/Models/BagOfCharsModel.cs b/src/Sir.Search/Models/BagOfCharsModel.cs index 8befe9d7..66473bcc 100644 --- a/src/Sir.Search/Models/BagOfCharsModel.cs +++ b/src/Sir.Search/Models/BagOfCharsModel.cs @@ -12,7 +12,7 @@ public class BagOfCharsModel : DistanceCalculator, IModel public void ExecutePut(VectorNode column, VectorNode node) { - VectorNode.MergeOrAddLockFree(column, node, this); + column.MergeOrAddConcurrent(node, this); } public IEnumerable Tokenize(string data) @@ -83,7 +83,7 @@ public BocEmbeddingsModel(BagOfCharsModel wordTokenizer) public void ExecutePut(VectorNode column, VectorNode node) { - GraphBuilder.Build(column, node, this); + column.Build(node, this); } public IEnumerable Tokenize(string data) @@ -107,7 +107,7 @@ public ContinuousBagOfWordsModel(BagOfCharsModel wordTokenizer) public void ExecutePut(VectorNode column, VectorNode node) { - GraphBuilder.MergeOrAdd(column, node, this); + column.MergeOrAdd(node, this); } public IEnumerable Tokenize(string data) diff --git a/src/Sir.Search/Models/LinearClassifierImageModel.cs b/src/Sir.Search/Models/LinearClassifierImageModel.cs index 708d2e3f..a45bfeeb 100644 --- a/src/Sir.Search/Models/LinearClassifierImageModel.cs +++ b/src/Sir.Search/Models/LinearClassifierImageModel.cs @@ -13,7 +13,7 @@ public class LinearClassifierImageModel : DistanceCalculator, IModel public void ExecutePut(VectorNode column, VectorNode node) { - GraphBuilder.MergeOrAddSupervised(column, node, this); + column.MergeOrAddSupervised(node, this); } public IEnumerable Tokenize(IImage data) diff --git a/src/Sir.Search/Session/DocumentStreamSession.cs b/src/Sir.Search/Session/DocumentStreamSession.cs index 0e740769..61bf1166 100644 --- a/src/Sir.Search/Session/DocumentStreamSession.cs +++ b/src/Sir.Search/Session/DocumentStreamSession.cs @@ -149,7 +149,7 @@ public IEnumerable ReadDocumentVectors( foreach (var vector in streamReader.GetVectors(vInfo.offset, vInfo.len, vInfo.dataType, value => model.Tokenize(value))) { - GraphBuilder.AddIfUnique(tree, new VectorNode(vector, docId:doc.docId, keyId:kvp.keyId), model); + tree.AddIfUnique(new VectorNode(vector, docId:doc.docId, keyId:kvp.keyId), model); } yield return tree; diff --git a/src/Sir.Store.Tests/ImageModelTests.cs b/src/Sir.Store.Tests/ImageModelTests.cs index bb27cf64..ea122f64 100644 --- a/src/Sir.Store.Tests/ImageModelTests.cs +++ b/src/Sir.Store.Tests/ImageModelTests.cs @@ -21,7 +21,7 @@ public class ImageModelTests public void Can_train_in_memory() { var model = new LinearClassifierImageModel(); - var tree = GraphBuilder.CreateTree(model, model, _data); + var tree = model.CreateTree(model, _data); Print(tree); diff --git a/src/Sir.Store.Tests/TextModelTests.cs b/src/Sir.Store.Tests/TextModelTests.cs index 62a57cce..2456fde3 100644 --- a/src/Sir.Store.Tests/TextModelTests.cs +++ b/src/Sir.Store.Tests/TextModelTests.cs @@ -20,7 +20,7 @@ public class TextModelTests public void Can_traverse_index_in_memory() { var model = new BagOfCharsModel(); - var tree = GraphBuilder.CreateTree(model, model, _data); + var tree = model.CreateTree(model, _data); Debug.WriteLine(PathFinder.Visualize(tree)); @@ -52,7 +52,7 @@ public void Can_traverse_index_in_memory() public void Can_traverse_streamed() { var model = new BagOfCharsModel(); - var tree = GraphBuilder.CreateTree(model, model, _data); + var tree = model.CreateTree(model, _data); using (var indexStream = new MemoryStream()) using (var vectorStream = new MemoryStream()) diff --git a/src/Sir.StringCompare/Program.cs b/src/Sir.StringCompare/Program.cs index b6d04be3..59be2e8d 100644 --- a/src/Sir.StringCompare/Program.cs +++ b/src/Sir.StringCompare/Program.cs @@ -68,7 +68,7 @@ private static void RunInteractiveGraphBuilder(IModel model) var node = new VectorNode(model.Tokenize(command).First()); - GraphBuilder.MergeOrAdd(root, node, model); + root.MergeOrAdd(node, model); } Console.WriteLine(PathFinder.Visualize(root)); diff --git a/src/Sir.VectorSpace/ColumnReader.cs b/src/Sir.VectorSpace/ColumnReader.cs index 4dfe25c9..69b82445 100644 --- a/src/Sir.VectorSpace/ColumnReader.cs +++ b/src/Sir.VectorSpace/ColumnReader.cs @@ -61,7 +61,7 @@ public Hit ClosestMatch(IVector vector, IModel model) } else if (hit.Score >= model.IdenticalAngle || hit.Score.Approximates(best.Score)) { - GraphBuilder.MergePostings(best.Node, hit.Node); + best.Node.MergePostings(hit.Node); } } diff --git a/src/Sir.VectorSpace/ColumnWriter.cs b/src/Sir.VectorSpace/ColumnWriter.cs index a983fa30..03e05218 100644 --- a/src/Sir.VectorSpace/ColumnWriter.cs +++ b/src/Sir.VectorSpace/ColumnWriter.cs @@ -16,7 +16,7 @@ public ColumnWriter(Stream indexStream, bool keepStreamOpen = false) public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, Stream postingsStream, PageIndexWriter pageIndexWriter) { - var page = GraphBuilder.SerializeTree(column, _ixStream, vectorStream, postingsStream); + var page = column.SerializeTree(_ixStream, vectorStream, postingsStream); pageIndexWriter.Put(page.offset, page.length); @@ -25,7 +25,7 @@ public ColumnWriter(Stream indexStream, bool keepStreamOpen = false) public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, PageIndexWriter pageIndexWriter) { - var page = GraphBuilder.SerializeTree(column, _ixStream, vectorStream, null); + var page = column.SerializeTree(_ixStream, vectorStream, null); pageIndexWriter.Put(page.offset, page.length); diff --git a/src/Sir.VectorSpace/GraphBuilder.cs b/src/Sir.VectorSpace/GraphBuilder.cs index d16c459b..dadd5d9c 100644 --- a/src/Sir.VectorSpace/GraphBuilder.cs +++ b/src/Sir.VectorSpace/GraphBuilder.cs @@ -1,15 +1,13 @@ using System; -using System.Collections.Concurrent; using System.Collections.Generic; using System.IO; using System.Runtime.InteropServices; -using System.Threading; namespace Sir.VectorSpace { public static class GraphBuilder { - public static VectorNode CreateTree(IModel model, IIndexingStrategy indexingStrategy, params T[] data) + public static VectorNode CreateTree(this IModel model, IIndexingStrategy indexingStrategy, params T[] data) { var root = new VectorNode(); @@ -25,7 +23,7 @@ public static VectorNode CreateTree(IModel model, IIndexingStrategy indexi } public static void MergeOrAddSupervised( - VectorNode root, + this VectorNode root, VectorNode node, IModel model) { @@ -71,7 +69,7 @@ public static void MergeOrAddSupervised( } public static void MergeOrAdd( - VectorNode root, + this VectorNode root, VectorNode node, IModel model) { @@ -115,7 +113,7 @@ public static void MergeOrAdd( } public static void AddIfUnique( - VectorNode root, + this VectorNode root, VectorNode node, IModel model) { @@ -157,7 +155,7 @@ public static void AddIfUnique( } public static bool TryAdd( - VectorNode root, + this VectorNode root, VectorNode node, IModel model) { @@ -201,7 +199,7 @@ public static bool TryAdd( } public static void Build( - VectorNode root, + this VectorNode root, VectorNode node, IModel model) { @@ -242,99 +240,13 @@ public static void Build( } } - public static void MergeOrAddConcurrent( - VectorNode root, - VectorNode node, - IModel model) - { - var cursor = root; - - while (true) - { - var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector); - - if (angle >= model.IdenticalAngle) - { - lock (cursor.Sync) - { - MergeDocIds(cursor, node); - } - - break; - } - else if (angle > model.FoldAngle) - { - if (cursor.Left == null) - { - lock (cursor.Sync) - { - if (cursor.Left == null) - { - cursor.Left = node; - break; - } - else - { - cursor = cursor.Left; - } - } - } - else - { - cursor = cursor.Left; - } - } - else - { - if (cursor.Right == null) - { - lock (cursor.Sync) - { - if (cursor.Right == null) - { - cursor.Right = node; - break; - } - else - { - cursor = cursor.Right; - } - } - } - else - { - cursor = cursor.Right; - } - } - } - } - - public static void InsertRight(VectorNode parent, VectorNode node) - { - node.Right = parent.Right; - parent.Right = node; - } - - public static void AddRight(VectorNode parent, VectorNode node) - { - var target = parent; - - while(target.Right != null) - { - target = target.Right; - } - - node.Right = target.Right; - target.Right = node; - } - - public static void MergePostings(VectorNode target, VectorNode source) + public static void MergePostings(this VectorNode target, VectorNode source) { if (source.PostingsOffsets != null) ((List)target.PostingsOffsets).AddRange(source.PostingsOffsets); } - public static void MergeDocIds(VectorNode target, VectorNode source) + public static void MergeDocIds(this VectorNode target, VectorNode source) { if (source.DocIds != null) { @@ -342,7 +254,7 @@ public static void MergeDocIds(VectorNode target, VectorNode source) } } - public static void MergeDocIdsConcurrent(VectorNode target, VectorNode source) + public static void MergeDocIdsConcurrent(this VectorNode target, VectorNode source) { lock (target.Sync) { @@ -353,7 +265,7 @@ public static void MergeDocIdsConcurrent(VectorNode target, VectorNode source) } } - public static void SerializeNode(VectorNode node, Stream stream) + public static void Serialize(this VectorNode node, Stream stream) { long terminator = 1; @@ -393,7 +305,7 @@ public static void SerializeNode(VectorNode node, Stream stream) /// stream to persist vectors in /// optional stream to persist any posting references into /// - public static (long offset, long length) SerializeTree(VectorNode node, Stream indexStream, Stream vectorStream, Stream postingsStream = null) + public static (long offset, long length) SerializeTree(this VectorNode node, Stream indexStream, Stream vectorStream, Stream postingsStream = null) { var stack = new Stack(); var offset = indexStream.Position; @@ -411,7 +323,7 @@ public static (long offset, long length) SerializeTree(VectorNode node, Stream i node.VectorOffset = VectorOperations.SerializeVector(node.Vector, vectorStream); - SerializeNode(node, indexStream); + Serialize(node, indexStream); length += VectorNode.BlockSize; @@ -431,7 +343,7 @@ public static (long offset, long length) SerializeTree(VectorNode node, Stream i return (offset, length); } - public static void SerializePostings(VectorNode node, Stream postingsStream) + public static void SerializePostings(this VectorNode node, Stream postingsStream) { node.PostingsOffset = postingsStream.Position; diff --git a/src/Sir.VectorSpace/VectorNode.cs b/src/Sir.VectorSpace/VectorNode.cs index d7f2d598..d463838c 100644 --- a/src/Sir.VectorSpace/VectorNode.cs +++ b/src/Sir.VectorSpace/VectorNode.cs @@ -111,12 +111,11 @@ public VectorNode(long postingsOffset, long vecOffset, long terminator, long wei Vector = vector; } - public static void MergeOrAddLockFree( - VectorNode root, + public void MergeOrAddConcurrent( VectorNode node, IModel model) { - var cursor = root; + var cursor = this; while (true) { @@ -124,7 +123,7 @@ public static void MergeOrAddLockFree( if (angle >= model.IdenticalAngle) { - GraphBuilder.MergeDocIdsConcurrent(cursor, node); + cursor.MergeDocIdsConcurrent(node); break; } @@ -140,7 +139,7 @@ public static void MergeOrAddLockFree( } else { - MergeOrAddLockFree(cursor, node, model); + cursor.MergeOrAddConcurrent(node, model); } } else @@ -160,7 +159,7 @@ public static void MergeOrAddLockFree( } else { - MergeOrAddLockFree(cursor, node, model); + cursor.MergeOrAddConcurrent(node, model); } } else