Skip to content

Commit

Permalink
Improve graph-building API with DDD (GraphBuilder extension methods)
Browse files: browse the repository at this point in the history
  • Loading branch information
kreeben committed Dec 30, 2020
1 parent 8e740d3 commit 079f389
Show file tree
Hide file tree
Showing 11 changed files with 31 additions and 120 deletions.
2 changes: 1 addition & 1 deletion src/Sir.Cmd/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public void Run(IDictionary<string, string> args, ILogger logger)

foreach (var token in tokens)
{
GraphBuilder.MergeOrAdd(tree, new VectorNode(token), model);
tree.MergeOrAdd(new VectorNode(token), model);
}

Console.WriteLine(field.Name);
Expand Down
6 changes: 3 additions & 3 deletions src/Sir.Search/Models/BagOfCharsModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class BagOfCharsModel : DistanceCalculator, IModel<string>

public void ExecutePut<T>(VectorNode column, VectorNode node)
{
VectorNode.MergeOrAddLockFree(column, node, this);
column.MergeOrAddConcurrent(node, this);
}

public IEnumerable<IVector> Tokenize(string data)
Expand Down Expand Up @@ -83,7 +83,7 @@ public BocEmbeddingsModel(BagOfCharsModel wordTokenizer)

public void ExecutePut<T>(VectorNode column, VectorNode node)
{
GraphBuilder.Build(column, node, this);
column.Build(node, this);
}

public IEnumerable<IVector> Tokenize(string data)
Expand All @@ -107,7 +107,7 @@ public ContinuousBagOfWordsModel(BagOfCharsModel wordTokenizer)

public void ExecutePut<T>(VectorNode column, VectorNode node)
{
GraphBuilder.MergeOrAdd(column, node, this);
column.MergeOrAdd(node, this);
}

public IEnumerable<IVector> Tokenize(string data)
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Search/Models/LinearClassifierImageModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public class LinearClassifierImageModel : DistanceCalculator, IModel<IImage>

public void ExecutePut<T>(VectorNode column, VectorNode node)
{
GraphBuilder.MergeOrAddSupervised(column, node, this);
column.MergeOrAddSupervised(node, this);
}

public IEnumerable<IVector> Tokenize(IImage data)
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Search/Session/DocumentStreamSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public IEnumerable<VectorNode> ReadDocumentVectors<T>(

foreach (var vector in streamReader.GetVectors<T>(vInfo.offset, vInfo.len, vInfo.dataType, value => model.Tokenize(value)))
{
GraphBuilder.AddIfUnique(tree, new VectorNode(vector, docId:doc.docId, keyId:kvp.keyId), model);
tree.AddIfUnique(new VectorNode(vector, docId:doc.docId, keyId:kvp.keyId), model);
}

yield return tree;
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Store.Tests/ImageModelTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class ImageModelTests
public void Can_train_in_memory()
{
var model = new LinearClassifierImageModel();
var tree = GraphBuilder.CreateTree(model, model, _data);
var tree = model.CreateTree(model, _data);

Print(tree);

Expand Down
4 changes: 2 additions & 2 deletions src/Sir.Store.Tests/TextModelTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class TextModelTests
public void Can_traverse_index_in_memory()
{
var model = new BagOfCharsModel();
var tree = GraphBuilder.CreateTree(model, model, _data);
var tree = model.CreateTree(model, _data);

Debug.WriteLine(PathFinder.Visualize(tree));

Expand Down Expand Up @@ -52,7 +52,7 @@ public void Can_traverse_index_in_memory()
public void Can_traverse_streamed()
{
var model = new BagOfCharsModel();
var tree = GraphBuilder.CreateTree(model, model, _data);
var tree = model.CreateTree(model, _data);

using (var indexStream = new MemoryStream())
using (var vectorStream = new MemoryStream())
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.StringCompare/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ private static void RunInteractiveGraphBuilder(IModel<string> model)

var node = new VectorNode(model.Tokenize(command).First());

GraphBuilder.MergeOrAdd(root, node, model);
root.MergeOrAdd(node, model);
}

Console.WriteLine(PathFinder.Visualize(root));
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.VectorSpace/ColumnReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public Hit ClosestMatch(IVector vector, IModel model)
}
else if (hit.Score >= model.IdenticalAngle || hit.Score.Approximates(best.Score))
{
GraphBuilder.MergePostings(best.Node, hit.Node);
best.Node.MergePostings(hit.Node);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/Sir.VectorSpace/ColumnWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public ColumnWriter(Stream indexStream, bool keepStreamOpen = false)

public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, Stream postingsStream, PageIndexWriter pageIndexWriter)
{
var page = GraphBuilder.SerializeTree(column, _ixStream, vectorStream, postingsStream);
var page = column.SerializeTree(_ixStream, vectorStream, postingsStream);

pageIndexWriter.Put(page.offset, page.length);

Expand All @@ -25,7 +25,7 @@ public ColumnWriter(Stream indexStream, bool keepStreamOpen = false)

public (int depth, int width) CreatePage(VectorNode column, Stream vectorStream, PageIndexWriter pageIndexWriter)
{
var page = GraphBuilder.SerializeTree(column, _ixStream, vectorStream, null);
var page = column.SerializeTree(_ixStream, vectorStream, null);

pageIndexWriter.Put(page.offset, page.length);

Expand Down
114 changes: 13 additions & 101 deletions src/Sir.VectorSpace/GraphBuilder.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading;

namespace Sir.VectorSpace
{
public static class GraphBuilder
{
public static VectorNode CreateTree<T>(IModel<T> model, IIndexingStrategy indexingStrategy, params T[] data)
public static VectorNode CreateTree<T>(this IModel<T> model, IIndexingStrategy indexingStrategy, params T[] data)
{
var root = new VectorNode();

Expand All @@ -25,7 +23,7 @@ public static VectorNode CreateTree<T>(IModel<T> model, IIndexingStrategy indexi
}

public static void MergeOrAddSupervised(
VectorNode root,
this VectorNode root,
VectorNode node,
IModel model)
{
Expand Down Expand Up @@ -71,7 +69,7 @@ public static void MergeOrAddSupervised(
}

public static void MergeOrAdd(
VectorNode root,
this VectorNode root,
VectorNode node,
IModel model)
{
Expand Down Expand Up @@ -115,7 +113,7 @@ public static void MergeOrAdd(
}

public static void AddIfUnique(
VectorNode root,
this VectorNode root,
VectorNode node,
IModel model)
{
Expand Down Expand Up @@ -157,7 +155,7 @@ public static void AddIfUnique(
}

public static bool TryAdd(
VectorNode root,
this VectorNode root,
VectorNode node,
IModel model)
{
Expand Down Expand Up @@ -201,7 +199,7 @@ public static bool TryAdd(
}

public static void Build(
VectorNode root,
this VectorNode root,
VectorNode node,
IModel model)
{
Expand Down Expand Up @@ -242,107 +240,21 @@ public static void Build(
}
}

public static void MergeOrAddConcurrent(
VectorNode root,
VectorNode node,
IModel model)
{
var cursor = root;

while (true)
{
var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

if (angle >= model.IdenticalAngle)
{
lock (cursor.Sync)
{
MergeDocIds(cursor, node);
}

break;
}
else if (angle > model.FoldAngle)
{
if (cursor.Left == null)
{
lock (cursor.Sync)
{
if (cursor.Left == null)
{
cursor.Left = node;
break;
}
else
{
cursor = cursor.Left;
}
}
}
else
{
cursor = cursor.Left;
}
}
else
{
if (cursor.Right == null)
{
lock (cursor.Sync)
{
if (cursor.Right == null)
{
cursor.Right = node;
break;
}
else
{
cursor = cursor.Right;
}
}
}
else
{
cursor = cursor.Right;
}
}
}
}

public static void InsertRight(VectorNode parent, VectorNode node)
{
node.Right = parent.Right;
parent.Right = node;
}

public static void AddRight(VectorNode parent, VectorNode node)
{
var target = parent;

while(target.Right != null)
{
target = target.Right;
}

node.Right = target.Right;
target.Right = node;
}

public static void MergePostings(VectorNode target, VectorNode source)
public static void MergePostings(this VectorNode target, VectorNode source)
{
if (source.PostingsOffsets != null)
((List<long>)target.PostingsOffsets).AddRange(source.PostingsOffsets);
}

public static void MergeDocIds(VectorNode target, VectorNode source)
public static void MergeDocIds(this VectorNode target, VectorNode source)
{
if (source.DocIds != null)
{
target.DocIds.AddRange(source.DocIds);
}
}

public static void MergeDocIdsConcurrent(VectorNode target, VectorNode source)
public static void MergeDocIdsConcurrent(this VectorNode target, VectorNode source)
{
lock (target.Sync)
{
Expand All @@ -353,7 +265,7 @@ public static void MergeDocIdsConcurrent(VectorNode target, VectorNode source)
}
}

public static void SerializeNode(VectorNode node, Stream stream)
public static void Serialize(this VectorNode node, Stream stream)
{
long terminator = 1;

Expand Down Expand Up @@ -393,7 +305,7 @@ public static void SerializeNode(VectorNode node, Stream stream)
/// <param name="vectorStream">stream to persist vectors in</param>
/// <param name="postingsStream">optional stream to persist any posting references into</param>
/// <returns></returns>
public static (long offset, long length) SerializeTree(VectorNode node, Stream indexStream, Stream vectorStream, Stream postingsStream = null)
public static (long offset, long length) SerializeTree(this VectorNode node, Stream indexStream, Stream vectorStream, Stream postingsStream = null)
{
var stack = new Stack<VectorNode>();
var offset = indexStream.Position;
Expand All @@ -411,7 +323,7 @@ public static (long offset, long length) SerializeTree(VectorNode node, Stream i

node.VectorOffset = VectorOperations.SerializeVector(node.Vector, vectorStream);

SerializeNode(node, indexStream);
Serialize(node, indexStream);

length += VectorNode.BlockSize;

Expand All @@ -431,7 +343,7 @@ public static (long offset, long length) SerializeTree(VectorNode node, Stream i
return (offset, length);
}

public static void SerializePostings(VectorNode node, Stream postingsStream)
public static void SerializePostings(this VectorNode node, Stream postingsStream)
{
node.PostingsOffset = postingsStream.Position;

Expand Down
11 changes: 5 additions & 6 deletions src/Sir.VectorSpace/VectorNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,20 +111,19 @@ public VectorNode(long postingsOffset, long vecOffset, long terminator, long wei
Vector = vector;
}

public static void MergeOrAddLockFree(
VectorNode root,
public void MergeOrAddConcurrent(
VectorNode node,
IModel model)
{
var cursor = root;
var cursor = this;

while (true)
{
var angle = cursor.Vector == null ? 0 : model.CosAngle(node.Vector, cursor.Vector);

if (angle >= model.IdenticalAngle)
{
GraphBuilder.MergeDocIdsConcurrent(cursor, node);
cursor.MergeDocIdsConcurrent(node);

break;
}
Expand All @@ -140,7 +139,7 @@ public static void MergeOrAddLockFree(
}
else
{
MergeOrAddLockFree(cursor, node, model);
cursor.MergeOrAddConcurrent(node, model);
}
}
else
Expand All @@ -160,7 +159,7 @@ public static void MergeOrAddLockFree(
}
else
{
MergeOrAddLockFree(cursor, node, model);
cursor.MergeOrAddConcurrent(node, model);
}
}
else
Expand Down

0 comments on commit 079f389

Please sign in to comment.