Skip to content

Commit

Permalink
Adding missing changes to last commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jhonabreul committed Nov 1, 2024
1 parent 2a67590 commit 445e361
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 52 deletions.
66 changes: 44 additions & 22 deletions Common/Python/PandasConverter.DataFrameGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,21 @@ private class DataFrameGenerator

private int _maxLevels;
private bool _shouldUseSymbolOnlyIndex;
private readonly bool _flatten;

protected DataFrameGenerator(Type dataType = null, bool timeAsColumn = false)
protected DataFrameGenerator(Type dataType = null, bool timeAsColumn = false, bool flatten = false)
{
_dataType = dataType;
// if no data type is requested we check all
_requestedTick = dataType == null || dataType == typeof(Tick) || dataType == typeof(OpenInterest);
_requestedTradeBar = dataType == null || dataType == typeof(TradeBar);
_requestedQuoteBar = dataType == null || dataType == typeof(QuoteBar);
_timeAsColumn = timeAsColumn;
_flatten = flatten;
}

public DataFrameGenerator(IEnumerable<Slice> slices, Type dataType = null)
: this(dataType)
public DataFrameGenerator(IEnumerable<Slice> slices, bool flatten = false, Type dataType = null)
: this(dataType, flatten: flatten)
{
AddData(slices);
}
Expand All @@ -78,7 +80,7 @@ protected void AddData(IEnumerable<Slice> slices)
{
foreach (var data in slice.AllData)
{
if (data is BaseDataCollection collection)
if (_flatten && data is BaseDataCollection collection)
{
AddCollection(collection);
continue;
Expand Down Expand Up @@ -149,7 +151,7 @@ protected void AddData<T>(IEnumerable<T> data)
{
var type = typeof(T);

if (type.IsAssignableTo(typeof(BaseDataCollection)))
if (_flatten && type.IsAssignableTo(typeof(BaseDataCollection)))
{
foreach (var collection in data)
{
Expand Down Expand Up @@ -195,28 +197,38 @@ public PyObject GenerateDataFrame(int? levels = null, bool sort = true, bool fil
var pandasDataDataFrames = GetPandasDataDataFrames(levels, filterMissingValueColumns, symbolOnlyIndex, forceMultiValueSymbol).ToList();
var collectionsDataFrames = GetCollectionsDataFrames(symbolOnlyIndex, forceMultiValueSymbol).ToList();

if (collectionsDataFrames.Count == 0)
try
{
return ConcatDataFrames(pandasDataDataFrames, sort, dropna: true);
}
if (collectionsDataFrames.Count == 0)
{
return ConcatDataFrames(pandasDataDataFrames, sort, dropna: true);
}

var dataFrames = collectionsDataFrames.Select(x => x.Item3).Concat(pandasDataDataFrames);
var dataFrames = collectionsDataFrames.Select(x => x.Item3).Concat(pandasDataDataFrames);

if (_collections.DistinctBy(x => x.Symbol).Count() > 1)
{
var keys = collectionsDataFrames
.Select(x => new object[] { x.Item1, x.Item2 })
.Concat(pandasDataDataFrames.Select(x => new object[] { x, DateTime.MinValue }));
if (_collections.DistinctBy(x => x.Symbol).Count() > 1)
{
var keys = collectionsDataFrames
.Select(x => new object[] { x.Item1, x.Item2 })
.Concat(pandasDataDataFrames.Select(x => new object[] { x, DateTime.MinValue }));

return ConcatDataFrames(dataFrames, keys, MultiBaseDataCollectionDataFrameNames, sort, dropna: true);
return ConcatDataFrames(dataFrames, keys, MultiBaseDataCollectionDataFrameNames, sort, dropna: true);
}
else
{
var keys = collectionsDataFrames
.Select(x => new object[] { x.Item2 })
.Concat(pandasDataDataFrames.Select(x => new object[] { DateTime.MinValue }));

return ConcatDataFrames(dataFrames, keys, SingleBaseDataCollectionDataFrameNames, sort, dropna: true);
}
}
else
finally
{
var keys = collectionsDataFrames
.Select(x => new object[] { x.Item2 })
.Concat(pandasDataDataFrames.Select(x => new object[] { DateTime.MinValue }));

return ConcatDataFrames(dataFrames, keys, SingleBaseDataCollectionDataFrameNames, sort, dropna: true);
foreach (var df in pandasDataDataFrames.Concat(collectionsDataFrames.Select(x => x.Item3)))
{
df.Dispose();
}
}
}

Expand Down Expand Up @@ -254,7 +266,7 @@ private IEnumerable<PyObject> GetPandasDataDataFrames(int? levels, bool filterMi

foreach (var collection in _collections.GroupBy(x => x.Symbol).SelectMany(x => x))
{
var generator = new DataFrameGenerator(_dataType, timeAsColumn: !symbolOnlyIndex);
var generator = new DataFrameGenerator(_dataType, timeAsColumn: !symbolOnlyIndex, flatten: _flatten);
generator.AddData(collection.Data);
var dataFrame = generator.GenerateDataFrame(symbolOnlyIndex: symbolOnlyIndex, forceMultiValueSymbol: forceMultiValueSymbol);

Expand All @@ -281,5 +293,15 @@ private void AddCollection(BaseDataCollection collection)
_collections.Add(collection);
}
}

/// <summary>
/// Generic data frame generator that is populated at construction time from
/// an enumerable of <see cref="ISymbolProvider"/> items.
/// </summary>
/// <typeparam name="T">The type of the data items, which must implement <see cref="ISymbolProvider"/></typeparam>
private class DataFrameGenerator<T> : DataFrameGenerator
where T : ISymbolProvider
{
/// <summary>
/// Creates the generator, forwarding <paramref name="flatten"/> to the base generator
/// and then adding every item in <paramref name="data"/>.
/// </summary>
/// <param name="data">The data to add to the generator</param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
public DataFrameGenerator(IEnumerable<T> data, bool flatten)
: base(flatten: flatten)
{
// The data is added eagerly so the instance is ready for GenerateDataFrame right after construction
AddData(data);
}
}
}
}
21 changes: 8 additions & 13 deletions Common/Python/PandasConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,13 @@ static PandasConverter()
/// Converts an enumerable of <see cref="Slice"/> in a pandas.DataFrame
/// </summary>
/// <param name="data">Enumerable of <see cref="Slice"/></param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
/// <param name="dataType">Optional type of bars to add to the data frame</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
public PyObject GetDataFrame(IEnumerable<Slice> data, Type dataType = null)
public PyObject GetDataFrame(IEnumerable<Slice> data, bool flatten = false, Type dataType = null)
{
var generator = new DataFrameGenerator(data, dataType);
var generator = new DataFrameGenerator(data, flatten, dataType);
return generator.GenerateDataFrame();
}

Expand All @@ -67,12 +68,13 @@ public PyObject GetDataFrame(IEnumerable<Slice> data, Type dataType = null)
/// <param name="forceMultiValueSymbol">Useful when the data contains points for multiple symbols.
/// If false and <paramref name="symbolOnlyIndex"/> is true, it will assume there is a single point for each symbol,
/// and will apply performance improvements for the data frame generation.</param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
/// <remarks>Helper method for testing</remarks>
public PyObject GetDataFrame<T>(IEnumerable<T> data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false)
public PyObject GetDataFrame<T>(IEnumerable<T> data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false, bool flatten = false)
where T : ISymbolProvider
{
var generator = new DataFrameGenerator<T>(data);
var generator = new DataFrameGenerator<T>(data, flatten);
return generator.GenerateDataFrame(
// Use 2 instead of maxLevels for backwards compatibility
levels: symbolOnlyIndex ? 1 : 2,
Expand Down Expand Up @@ -193,9 +195,11 @@ public static PyObject ConcatDataFrames<T>(IEnumerable<PyObject> dataFrames, IEn
{
pyNames = names.ToPyListUnSafe();
pyKeys = ConvertConcatKeys(keys);
using var pyFalse = false.ToPython();

kwargs.SetItem("keys", pyKeys);
kwargs.SetItem("names", pyNames);
kwargs.SetItem("copy", pyFalse);
}

var result = _concat.Invoke(new[] { pyDataFrames }, kwargs);
Expand Down Expand Up @@ -285,14 +289,5 @@ private PyObject MakeIndicatorDataFrame(PyDict pyDict)
{
return _pandas.DataFrame(pyDict, columns: pyDict.Keys().Select(x => x.As<string>().ToLowerInvariant()).OrderBy(x => x));
}

private class DataFrameGenerator<T> : DataFrameGenerator
where T : ISymbolProvider
{
public DataFrameGenerator(IEnumerable<T> data)
{
AddData(data);
}
}
}
}
22 changes: 21 additions & 1 deletion Common/Python/PandasData.DataTypeMember.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ private class DataTypeMember

public DataTypeMember[] Children { get; }

/// <summary>
/// Whether this member is flagged as non-expandable.
/// Only set to true through <see cref="CreateNonExpandableMember"/>; init-only, so it cannot change after construction.
/// </summary>
public bool IsNonExpandable { get; init; }

public bool IsProperty => _property != null;

public bool IsField => _field != null;
Expand All @@ -61,7 +63,7 @@ private class DataTypeMember

public bool IsTickProperty { get; }

public DataTypeMember(MemberInfo member, DataTypeMember[] children = null)
private DataTypeMember(MemberInfo member, DataTypeMember[] children = null)
{
Member = member;
Children = children;
Expand All @@ -81,6 +83,24 @@ public DataTypeMember(MemberInfo member, DataTypeMember[] children = null)
}
}

/// <summary>
/// Builds a <see cref="DataTypeMember"/> for the given member together with its child members.
/// </summary>
/// <param name="member">The reflected member to wrap</param>
/// <param name="children">The child members to attach to the new instance</param>
/// <returns>A new <see cref="DataTypeMember"/> holding the member and its children</returns>
public static DataTypeMember CreateWithChildren(MemberInfo member, DataTypeMember[] children)
    => new DataTypeMember(member, children);

/// <summary>
/// Builds a <see cref="DataTypeMember"/> for the given member, without any child members.
/// </summary>
/// <param name="member">The reflected member to wrap</param>
/// <returns>A new <see cref="DataTypeMember"/> wrapping the member</returns>
public static DataTypeMember Create(MemberInfo member)
    => new DataTypeMember(member);

/// <summary>
/// Builds a <see cref="DataTypeMember"/> for the given member, without children,
/// and flags it as non-expandable.
/// </summary>
/// <param name="member">The reflected member to wrap</param>
/// <returns>A new <see cref="DataTypeMember"/> with <see cref="IsNonExpandable"/> set to true</returns>
public static DataTypeMember CreateNonExpandableMember(MemberInfo member)
    => new DataTypeMember(member) { IsNonExpandable = true };

public PropertyInfo AsProperty()
{
return _property;
Expand Down
47 changes: 31 additions & 16 deletions Common/Python/PandasData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,11 @@ public partial class PandasData
nameof(Tick.BidSize)
};

private static Type PandasNonExpandableAttribute = typeof(PandasNonExpandableAttribute);
private static Type PandasIgnoreAttribute = typeof(PandasIgnoreAttribute);
private static Type PandasIgnoreMembersAttribute = typeof(PandasIgnoreMembersAttribute);
private static readonly Type PandasNonExpandableAttribute = typeof(PandasNonExpandableAttribute);
private static readonly Type PandasIgnoreAttribute = typeof(PandasIgnoreAttribute);
private static readonly Type PandasIgnoreMembersAttribute = typeof(PandasIgnoreMembersAttribute);

private static readonly Type[] _leanCommonDataTypes = new[] { typeof(TradeBar), typeof(QuoteBar), typeof(Tick), typeof(OpenInterest) };

private readonly Symbol _symbol;
private readonly bool _isFundamentalType;
Expand Down Expand Up @@ -158,28 +160,35 @@ public void Add(object data)
Add(data, false);
}

private void Add(object baseData, bool overrideValues)
private void Add(object data, bool overrideValues)
{
if (baseData == null)
if (data == null)
{
return;
}

var typeMembers = GetInstanceDataTypeMembers(baseData);
var typeMembers = GetInstanceDataTypeMembers(data).ToList();
var isNonExpandable = typeMembers.Count == 1 && typeMembers[0].IsNonExpandable;

var endTime = default(DateTime);
if (_isBaseData)
{
endTime = ((IBaseData)baseData).EndTime;
if (_timeAsColumn)
endTime = ((IBaseData)data).EndTime;
if (_timeAsColumn && !isNonExpandable)
{
AddToSeries("time", endTime, endTime, overrideValues);
}
}

AddMembersData(baseData, typeMembers, endTime, overrideValues);
if (isNonExpandable)
{
AddToSeries("instance", endTime, data, overrideValues);
return;
}

AddMembersData(data, typeMembers, endTime, overrideValues);

if (baseData is DynamicData dynamicData)
if (data is DynamicData dynamicData)
{
var storage = dynamicData.GetStorageDictionary();
var value = dynamicData.Value;
Expand Down Expand Up @@ -438,11 +447,15 @@ public static PyObject ToPandasDataFrame(IEnumerable<PandasData> pandasDatas, bo
private IEnumerable<DataTypeMember> GetInstanceDataTypeMembers(object data)
{
var type = data.GetType();
if (!_members.TryGetValue(type, out var members))

if (type.IsDefined(PandasNonExpandableAttribute))
{
// TODO: make it static
var leanCommonDataTypes = new[] { typeof(TradeBar), typeof(QuoteBar), typeof(Tick), typeof(OpenInterest) };
_series.TryAdd("instance", new Serie(withTimeIndex: !_timeAsColumn));
return new List<DataTypeMember> { DataTypeMember.CreateNonExpandableMember(type) };
}

if (!_members.TryGetValue(type, out var members))
{
HashSet<string> columnNames;

if (data is DynamicData dynamicData)
Expand All @@ -455,7 +468,9 @@ private IEnumerable<DataTypeMember> GetInstanceDataTypeMembers(object data)
}
else
{
members = leanCommonDataTypes.Contains(type) ? GetTypeMembers(type) : GetTypeMembers(type, nameof(BaseData.Value));
members = _leanCommonDataTypes.Contains(type)
? GetTypeMembers(type)
: GetTypeMembers(type, nameof(BaseData.Value));

columnNames = members.SelectMany(x => x.GetMemberNames()).ToHashSet();
// We add openinterest key so the series is created: open interest tick LastPrice is renamed to OpenInterest
Expand Down Expand Up @@ -528,11 +543,11 @@ private static IEnumerable<DataTypeMember> GetDataTypeMembers(Type type, string[
&& !memberType.IsDefined(PandasNonExpandableAttribute)
&& !member.IsDefined(PandasNonExpandableAttribute))))
{
dataTypeMember = new DataTypeMember(member, GetDataTypeMembers(memberType, forcedInclusionMembers).ToArray());
dataTypeMember = DataTypeMember.CreateWithChildren(member, GetDataTypeMembers(memberType, forcedInclusionMembers).ToArray());
}
else
{
dataTypeMember = new DataTypeMember(member);
dataTypeMember = DataTypeMember.Create(member);
}
return (memberType, dataTypeMember);
Expand Down

0 comments on commit 445e361

Please sign in to comment.