From 445e361c3cb53ce06d2064030e3569691072f17c Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Fri, 1 Nov 2024 09:14:54 -0400 Subject: [PATCH] Adding missing changes to last commit --- .../PandasConverter.DataFrameGenerator.cs | 66 ++++++++++++------- Common/Python/PandasConverter.cs | 21 +++--- Common/Python/PandasData.DataTypeMember.cs | 22 ++++++- Common/Python/PandasData.cs | 47 ++++++++----- 4 files changed, 104 insertions(+), 52 deletions(-) diff --git a/Common/Python/PandasConverter.DataFrameGenerator.cs b/Common/Python/PandasConverter.DataFrameGenerator.cs index 7716b0bcd928..651834f25658 100644 --- a/Common/Python/PandasConverter.DataFrameGenerator.cs +++ b/Common/Python/PandasConverter.DataFrameGenerator.cs @@ -48,8 +48,9 @@ private class DataFrameGenerator private int _maxLevels; private bool _shouldUseSymbolOnlyIndex; + private readonly bool _flatten; - protected DataFrameGenerator(Type dataType = null, bool timeAsColumn = false) + protected DataFrameGenerator(Type dataType = null, bool timeAsColumn = false, bool flatten = false) { _dataType = dataType; // if no data type is requested we check all @@ -57,10 +58,11 @@ protected DataFrameGenerator(Type dataType = null, bool timeAsColumn = false) _requestedTradeBar = dataType == null || dataType == typeof(TradeBar); _requestedQuoteBar = dataType == null || dataType == typeof(QuoteBar); _timeAsColumn = timeAsColumn; + _flatten = flatten; } - public DataFrameGenerator(IEnumerable slices, Type dataType = null) - : this(dataType) + public DataFrameGenerator(IEnumerable slices, bool flatten = false, Type dataType = null) + : this(dataType, flatten: flatten) { AddData(slices); } @@ -78,7 +80,7 @@ protected void AddData(IEnumerable slices) { foreach (var data in slice.AllData) { - if (data is BaseDataCollection collection) + if (_flatten && data is BaseDataCollection collection) { AddCollection(collection); continue; @@ -149,7 +151,7 @@ protected void AddData(IEnumerable data) { var type = typeof(T); - if (type.IsAssignableTo(typeof(BaseDataCollection))) + if (_flatten && type.IsAssignableTo(typeof(BaseDataCollection))) { foreach (var collection in data) { @@ -195,28 +197,38 @@ public PyObject GenerateDataFrame(int? levels = null, bool sort = true, bool fil var pandasDataDataFrames = GetPandasDataDataFrames(levels, filterMissingValueColumns, symbolOnlyIndex, forceMultiValueSymbol).ToList(); var collectionsDataFrames = GetCollectionsDataFrames(symbolOnlyIndex, forceMultiValueSymbol).ToList(); - if (collectionsDataFrames.Count == 0) + try { - return ConcatDataFrames(pandasDataDataFrames, sort, dropna: true); - } + if (collectionsDataFrames.Count == 0) + { + return ConcatDataFrames(pandasDataDataFrames, sort, dropna: true); + } - var dataFrames = collectionsDataFrames.Select(x => x.Item3).Concat(pandasDataDataFrames); + var dataFrames = collectionsDataFrames.Select(x => x.Item3).Concat(pandasDataDataFrames); - if (_collections.DistinctBy(x => x.Symbol).Count() > 1) - { - var keys = collectionsDataFrames - .Select(x => new object[] { x.Item1, x.Item2 }) - .Concat(pandasDataDataFrames.Select(x => new object[] { x, DateTime.MinValue })); + if (_collections.DistinctBy(x => x.Symbol).Count() > 1) + { + var keys = collectionsDataFrames + .Select(x => new object[] { x.Item1, x.Item2 }) + .Concat(pandasDataDataFrames.Select(x => new object[] { x, DateTime.MinValue })); - return ConcatDataFrames(dataFrames, keys, MultiBaseDataCollectionDataFrameNames, sort, dropna: true); + return ConcatDataFrames(dataFrames, keys, MultiBaseDataCollectionDataFrameNames, sort, dropna: true); + } + else + { + var keys = collectionsDataFrames + .Select(x => new object[] { x.Item2 }) + .Concat(pandasDataDataFrames.Select(x => new object[] { DateTime.MinValue })); + + return ConcatDataFrames(dataFrames, keys, SingleBaseDataCollectionDataFrameNames, sort, dropna: true); + } } - else + finally { - var keys = collectionsDataFrames - .Select(x => new object[] { x.Item2 }) - .Concat(pandasDataDataFrames.Select(x => new object[] { DateTime.MinValue })); - - return ConcatDataFrames(dataFrames, keys, SingleBaseDataCollectionDataFrameNames, sort, dropna: true); + foreach (var df in pandasDataDataFrames.Concat(collectionsDataFrames.Select(x => x.Item3))) + { + df.Dispose(); + } } } @@ -254,7 +266,7 @@ private IEnumerable GetPandasDataDataFrames(int? levels, bool filterMi foreach (var collection in _collections.GroupBy(x => x.Symbol).SelectMany(x => x)) { - var generator = new DataFrameGenerator(_dataType, timeAsColumn: !symbolOnlyIndex); + var generator = new DataFrameGenerator(_dataType, timeAsColumn: !symbolOnlyIndex, flatten: _flatten); generator.AddData(collection.Data); var dataFrame = generator.GenerateDataFrame(symbolOnlyIndex: symbolOnlyIndex, forceMultiValueSymbol: forceMultiValueSymbol); @@ -281,5 +293,15 @@ private void AddCollection(BaseDataCollection collection) _collections.Add(collection); } } + + private class DataFrameGenerator : DataFrameGenerator + where T : ISymbolProvider + { + public DataFrameGenerator(IEnumerable data, bool flatten) + : base(flatten: flatten) + { + AddData(data); + } + } } } diff --git a/Common/Python/PandasConverter.cs b/Common/Python/PandasConverter.cs index 94c7788cc94e..b46f89839cc9 100644 --- a/Common/Python/PandasConverter.cs +++ b/Common/Python/PandasConverter.cs @@ -50,12 +50,13 @@ static PandasConverter() /// Converts an enumerable of in a pandas.DataFrame /// /// Enumerable of + /// Whether to flatten collections into rows and columns /// Optional type of bars to add to the data frame /// If true, the base data items time will be ignored and only the base data collection time will be used in the index /// containing a pandas.DataFrame - public PyObject GetDataFrame(IEnumerable data, Type dataType = null) + public PyObject GetDataFrame(IEnumerable data, bool flatten = false, Type dataType = null) { - var generator = new DataFrameGenerator(data, dataType); + var generator = new DataFrameGenerator(data, flatten, dataType); return generator.GenerateDataFrame(); } @@ -67,12 +68,13 @@ public PyObject GetDataFrame(IEnumerable data, Type dataType = null) /// Useful when the data contains points for multiple symbols. /// If false and is true, it will assume there is a single point for each symbol, /// and will apply performance improvements for the data frame generation. + /// Whether to flatten collections into rows and columns /// containing a pandas.DataFrame /// Helper method for testing - public PyObject GetDataFrame(IEnumerable data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false) + public PyObject GetDataFrame(IEnumerable data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false, bool flatten = false) where T : ISymbolProvider { - var generator = new DataFrameGenerator(data); + var generator = new DataFrameGenerator(data, flatten); return generator.GenerateDataFrame( // Use 2 instead of maxLevels for backwards compatibility levels: symbolOnlyIndex ? 1 : 2, @@ -193,9 +195,11 @@ public static PyObject ConcatDataFrames(IEnumerable dataFrames, IEn { pyNames = names.ToPyListUnSafe(); pyKeys = ConvertConcatKeys(keys); + using var pyFalse = false.ToPython(); kwargs.SetItem("keys", pyKeys); kwargs.SetItem("names", pyNames); + kwargs.SetItem("copy", pyFalse); } var result = _concat.Invoke(new[] { pyDataFrames }, kwargs); @@ -285,14 +289,5 @@ private PyObject MakeIndicatorDataFrame(PyDict pyDict) { return _pandas.DataFrame(pyDict, columns: pyDict.Keys().Select(x => x.As().ToLowerInvariant()).OrderBy(x => x)); } - - private class DataFrameGenerator : DataFrameGenerator - where T : ISymbolProvider - { - public DataFrameGenerator(IEnumerable data) - { - AddData(data); - } - } } } diff --git a/Common/Python/PandasData.DataTypeMember.cs b/Common/Python/PandasData.DataTypeMember.cs index 5959a3cc66bd..fa5dd94eca25 100644 --- a/Common/Python/PandasData.DataTypeMember.cs +++ b/Common/Python/PandasData.DataTypeMember.cs @@ -42,6 +42,8 @@ private class DataTypeMember public DataTypeMember[] Children { get; } + public bool IsNonExpandable { get; init; } + public bool IsProperty => _property != null; public bool IsField => _field != null; @@ -61,7 +63,7 @@ private class DataTypeMember public bool IsTickProperty { get; } - public DataTypeMember(MemberInfo member, DataTypeMember[] children = null) + private DataTypeMember(MemberInfo member, DataTypeMember[] children = null) { Member = member; Children = children; @@ -81,6 +83,24 @@ public DataTypeMember(MemberInfo member, DataTypeMember[] children = null) } } + public static DataTypeMember CreateWithChildren(MemberInfo member, DataTypeMember[] children) + { + return new DataTypeMember(member, children); + } + + public static DataTypeMember Create(MemberInfo member) + { + return new DataTypeMember(member); + } + + public static DataTypeMember CreateNonExpandableMember(MemberInfo member) + { + return new DataTypeMember(member) + { + IsNonExpandable = true + }; + } + public PropertyInfo AsProperty() { return _property; diff --git a/Common/Python/PandasData.cs b/Common/Python/PandasData.cs index d1295b6e89bc..1b97d39952d1 100644 --- a/Common/Python/PandasData.cs +++ b/Common/Python/PandasData.cs @@ -59,9 +59,11 @@ public partial class PandasData nameof(Tick.BidSize) }; - private static Type PandasNonExpandableAttribute = typeof(PandasNonExpandableAttribute); - private static Type PandasIgnoreAttribute = typeof(PandasIgnoreAttribute); - private static Type PandasIgnoreMembersAttribute = typeof(PandasIgnoreMembersAttribute); + private static readonly Type PandasNonExpandableAttribute = typeof(PandasNonExpandableAttribute); + private static readonly Type PandasIgnoreAttribute = typeof(PandasIgnoreAttribute); + private static readonly Type PandasIgnoreMembersAttribute = typeof(PandasIgnoreMembersAttribute); + + private static readonly Type[] _leanCommonDataTypes = new[] { typeof(TradeBar), typeof(QuoteBar), typeof(Tick), typeof(OpenInterest) }; private readonly Symbol _symbol; private readonly bool _isFundamentalType; @@ -158,28 +160,35 @@ public void Add(object data) Add(data, false); } - private void Add(object baseData, bool overrideValues) + private void Add(object data, bool overrideValues) { - if (baseData == null) + if (data == null) { return; } - var typeMembers = GetInstanceDataTypeMembers(baseData); + var typeMembers = GetInstanceDataTypeMembers(data).ToList(); + var isNonExpandable = typeMembers.Count == 1 && typeMembers[0].IsNonExpandable; var endTime = default(DateTime); if (_isBaseData) { - endTime = ((IBaseData)baseData).EndTime; - if (_timeAsColumn) + endTime = ((IBaseData)data).EndTime; + if (_timeAsColumn && !isNonExpandable) { AddToSeries("time", endTime, endTime, overrideValues); } } - AddMembersData(baseData, typeMembers, endTime, overrideValues); + if (isNonExpandable) + { + AddToSeries("instance", endTime, data, overrideValues); + return; + } + + AddMembersData(data, typeMembers, endTime, overrideValues); - if (baseData is DynamicData dynamicData) + if (data is DynamicData dynamicData) { var storage = dynamicData.GetStorageDictionary(); var value = dynamicData.Value; @@ -438,11 +447,15 @@ public static PyObject ToPandasDataFrame(IEnumerable pandasDatas, bo private IEnumerable GetInstanceDataTypeMembers(object data) { var type = data.GetType(); - if (!_members.TryGetValue(type, out var members)) + + if (type.IsDefined(PandasNonExpandableAttribute)) { - // TODO: make it static - var leanCommonDataTypes = new[] { typeof(TradeBar), typeof(QuoteBar), typeof(Tick), typeof(OpenInterest) }; + _series.TryAdd("instance", new Serie(withTimeIndex: !_timeAsColumn)); + return new List { DataTypeMember.CreateNonExpandableMember(type) }; + } + if (!_members.TryGetValue(type, out var members)) + { HashSet columnNames; if (data is DynamicData dynamicData) @@ -455,7 +468,9 @@ private IEnumerable GetInstanceDataTypeMembers(object data) } else { - members = leanCommonDataTypes.Contains(type) ? GetTypeMembers(type) : GetTypeMembers(type, nameof(BaseData.Value)); + members = _leanCommonDataTypes.Contains(type) + ? GetTypeMembers(type) + : GetTypeMembers(type, nameof(BaseData.Value)); columnNames = members.SelectMany(x => x.GetMemberNames()).ToHashSet(); // We add openinterest key so the series is created: open interest tick LastPrice is renamed to OpenInterest @@ -528,11 +543,11 @@ private static IEnumerable GetDataTypeMembers(Type type, string[ && !memberType.IsDefined(PandasNonExpandableAttribute) && !member.IsDefined(PandasNonExpandableAttribute)))) { - dataTypeMember = new DataTypeMember(member, GetDataTypeMembers(memberType, forcedInclusionMembers).ToArray()); + dataTypeMember = DataTypeMember.CreateWithChildren(member, GetDataTypeMembers(memberType, forcedInclusionMembers).ToArray()); } else { - dataTypeMember = new DataTypeMember(member); + dataTypeMember = DataTypeMember.Create(member); } return (memberType, dataTypeMember);