# DeepQTrading.py
#Imports the SpEnv environment, in which the agent performs its actions
from SpEnv import SpEnv
#Callback used to print the results at each episode
from Callback import ValidationCallback
#Keras library for the NN considered
from keras.models import Sequential
#Keras libraries for layers, activations and optimizers used
from keras.layers import Dense, Activation, Flatten
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.optimizers import *
#RL Agent
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy
from keras_radam import RAdam
#Mathematical operations used later
from math import floor
#Library to manipulate the dataset in a csv file
import pandas as pd
#Library used to manipulate time
import datetime
import os
import numpy
numpy.random.seed(0)
class DeepQTrading:

    #Class constructor
    #model: Keras model considered
    #nbActions: number of actions available to the agent
    #explorations_iterations: a list of pairs containing (i) the probability of taking a random action (exploration rate)
    #and (ii) how many iterations the algorithm will be run with that rate (the algorithm is repeated several times)
    #outputFile: name of the folder where the training metrics will be written
    #ensembleFolderName: name of the folder where the predictions will be written
    #optimizer: optimizer used to compile the agent
    def __init__(self, model, nbActions, explorations_iterations, outputFile, ensembleFolderName, optimizer="adamax"):

        self.ensembleFolderName=ensembleFolderName
        self.policy = EpsGreedyQPolicy()
        self.explorations_iterations=explorations_iterations
        self.nbActions=nbActions
        self.model=model

        #Define the replay memory
        self.memory = SequentialMemory(limit=10000, window_length=1)

        #Instantiate the agent with the parameters received
        self.agent = DQNAgent(model=self.model, policy=self.policy, nb_actions=self.nbActions, memory=self.memory,
                              nb_steps_warmup=200, target_model_update=1e-1, enable_double_dqn=True, enable_dueling_network=True)
        #Compile the agent with the optimizer given as parameter
        if optimizer=="adamax":
            self.agent.compile(Adamax(), metrics=['mae'])
        elif optimizer=="adadelta":
            self.agent.compile(Adadelta(), metrics=['mae'])
        elif optimizer=="sgd":
            self.agent.compile(SGD(), metrics=['mae'])
        elif optimizer=="rmsprop":
            self.agent.compile(RMSprop(), metrics=['mae'])
        elif optimizer=="nadam":
            self.agent.compile(Nadam(), metrics=['mae'])
        elif optimizer=="adagrad":
            self.agent.compile(Adagrad(), metrics=['mae'])
        elif optimizer=="adam":
            self.agent.compile(Adam(), metrics=['mae'])
        elif optimizer=="radam":
            self.agent.compile(RAdam(total_steps=5000, warmup_proportion=0.1, min_lr=1e-5), metrics=['mae'])
        #Save the initial (random) weights of the agent in the q.weights file
        self.agent.save_weights("q.weights", overwrite=True)

        #Load the data
        self.train_data= pd.read_csv('./dataset/jpm/train_data.csv')
        #Note: the validation set reuses the training CSV here; point it to a separate validation split if one is available
        self.validation_data=pd.read_csv('./dataset/jpm/train_data.csv')
        self.test_data=pd.read_csv('./dataset/jpm/test_data.csv')

        #Callbacks for training, validation and test, used to show the results at each iteration
        self.trainer=ValidationCallback()
        self.validator=ValidationCallback()
        self.tester=ValidationCallback()

        self.outputFileName=outputFile
    def run(self):

        #Initialize the environment placeholders so they can be deleted at the start of each iteration
        trainEnv=validEnv=testEnv=" "

        if not os.path.exists(self.outputFileName):
            os.makedirs(self.outputFileName)

        file_name=self.outputFileName+"/results-agent-training.csv"
        self.outputFile=open(file_name, "w+")

        #Write the header row of the csv
        self.outputFile.write(
            "Iteration,"+
            "trainAccuracy,"+
            "trainCoverage,"+
            "trainReward,"+
            "trainLong%,"+
            "trainShort%,"+
            "trainLongAcc,"+
            "trainShortAcc,"+
            "trainLongPrec,"+
            "trainShortPrec,"+
            "validationAccuracy,"+
            "validationCoverage,"+
            "validationReward,"+
            "validationLong%,"+
            "validationShort%,"+
            "validationLongAcc,"+
            "validationShortAcc,"+
            "validLongPrec,"+
            "validShortPrec,"+
            "testAccuracy,"+
            "testCoverage,"+
            "testReward,"+
            "testLong%,"+
            "testShort%,"+
            "testLongAcc,"+
            "testShortAcc,"+
            "testLongPrec,"+
            "testShortPrec\n")
        #Prepare the validation and test DataFrames used to save the ensemble predictions later
        #(plain column selection replaces the deprecated .ix indexer)
        ensambleValid=pd.DataFrame(index=self.validation_data['date_time'].drop_duplicates().tolist())
        ensambleTest=pd.DataFrame(index=self.test_data['date_time'].drop_duplicates().tolist())

        #Name the index for validation and testing
        ensambleValid.index.name='date_time'
        ensambleTest.index.name='date_time'
        #Each exploration pair defines an epsilon value and how many times the agent will play the game with it
        for eps in self.explorations_iterations:

            #The policy uses eps[0] as the exploration rate, so a random action is taken with probability eps[0]
            self.policy.eps = eps[0]

            #Run eps[1] iterations (e.g. 25) with this exploration rate
            for i in range(0,eps[1]):

                #Define the training, validation and testing environments with their respective callbacks
                del(trainEnv)
                trainEnv = SpEnv(data=self.train_data, callback=self.trainer)
                del(validEnv)
                validEnv=SpEnv(data=self.validation_data, ensamble=ensambleValid, callback=self.validator, columnName="iteration"+str(i))
                del(testEnv)
                testEnv=SpEnv(data=self.test_data, ensamble=ensambleTest, callback=self.tester, columnName="iteration"+str(i))

                #Reset the callbacks
                self.trainer.reset()
                self.validator.reset()
                self.tester.reset()

                #Reset the training environment
                trainEnv.resetEnv()

                #Train the agent: it receives the training environment as input
                self.agent.fit(trainEnv, nb_steps=len(self.train_data), visualize=False, verbose=0)
                #Get the info from the training callback
                (_,trainCoverage,trainAccuracy,trainReward,trainLongPerc,trainShortPerc,
                 trainLongAcc,trainShortAcc,trainLongPrec,trainShortPrec)=self.trainer.getInfo()
                print("Iteration " + str(i+1) + " TRAIN: accuracy: " + str(trainAccuracy) + " coverage: " + str(trainCoverage) + " reward: " + str(trainReward))

                #Reset the validation environment
                validEnv.resetEnv()
                #Evaluate the agent on the validation data
                self.agent.test(validEnv, nb_episodes=len(self.validation_data), visualize=False, verbose=0)
                #Get the info from the validation callback
                (_,validCoverage,validAccuracy,validReward,validLongPerc,validShortPerc,
                 validLongAcc,validShortAcc,validLongPrec,validShortPrec)=self.validator.getInfo()
                #Print the callback values on the screen
                print("Iteration " + str(i+1) + " VALIDATION: accuracy: " + str(validAccuracy) + " coverage: " + str(validCoverage) + " reward: " + str(validReward))

                #Reset the testing environment
                testEnv.resetEnv()
                #Evaluate the agent on the test data
                self.agent.test(testEnv, nb_episodes=len(self.test_data), visualize=False, verbose=0)
                #Get the info from the testing callback
                (_,testCoverage,testAccuracy,testReward,testLongPerc,testShortPerc,
                 testLongAcc,testShortAcc,testLongPrec,testShortPrec)=self.tester.getInfo()
                #Print the callback values on the screen
                print("Iteration " + str(i+1) + " TEST: accuracy: " + str(testAccuracy) + " coverage: " + str(testCoverage) + " reward: " + str(testReward))
                print(" ")
                #Write the metrics to the csv file
                self.outputFile.write(
                    str(i)+","+
                    str(trainAccuracy)+","+
                    str(trainCoverage)+","+
                    str(trainReward)+","+
                    str(trainLongPerc)+","+
                    str(trainShortPerc)+","+
                    str(trainLongAcc)+","+
                    str(trainShortAcc)+","+
                    str(trainLongPrec)+","+
                    str(trainShortPrec)+","+
                    str(validAccuracy)+","+
                    str(validCoverage)+","+
                    str(validReward)+","+
                    str(validLongPerc)+","+
                    str(validShortPerc)+","+
                    str(validLongAcc)+","+
                    str(validShortAcc)+","+
                    str(validLongPrec)+","+
                    str(validShortPrec)+","+
                    str(testAccuracy)+","+
                    str(testCoverage)+","+
                    str(testReward)+","+
                    str(testLongPerc)+","+
                    str(testShortPerc)+","+
                    str(testLongAcc)+","+
                    str(testShortAcc)+","+
                    str(testLongPrec)+","+
                    str(testShortPrec)+"\n")

        #Close the metrics file and save the ensemble predictions
        self.outputFile.close()

        if not os.path.exists("./Output/ensemble/"+self.ensembleFolderName):
            os.makedirs("./Output/ensemble/"+self.ensembleFolderName)

        ensambleValid.to_csv("./Output/ensemble/"+self.ensembleFolderName+"/ensemble_valid.csv")
        ensambleTest.to_csv("./Output/ensemble/"+self.ensembleFolderName+"/ensemble_test.csv")
    #Function to end the agent
    def end(self):
        print("FINISHED")