
Commit b48a030
fix #1207
dmorrill10 committed Aug 18, 2024
1 parent 7ff9e28 commit b48a030
Showing 2 changed files with 30 additions and 17 deletions.
36 changes: 23 additions & 13 deletions open_spiel/python/algorithms/rcfr_test.py
@@ -37,7 +37,7 @@ def _new_model():
   return rcfr.DeepRcfrModel(
       _GAME,
       num_hidden_layers=1,
-      num_hidden_units=13,
+      num_hidden_units=26,
       num_hidden_factors=1,
       use_skip_connections=True)

@@ -476,12 +476,16 @@ def test_rcfr_functions(self):
       data = data.batch(12)
       data = data.repeat(num_epochs)
 
-      optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
+      optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
 
+      model = models[regret_player]
       for x, y in data:
-        optimizer.minimize(
-            lambda: tf.losses.huber_loss(y, models[regret_player](x)),  # pylint: disable=cell-var-from-loop
-            models[regret_player].trainable_variables)
+        with tf.GradientTape() as tape:
+          loss = tf.losses.huber_loss(y, model(x))
+        optimizer.apply_gradients(
+            zip(
+                tape.gradient(loss, model.trainable_variables),
+                model.trainable_variables))
 
       regret_player = reach_weights_player

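The diff replaces the one-shot `optimizer.minimize(loss_callable, var_list)` call with an explicit `tf.GradientTape` step and renames the deprecated `lr` keyword to `learning_rate`. For reference, here is the tape-based step in isolation, as a minimal self-contained sketch; the toy model and data are illustrative, and `tf.keras.losses.Huber` stands in for the `tf.losses.huber_loss` used in the diff:

import tensorflow as tf

# Illustrative stand-ins; any Keras model with matching data would do.
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
x = tf.random.normal([32, 4])
y = tf.random.normal([32, 1])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)

with tf.GradientTape() as tape:
  # The forward pass runs under the tape so its operations are recorded.
  loss = tf.keras.losses.Huber()(y, model(x))
# Differentiate the recorded loss w.r.t. the model parameters,
# then apply one Adam update.
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))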
@@ -504,12 +504,15 @@ def _train(model, data):
       data = data.batch(12)
       data = data.repeat(num_epochs)
 
-      optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
+      optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
 
       for x, y in data:
-        optimizer.minimize(
-            lambda: tf.losses.huber_loss(y, model(x)),  # pylint: disable=cell-var-from-loop
-            model.trainable_variables)
+        with tf.GradientTape() as tape:
+          loss = tf.losses.huber_loss(y, model(x))
+        optimizer.apply_gradients(
+            zip(
+                tape.gradient(loss, model.trainable_variables),
+                model.trainable_variables))
 
     average_policy = patient.average_policy()
     self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91)
@@ -565,12 +572,15 @@ def _train(model, data):
       data = data.batch(12)
       data = data.repeat(num_epochs)
 
-      optimizer = tf.keras.optimizers.Adam(lr=0.005, amsgrad=True)
+      optimizer = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
 
       for x, y in data:
-        optimizer.minimize(
-            lambda: tf.losses.huber_loss(y, model(x)),  # pylint: disable=cell-var-from-loop
-            model.trainable_variables)
+        with tf.GradientTape() as tape:
+          loss = tf.losses.huber_loss(y, model(x))
+        optimizer.apply_gradients(
+            zip(
+                tape.gradient(loss, model.trainable_variables),
+                model.trainable_variables))
 
     average_policy = patient.average_policy()
     self.assertGreater(pyspiel.nash_conv(_GAME, average_policy), 0.91)
11 changes: 7 additions & 4 deletions open_spiel/python/examples/rcfr_example.py
@@ -87,14 +87,17 @@ def _train_fn(model, data):
   data = data.batch(FLAGS.batch_size)
   data = data.repeat(FLAGS.num_epochs)
 
-  optimizer = tf.keras.optimizers.Adam(lr=FLAGS.step_size, amsgrad=True)
+  optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.step_size, amsgrad=True)
 
   @tf.function
   def _train():
     for x, y in data:
-      optimizer.minimize(
-          lambda: tf.losses.huber_loss(y, model(x), delta=0.01),  # pylint: disable=cell-var-from-loop
-          model.trainable_variables)
+      with tf.GradientTape() as tape:
+        loss = tf.losses.huber_loss(y, model(x), delta=0.01)
+      optimizer.apply_gradients(
+          zip(
+              tape.gradient(loss, model.trainable_variables),
+              model.trainable_variables))
 
   _train()

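The example compiles its loop with `@tf.function`, so the tape-based step above is traced on the first call and then executed as a graph. A minimal sketch of the same step as a compiled function, with illustrative names and toy data:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, amsgrad=True)
huber = tf.keras.losses.Huber(delta=0.01)  # mirrors delta=0.01 in the example

@tf.function  # Traced on the first call, then runs as a TensorFlow graph.
def train_step(x, y):
  with tf.GradientTape() as tape:
    loss = huber(y, model(x))
  grads = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))
  return loss

loss = train_step(tf.random.normal([8, 4]), tf.random.normal([8, 1]))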
