Commit
Update feature config generation method & Fix bugs
- Adopted a new feature config generation method (see the sketch after this list).
- Simplified the input mechanism for multi-valued features.
- Fixed bug: in some cases the order of the model input placeholders did not match the actual model input list.
- Fixed bug: division by zero in `SequencePoolingLayer` when the sequence length is 0.
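
As context for the first two items, the diff below exports `SingleFeat` and `VarLenFeat` from `deepctr.utils` and reads `feat.name`, `feat.dimension`, `feat.maxlen`, and `feat.combiner` when building inputs and embeddings. The following is a minimal, hedged sketch of how the new-style feature config is presumably assembled; the feature names, vocabulary sizes, and the `SingleFeat(name, dimension)` / `VarLenFeat(name, dimension, maxlen, combiner)` signatures are assumptions inferred from the diff, not confirmed API.

# Hedged sketch (not from the commit): building the new-style feature config.
# SingleFeat / VarLenFeat are exported from deepctr in this version;
# the field names and dimensions below are purely illustrative.
from deepctr import SingleFeat, VarLenFeat
from deepctr.models import AFM

feature_dim_dict = {
    # sparse (categorical) features: name plus vocabulary size
    "sparse": [SingleFeat("user_id", 1000), SingleFeat("item_id", 5000)],
    # dense (numeric) features: dimension is unused here, so 0 by convention
    "dense": [SingleFeat("pay_score", 0)],
    # multi-valued (sequence) features: name, vocabulary size, max length, pooling combiner
    "sequence": [VarLenFeat("hist_item_id", 5000, 10, "mean")],
}

model = AFM(feature_dim_dict, embedding_size=8)
model.compile("adam", "binary_crossentropy")
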
Weichen Shen authored Jan 24, 2019
1 parent 18cf3c0 commit d97e9cd
Showing 32 changed files with 408 additions and 346 deletions.
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2018 Weichen Shen
Copyright (c) 2018-2019 Weichen Shen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
4 changes: 2 additions & 2 deletions deepctr/__init__.py
@@ -2,6 +2,6 @@
from .import layers
from .import sequence
from . import models
from .utils import check_version
__version__ = '0.2.2'
from .utils import check_version, SingleFeat, VarLenFeat
__version__ = '0.2.3'
check_version(__version__)
122 changes: 78 additions & 44 deletions deepctr/input_embedding.py
@@ -1,75 +1,85 @@
from collections import OrderedDict
from itertools import chain

from tensorflow.python.keras import Input
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.layers import Embedding, Dense, Reshape, Concatenate
from tensorflow.python.keras.layers import Embedding, Dense, Reshape, Concatenate, Input, add
from tensorflow.python.keras.regularizers import l2
from .sequence import SequencePoolingLayer
from .utils import get_linear_logit


def create_input_dict(feature_dim_dict, prefix=''):
sparse_input = {feat: Input(shape=(1,), name=prefix+'sparse_' + str(i) + '-' + feat) for i, feat in
enumerate(feature_dim_dict["sparse"])}
dense_input = {feat: Input(shape=(1,), name=prefix+'dense_' + str(i) + '-' + feat) for i, feat in
enumerate(feature_dim_dict["dense"])}
sparse_input = OrderedDict()
for i, feat in enumerate(feature_dim_dict["sparse"]):
sparse_input[feat.name] = Input(
shape=(1,), name=prefix+'sparse_' + str(i) + '-' + feat.name)

dense_input = OrderedDict()

for i, feat in enumerate(feature_dim_dict["dense"]):
dense_input[feat] = Input(
shape=(1,), name=prefix+'dense_' + str(i) + '-' + feat.name)

return sparse_input, dense_input


def create_sequence_input_dict(feature_dim_dict):
def create_sequence_input_dict(feature_dim_dict, mask_zero=True):

sequence_dim_dict = feature_dim_dict.get('sequence', [])
sequence_input_dict = {feat.name: Input(shape=(feat.maxlen,), name='seq_' + str(
i) + '-' + feat.name) for i, feat in enumerate(sequence_dim_dict)}
sequence_pooling_dict = {feat.name: feat.combiner
for i, feat in enumerate(sequence_dim_dict)}
sequence_len_dict = {feat.name: Input(shape=(
1,), name='seq_length'+str(i)+'-'+feat.name) for i, feat in enumerate(sequence_dim_dict)}
sequence_max_len_dict = {feat.name: feat.maxlen
for i, feat in enumerate(sequence_dim_dict)}
if mask_zero:
sequence_len_dict, sequence_max_len_dict = None, None
else:
sequence_len_dict = {feat.name: Input(shape=(
1,), name='seq_length'+str(i)+'-'+feat.name) for i, feat in enumerate(sequence_dim_dict)}
sequence_max_len_dict = {feat.name: feat.maxlen
for i, feat in enumerate(sequence_dim_dict)}
return sequence_input_dict, sequence_pooling_dict, sequence_len_dict, sequence_max_len_dict


def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed, l2_reg, prefix='sparse'):
def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed, l2_reg, prefix='sparse', seq_mask_zero=True):
if embedding_size == 'auto':

sparse_embedding = {feat: Embedding(feature_dim_dict["sparse"][feat], 6 * int(pow(feature_dim_dict["sparse"][feat], 0.25)),
embeddings_initializer=RandomNormal(
sparse_embedding = {feat.name: Embedding(feat.dimension, 6 * int(pow(feat.dimension, 0.25)),
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(l2_reg),
name=prefix+'_emb_' + str(i) + '-' + feat) for i, feat in
name=prefix+'_emb_' + str(i) + '-' + feat.name) for i, feat in
enumerate(feature_dim_dict["sparse"])}
else:

sparse_embedding = {feat: Embedding(feature_dim_dict["sparse"][feat], embedding_size,
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(l2_reg),
name=prefix+'_emb_' + str(i) + '-' + feat) for i, feat in
enumerate(feature_dim_dict["sparse"])}
sparse_embedding = {feat.name: Embedding(feat.dimension, embedding_size,
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(
l2_reg),
name=prefix+'_emb_' + str(i) + '-' + feat.name) for i, feat in
enumerate(feature_dim_dict["sparse"])}

if 'sequence' in feature_dim_dict:
count = len(sparse_embedding)
sequence_dim_list = feature_dim_dict['sequence']
for feat in sequence_dim_list:
if feat.name not in sparse_embedding:
if embedding_size == "auto":
sparse_embedding[feat.name] = Embedding(feat.dimension, 6 * int(pow(feat.dimension, 0.25)),
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(
l2_reg),
name=prefix + '_emb_' + str(count) + '-' + feat.name)

else:
sparse_embedding[feat.name] = Embedding(feat.dimension, embedding_size,
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(
l2_reg),
name=prefix+'_emb_' + str(count) + '-' + feat.name)

count += 1
# if feat.name not in sparse_embedding:
if embedding_size == "auto":
sparse_embedding[feat.name] = Embedding(feat.dimension, 6 * int(pow(feat.dimension, 0.25)),
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(
l2_reg),
name=prefix + '_emb_' + str(count) + '-' + feat.name, mask_zero=seq_mask_zero)

else:
sparse_embedding[feat.name] = Embedding(feat.dimension, embedding_size,
embeddings_initializer=RandomNormal(
mean=0.0, stddev=init_std, seed=seed),
embeddings_regularizer=l2(
l2_reg),
name=prefix+'_emb_' + str(count) + '-' + feat.name, mask_zero=seq_mask_zero)

count += 1

return sparse_embedding

@@ -109,7 +119,6 @@ def merge_sequence_input(embedding_dict, embed_list, sequence_input_dict, sequen


def get_embedding_vec_list(embedding_dict, input_dict):

return [embedding_dict[feat](v)
for feat, v in input_dict.items()]

@@ -121,12 +130,15 @@ def get_varlen_embedding_vec_dict(embedding_dict, input_dict):


def get_pooling_vec_list(sequence_embed_dict, sequence_len_dict, sequence_max_len_dict, sequence_pooling_dict):
return [SequencePoolingLayer(sequence_max_len_dict[feat], sequence_pooling_dict[feat])(
[v, sequence_len_dict[feat]]) for feat, v in sequence_embed_dict.items()]
if sequence_max_len_dict is None or sequence_len_dict is None:
return [SequencePoolingLayer(-1, sequence_pooling_dict[feat])(v) for feat, v in sequence_embed_dict.items()]
else:
return [SequencePoolingLayer(sequence_max_len_dict[feat], sequence_pooling_dict[feat])(
[v, sequence_len_dict[feat]]) for feat, v in sequence_embed_dict.items()]


def get_inputs_list(inputs):
return list(chain(*list(map(lambda x: x.values(), inputs))))
return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs)))))


def get_inputs_embedding(feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed, include_linear=True):
@@ -162,3 +174,25 @@ def get_inputs_embedding(feature_dim_dict, embedding_size, l2_reg_embedding, l2_
inputs_list = get_inputs_list(
[sparse_input_dict, dense_input_dict, sequence_input_dict, sequence_input_len_dict])
return deep_emb_list, linear_logit, inputs_list


def get_linear_logit(linear_term, dense_input_, l2_reg):
if len(linear_term) > 1:
linear_term = add(linear_term)
elif len(linear_term) == 1:
linear_term = linear_term[0]
else:
linear_term = None

dense_input = list(dense_input_.values())
if len(dense_input) > 0:
dense_input__ = dense_input[0] if len(
dense_input) == 1 else Concatenate()(dense_input)
linear_dense_logit = Dense(
1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg))(dense_input__)
if linear_term is not None:
linear_term = add([linear_dense_logit, linear_term])
else:
linear_term = linear_dense_logit

return linear_term
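
The `SequencePoolingLayer` division-by-zero fix itself lives in `deepctr/sequence.py`, which is not shown in this excerpt; `get_pooling_vec_list` above only switches to calling the layer without an explicit length tensor when masking is enabled. Purely as a hedged illustration of the pattern involved (a hypothetical helper, not the actual implementation), masked mean pooling can add a small epsilon to the element count so an empty sequence no longer divides by zero:

# Hypothetical sketch of epsilon-guarded masked mean pooling (TF 1.x style);
# not the code from deepctr/sequence.py.
import tensorflow as tf

def masked_mean_pool(seq_embed, seq_len, maxlen, eps=1e-8):
    # seq_embed: (batch, maxlen, dim) padded embeddings
    # seq_len:   (batch, 1) true sequence lengths, possibly 0
    lengths = tf.cast(tf.squeeze(seq_len, axis=1), tf.int32)
    mask = tf.sequence_mask(lengths, maxlen, dtype=tf.float32)       # (batch, maxlen)
    summed = tf.reduce_sum(seq_embed * tf.expand_dims(mask, -1), 1)  # (batch, dim)
    count = tf.reduce_sum(mask, axis=1, keepdims=True)               # (batch, 1)
    return summed / (count + eps)  # eps keeps length-0 rows finite (they pool to 0)
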
48 changes: 28 additions & 20 deletions deepctr/layers.py
@@ -1,7 +1,7 @@
import itertools
from tensorflow.python.keras.layers import Layer, BatchNormalization
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras.initializers import Zeros, glorot_normal, glorot_uniform
from tensorflow.python.keras.initializers import Zeros, Ones, glorot_normal, glorot_uniform
from tensorflow.python.keras import backend as K
import tensorflow as tf
from .activations import activation_fun
@@ -355,10 +355,9 @@ def call(self, inputs, **kwargs):
x_0 = tf.expand_dims(inputs, axis=2)
x_l = x_0
for i in range(self.layer_num):
xl_w = tf.tensordot(tf.transpose(
x_l, [0, 2, 1]), self.kernels[i], axes=(-1, 0))
xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0))
dot_ = tf.matmul(x_0, xl_w)
x_l = dot_ + x_l + self.bias[i]
x_l = dot_ + self.bias[i] + x_l
x_l = tf.squeeze(x_l, axis=2)
return x_l

@@ -504,7 +503,6 @@ def get_config(self,):
return dict(list(base_config.items()) + list(config.items()))



class InteractingLayer(Layer):
"""A Layer used in AutoInt that model the correlations between different feature fields by multi-head self-attention mechanism.
@@ -524,6 +522,7 @@ class InteractingLayer(Layer):
References
- [Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.](https://arxiv.org/abs/1810.11921)
"""

def __init__(self, att_embedding_size=8, head_num=2, use_res=True, seed=1024, **kwargs):
if head_num <= 0:
raise ValueError('head_num must be a int > 0')
@@ -535,7 +534,8 @@ def __init__(self, att_embedding_size=8, head_num=2, use_res=True, seed=1024, **

def build(self, input_shape):
if len(input_shape) != 3:
raise ValueError("Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
raise ValueError(
"Unexpected inputs dimensions %d, expect to be 3 dimensions" % (len(input_shape)))
embedding_size = input_shape[-1].value
self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32,
initializer=tf.keras.initializers.glorot_uniform(seed=self.seed))
@@ -547,26 +547,32 @@ def build(self, input_shape):
self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32,
initializer=tf.keras.initializers.glorot_uniform(seed=self.seed))

super(InteractingLayer, self).build(input_shape) # Be sure to call this somewhere!
# Be sure to call this somewhere!
super(InteractingLayer, self).build(input_shape)

def call(self, inputs, **kwargs):
if K.ndim(inputs) != 3:
raise ValueError("Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))
raise ValueError(
"Unexpected inputs dimensions %d, expect to be 3 dimensions" % (K.ndim(inputs)))

querys = tf.tensordot(inputs, self.W_Query, axes=(-1, 0)) # None F D*head_num
querys = tf.tensordot(inputs, self.W_Query,
axes=(-1, 0)) # None F D*head_num
keys = tf.tensordot(inputs, self.W_key, axes=(-1, 0))
values = tf.tensordot(inputs, self.W_Value, axes=(-1, 0))

querys = tf.stack(tf.split(querys, self.head_num, axis=2)) # head_num None F D
# head_num None F D
querys = tf.stack(tf.split(querys, self.head_num, axis=2))
keys = tf.stack(tf.split(keys, self.head_num, axis=2))
values = tf.stack(tf.split(values, self.head_num, axis=2))

inner_product = tf.matmul(querys, keys, transpose_b=True) # head_num None F F
inner_product = tf.matmul(
querys, keys, transpose_b=True) # head_num None F F
self.normalized_att_scores = tf.nn.softmax(inner_product)

result = tf.matmul(self.normalized_att_scores, values)#head_num None F D
result = tf.matmul(self.normalized_att_scores,
values) # head_num None F D
result = tf.concat(tf.split(result, self.head_num, ), axis=-1)
result = tf.squeeze(result, axis=0)#None F D*head_num
result = tf.squeeze(result, axis=0) # None F D*head_num

if self.use_res:
result += tf.tensordot(inputs, self.W_Res, axes=(-1, 0))
@@ -648,7 +654,7 @@ def build(self, input_shape):
super(LocalActivationUnit, self).build(
input_shape) # Be sure to call this somewhere!

def call(self, inputs, **kwargs):
def call(self, inputs, training=None, **kwargs):

query, keys = inputs

@@ -657,7 +663,7 @@ def call(self, inputs, **kwargs):

att_input = tf.concat(
[queries, keys, queries - keys, queries * keys], axis=-1)
att_input = tf.layers.batch_normalization(att_input)

att_out = MLP(self.hidden_size, self.activation, self.l2_reg,
self.keep_prob, self.use_bn, seed=self.seed)(att_input)
attention_score = tf.nn.bias_add(tf.tensordot(
@@ -724,7 +730,8 @@ def build(self, input_shape):

super(MLP, self).build(input_shape) # Be sure to call this somewhere!

def call(self, inputs, **kwargs):
def call(self, inputs, training=None, **kwargs):

deep_input = inputs

for i in range(len(self.hidden_size)):
@@ -734,9 +741,10 @@ def call(self, inputs, **kwargs):
# kernel_initializer=glorot_normal(seed=self.seed), \
# kernel_regularizer=l2(self.l2_reg))(deep_input)
if self.use_bn:
fc = BatchNormalization()(fc)
fc = tf.keras.layers.BatchNormalization()(fc)
fc = activation_fun(self.activation, fc)
fc = tf.nn.dropout(fc, self.keep_prob)
#fc = tf.nn.dropout(fc, self.keep_prob)
fc = tf.keras.layers.Dropout(1 - self.keep_prob)(fc,)
deep_input = fc

return deep_input
@@ -901,7 +909,7 @@ class PredictionLayer(Layer):
Arguments
- **activation**: Activation function to use.
- **use_bias**: bool.Whther add bias term.
- **use_bias**: bool.Whether add bias term or not.
"""

def __init__(self, activation='sigmoid', use_bias=True, **kwargs):
@@ -933,4 +941,4 @@ def compute_output_shape(self, input_shape):
def get_config(self,):
config = {'activation': self.activation, 'use_bias': self.use_bias}
base_config = super(PredictionLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
return dict(list(base_config.items()) + list(config.items()))
15 changes: 3 additions & 12 deletions deepctr/models/afm.py
@@ -12,7 +12,7 @@
import tensorflow as tf
from ..input_embedding import get_inputs_embedding
from ..layers import PredictionLayer, AFMLayer, FM
from ..utils import concat_fun
from ..utils import concat_fun, check_feature_config_dict


def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor=8,
@@ -34,21 +34,12 @@ def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor
:return: A Keras model instance.
"""

if not isinstance(feature_dim_dict,
dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict:
raise ValueError(
"feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}")
if not isinstance(feature_dim_dict["sparse"], dict):
raise ValueError("feature_dim_dict['sparse'] must be a dict,cur is", type(
feature_dim_dict['sparse']))
if not isinstance(feature_dim_dict["dense"], list):
raise ValueError("feature_dim_dict['dense'] must be a list,cur is", type(
feature_dim_dict['dense']))
check_feature_config_dict(feature_dim_dict)

deep_emb_list, linear_logit, inputs_list = get_inputs_embedding(
feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed)

fm_input = concat_fun(deep_emb_list,axis=1)
fm_input = concat_fun(deep_emb_list, axis=1)
if use_attention:
fm_logit = AFMLayer(attention_factor, l2_reg_att,
keep_prob, seed)(deep_emb_list)
6 changes: 2 additions & 4 deletions deepctr/models/autoint.py
@@ -12,7 +12,7 @@
import tensorflow as tf
from ..input_embedding import get_inputs_embedding
from ..layers import PredictionLayer, MLP, InteractingLayer
from ..utils import concat_fun
from ..utils import concat_fun, check_feature_config_dict


def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_size=8, att_head_num=2, att_res=True, hidden_size=(256, 256), activation='relu',
@@ -40,9 +40,7 @@ def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_s

if len(hidden_size) <= 0 and att_layer_num <= 0:
raise ValueError("Either hidden_layer or att_layer_num must > 0")
if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict:
raise ValueError(
"feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")
check_feature_config_dict(feature_dim_dict)

deep_emb_list, _, inputs_list = get_inputs_embedding(
feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed, False)
(Remaining changed files not shown.)