Skip to content

Commit 73616e8

Browse files
committed
upload codes
1 parent d752be4 commit 73616e8

File tree

293 files changed

+236402
-20
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

293 files changed

+236402
-20
lines changed

ST_DM/KDD2020-ConSTGAT/README.md

Whitespace-only changes.

ST_DM/KDD2020-ConSTGAT/constgat.py

Lines changed: 379 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,379 @@
1+
"""
2+
Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
3+
"""
4+
5+
import math
6+
import numpy
7+
import paddle
8+
import paddle.fluid as fluid
9+
from paddle.fluid import layers
10+
import random
11+
import sys
12+
import time
13+
import os
14+
from common import *
15+
16+
17+
class Constgat(object):
    """ConSTGAT travel-time estimation model (KDD 2020).

    Holds the feature-group specifications, embedding vocabulary sizes and
    training hyper-parameters; the graph itself is built by the methods below.
    """

    def __init__(self, method, base_lr=0.05, fc_lr=0.01, reg=0.0001, is_fleet=False):
        self.method = method
        self.is_fleet = is_fleet

        # Optimizer hyper-parameters: base SGD rate, per-fc-layer rate, L2 reg.
        self.base_lr = base_lr
        self.fc_lr = fc_lr
        self.reg = reg

        # Attention projection width and output-MLP hidden width.
        self.hidden_dim = 32
        self.output_hidden_dim = 64

        # Number of discretized duration buckets for the traffic-state vocab.
        self.duration_class_num = 281

        # Each feature group is a list of (input tensor name, vocab name).
        self.history_context_features = [('departure_hour', 'hour')]
        self.link_info_features = [('link', 'link'), ('length', 'length')]
        self.start_node_info_features = [('start_node', 'node')]
        self.end_node_info_features = [('end_node', 'node')]
        self.future_context_features = [('future_hour', 'hour')]
        self.neighbor_link_info_features = [
            ('neighbor_link', 'link'),
            ('neighbor_length', 'length'),
        ]
        self.neighbor_start_node_info_features = [('neighbor_start_node', 'node')]
        self.neighbor_end_node_info_features = [('neighbor_end_node', 'node')]
        self.neighbor_link_state_features = [('neighbor_duration', 'duration')]

        # vocab name -> [vocabulary size, embedding dimension]
        self.feature_voc_num_dict = {
            'link': [400000, 8],
            'length': [2000, 8],
            'node': [200000, 8],
            'hour': [24, 8],
            'duration': [self.duration_class_num, 8],
        }
def create_tensors(self, feature_num, name, shape, dtype):
72+
"""
73+
create tensors
74+
"""
75+
tensors = []
76+
for i in range(feature_num):
77+
tensors.append(
78+
layers.data(
79+
name='%s_%d' % (name, i),
80+
shape=shape,
81+
dtype=dtype,
82+
lod_level=1))
83+
return tensors
84+
85+
def prepare_inputs(self):
86+
"""
87+
prepare inputs
88+
"""
89+
history_context = self.create_tensors(
90+
CONTEXT_FEATURE_NUM, 'history_context', [1], 'int64')
91+
link_info = self.create_tensors(
92+
LINK_INFO_FEATURE_NUM, 'link_info', [1], 'int64')
93+
start_node_info = self.create_tensors(
94+
NODE_INFO_FEATURE_NUM, 'start_node_info', [1], 'int64')
95+
end_node_info = self.create_tensors(
96+
NODE_INFO_FEATURE_NUM, 'end_node_info', [1], 'int64')
97+
future_context = self.create_tensors(
98+
CONTEXT_FEATURE_NUM, 'future_context', [1], 'int64')
99+
neighbor_link_info = self.create_tensors(
100+
LINK_INFO_FEATURE_NUM, 'neighbor_link_info', [1], 'int64')
101+
neighbor_start_node_info = self.create_tensors(
102+
NODE_INFO_FEATURE_NUM, 'neighbor_start_node_info', [1], 'int64')
103+
neighbor_end_node_info = self.create_tensors(
104+
NODE_INFO_FEATURE_NUM, 'neighbor_end_node_info', [1], 'int64')
105+
neighbor_link_state = self.create_tensors(
106+
LINK_STATE_FEATURE_NUM, 'neighbor_link_state', [1], 'int64')
107+
label = layers.data(
108+
name='label', shape=[1], dtype='int64', lod_level=1)
109+
110+
return {'history_context': history_context, \
111+
'link_info': link_info, \
112+
'start_node_info': start_node_info, \
113+
'end_node_info': end_node_info, \
114+
'future_context': future_context, \
115+
'neighbor_link_info': neighbor_link_info, \
116+
'neighbor_start_node_info': neighbor_start_node_info, \
117+
'neighbor_end_node_info': neighbor_end_node_info, \
118+
'neighbor_link_state': neighbor_link_state, \
119+
'label': label, \
120+
}
121+
122+
return inputs
123+
124+
def prepare_emb(self, feature_group, feature_info, out_size=0):
125+
"""
126+
prepare embedding
127+
"""
128+
embs = []
129+
for (i, feature) in enumerate(feature_info):
130+
emb = layers.embedding(
131+
input=feature_group[i],
132+
param_attr=fluid.ParamAttr(name='%s_emb' % feature[1]),
133+
size=self.feature_voc_num_dict[feature[1]],
134+
is_sparse=True)
135+
embs.append(emb)
136+
concat_emb = layers.concat(embs, axis=1)
137+
concat_emb = layers.softsign(concat_emb)
138+
139+
if out_size > 0:
140+
concat_emb = layers.fc(
141+
input=concat_emb,
142+
size=out_size,
143+
param_attr=fluid.ParamAttr(learning_rate=self.fc_lr),
144+
act='relu')
145+
146+
return concat_emb
147+
148+
def prepare_preds(self, feature):
149+
"""
150+
prepare predictions
151+
"""
152+
hidden1 = layers.fc(
153+
input=feature,
154+
size=self.output_hidden_dim,
155+
param_attr=fluid.ParamAttr(learning_rate=self.fc_lr),
156+
act='relu')
157+
158+
hidden2 = layers.fc(
159+
input=hidden1,
160+
size=self.output_hidden_dim,
161+
param_attr=fluid.ParamAttr(learning_rate=self.fc_lr),
162+
act='relu')
163+
164+
pred = layers.fc(
165+
input=hidden2,
166+
size=1,
167+
param_attr=fluid.ParamAttr(learning_rate=self.fc_lr),
168+
act=None)
169+
170+
return pred
171+
172+
def prepare_preds_with_name(self, feature, name=''):
173+
"""
174+
prepare predictions
175+
"""
176+
hidden1 = layers.fc(
177+
input=feature,
178+
size=self.output_hidden_dim,
179+
param_attr=fluid.ParamAttr(name + '_fc1', learning_rate=self.fc_lr),
180+
act='relu')
181+
182+
hidden2 = layers.fc(
183+
input=hidden1,
184+
size=self.output_hidden_dim,
185+
param_attr=fluid.ParamAttr(name + '_fc2', learning_rate=self.fc_lr),
186+
act='relu')
187+
188+
pred = layers.fc(
189+
input=hidden2,
190+
size=1,
191+
param_attr=fluid.ParamAttr(name + '_fc3', learning_rate=self.fc_lr),
192+
act=None)
193+
194+
return pred
195+
196+
def prepare_features(self, inputs):
197+
"""
198+
prepare features
199+
"""
200+
history_context_concat_emb = self.prepare_emb(
201+
inputs['history_context'], self.history_context_features, 8)
202+
link_info_concat_emb = self.prepare_emb(
203+
inputs["link_info"], self.link_info_features, 16)
204+
start_node_info_concat_emb = self.prepare_emb(
205+
inputs["start_node_info"], self.start_node_info_features, 8)
206+
end_node_info_concat_emb = self.prepare_emb(
207+
inputs["end_node_info"], self.end_node_info_features, 8)
208+
future_context_concat_emb = self.prepare_emb(
209+
inputs["future_context"], self.future_context_features, 8)
210+
neighbor_link_info_concat_emb = self.prepare_emb(
211+
inputs["neighbor_link_info"], self.neighbor_link_info_features, 16)
212+
neighbor_start_node_info_concat_emb = self.prepare_emb(
213+
inputs["neighbor_start_node_info"], self.neighbor_start_node_info_features, 8)
214+
neighbor_end_node_info_concat_emb = self.prepare_emb(
215+
inputs["neighbor_end_node_info"], self.neighbor_end_node_info_features, 8)
216+
neighbor_link_state_concat_emb = self.prepare_emb(
217+
inputs["neighbor_link_state"], self.neighbor_link_state_features, 8)
218+
219+
return {'history_context_concat_emb': history_context_concat_emb, \
220+
'link_info_concat_emb': link_info_concat_emb, \
221+
'start_node_info_concat_emb': start_node_info_concat_emb, \
222+
'end_node_info_concat_emb': end_node_info_concat_emb, \
223+
'future_context_concat_emb': future_context_concat_emb, \
224+
'neighbor_link_info_concat_emb': neighbor_link_info_concat_emb, \
225+
'neighbor_start_node_info_concat_emb': neighbor_start_node_info_concat_emb, \
226+
'neighbor_end_node_info_concat_emb': neighbor_end_node_info_concat_emb, \
227+
'neighbor_link_state_concat_emb': neighbor_link_state_concat_emb}
228+
229+
def attention(self, query_feature, key_feature, value_feature, hidden_dim, name):
230+
"""
231+
attention
232+
"""
233+
query_fc = layers.fc(
234+
input=query_feature,
235+
size=hidden_dim,
236+
param_attr=fluid.ParamAttr(name='query_fc_%s' % name, learning_rate=self.fc_lr),
237+
act='relu',
238+
num_flatten_dims=2)
239+
240+
key_fc = layers.fc(
241+
input=key_feature,
242+
size=hidden_dim,
243+
param_attr=fluid.ParamAttr('key_fc_%s' % name, learning_rate=self.fc_lr),
244+
act='relu',
245+
num_flatten_dims=2)
246+
247+
value_fc = layers.fc(
248+
input=value_feature,
249+
size=hidden_dim,
250+
param_attr=fluid.ParamAttr('value_fc_%s' % name, learning_rate=self.fc_lr),
251+
act='relu',
252+
num_flatten_dims=2)
253+
254+
query_key_mat = layers.matmul(query_fc, key_fc, False, True)
255+
query_key_mat = layers.scale(query_key_mat,
256+
scale=1.0 / math.sqrt(hidden_dim))
257+
matching_score = layers.softmax(query_key_mat, axis=2)
258+
attention = layers.matmul(matching_score, value_fc)
259+
attention
260+
261+
    def constgat(self, inputs):
        """
        constgat

        Fuse the target link's features with its neighbors' spatio-temporal
        features via attention.  `inputs` holds the embedded feature groups
        produced by prepare_features (same keys as prepare_inputs).  Returns
        the query features concatenated with the attention output.
        """
        history_context = inputs["history_context"]
        link_info = inputs["link_info"]
        start_node_info = inputs["start_node_info"]
        end_node_info = inputs["end_node_info"]
        future_context = inputs["future_context"]
        neighbor_link_info = inputs["neighbor_link_info"]
        neighbor_start_node_info = inputs["neighbor_start_node_info"]
        neighbor_end_node_info = inputs["neighbor_end_node_info"]
        neighbor_link_state = inputs["neighbor_link_state"]

        # Query: the target link's static features plus the future context.
        query_feature = layers.concat([
            link_info,
            start_node_info,
            end_node_info,
            future_context],
            axis=1)
        # Insert a singleton axis so the attention sees a rank-3 query.
        query_feature_reshape = layers.reshape(query_feature, shape=[-1, 1, query_feature.shape[-1]])

        # Static features of each neighbor link, one row per neighbor.
        neighbor_features = layers.concat([
            neighbor_link_info,
            neighbor_start_node_info,
            neighbor_end_node_info],
            axis=1)
        neighbor_features = layers.reshape(
            neighbor_features,
            shape=[-1, MAX_NEIGHBOR_NUM, neighbor_features.shape[-1]])
        # Tile each neighbor over SEQUENCE_LENGTH time steps, then unfold so
        # axis 1 enumerates (neighbor, time-step) pairs.  MAX_NEIGHBOR_NUM and
        # SEQUENCE_LENGTH come from `common` (see `from common import *`).
        neighbor_features = layers.expand(
            x=neighbor_features, expand_times=[1, 1, SEQUENCE_LENGTH])
        neighbor_features = layers.reshape(
            neighbor_features,
            shape=[-1, neighbor_features.shape[1] * SEQUENCE_LENGTH, \
                int(neighbor_features.shape[2] / SEQUENCE_LENGTH)])

        # Tile the historical context across all neighbors the same way.
        history_context = layers.reshape(
            history_context,
            shape=[-1, SEQUENCE_LENGTH, history_context.shape[-1]])
        history_context = layers.expand(
            x=history_context, expand_times=[1, MAX_NEIGHBOR_NUM, 1])
        '''
        history_context = layers.sequence_expand(
            history_context,
            link_info)
        '''

        # Traffic state (observed durations) per (neighbor, time-step).
        neighbor_link_state = layers.reshape(
            neighbor_link_state,
            shape=[-1, MAX_NEIGHBOR_NUM * SEQUENCE_LENGTH,
                neighbor_link_state.shape[-1]])

        # Keys: neighbor statics + history context + traffic state; values
        # share the same tensor (self-attention style K == V).
        key_feature = layers.concat([
            neighbor_features,
            history_context,
            neighbor_link_state],
            axis=2)

        value_feature = key_feature

        # NOTE(review): self.attention() must return the attended tensor;
        # as committed it ends with a bare expression and yields None — verify.
        attention = self.attention(query_feature_reshape, key_feature, value_feature, self.hidden_dim, 'gat')
        # Drop the singleton query axis: back to (batch, hidden_dim).
        attention = layers.reshape(attention, shape=[-1, attention.shape[-1]])

        out_features = layers.concat([query_feature, attention], axis=1)

        return out_features
def constgat_model(self):
330+
"""constgat model"""
331+
inputs = self.prepare_inputs()
332+
333+
features = self.prepare_features(inputs)
334+
history_context_concat_emb = features['history_context_concat_emb']
335+
link_info_concat_emb = features['link_info_concat_emb']
336+
start_node_info_concat_emb = features['start_node_info_concat_emb']
337+
end_node_info_concat_emb = features['end_node_info_concat_emb']
338+
future_context_concat_emb = features['future_context_concat_emb']
339+
neighbor_link_info_concat_emb = features['neighbor_link_info_concat_emb']
340+
neighbor_start_node_info_concat_emb = features['neighbor_start_node_info_concat_emb']
341+
neighbor_end_node_info_concat_emb = features['neighbor_end_node_info_concat_emb']
342+
neighbor_link_state_concat_emb = features['neighbor_link_state_concat_emb']
343+
label = features['label']
344+
345+
x = self.constgat({
346+
'history_context': history_context_concat_emb,
347+
'link_info': link_info_concat_emb,
348+
'start_node_info': start_node_info_concat_emb,
349+
'end_node_info': end_node_info_concat_emb,
350+
'future_context': future_context_concat_emb,
351+
'neighbor_link_info': neighbor_link_info_concat_emb,
352+
'neighbor_start_node_info': neighbor_start_node_info_concat_emb,
353+
'neighbor_end_node_info': neighbor_end_node_info_concat_emb,
354+
'neighbor_link_state': neighbor_link_state_concat_emb})
355+
356+
pred = self.prepare_preds_with_name(x, 'out_pred')
357+
label = layers.scale(label, scale=0.01)
358+
loss = layers.huber_loss(pred, label, 1.0)
359+
loss = layers.mean(loss)
360+
return pred, label, loss
361+
362+
    def train(self):
        """
        train

        Build the model graph and attach an L2-regularized SGD optimizer;
        when self.is_fleet is set, wrap it for distributed (geo
        parameter-server) training.  Returns (pred, label, loss).
        """
        pred, label, loss = self.constgat_model()

        # Keep the loss variable from being garbage-collected between
        # iterations so it can be fetched by the training loop.
        loss.persistable = True

        optimizer = fluid.optimizer.SGD(
            learning_rate=self.base_lr,
            regularization=fluid.regularizer.L2DecayRegularizer(regularization_coeff=self.reg))
        if self.is_fleet:
            # NOTE(review): this import path looks unusual for fluid fleet —
            # confirm the module exists in the targeted Paddle version.
            import paddle.fluid.incubate.fleet.geo_parameter_server as fleet
            fleet.init()
            optimizer = fleet.DistributedOptimizer(optimizer)
        optimizer.minimize(loss)

        return pred, label, loss

0 commit comments

Comments
 (0)