-
Notifications
You must be signed in to change notification settings - Fork 397
Open
Description
Hi, I am trying to train on OpenImages V4 with 600 classes. Training stopped with the following error. I wonder whether this error comes from Luminoth. Any suggestions on how to fix it?
INFO:tensorflow:step: 1986, file: 0012c270e7a0d8e9, train_loss: 7.52168273926, in 15.08s
Traceback (most recent call last):
File "/root/venv2/bin/lumi", line 11, in <module>
sys.exit(cli())
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 722, in __call__
return self.main(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 697, in main
rv = self.invoke(ctx)
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 1066, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 895, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 535, in invoke
return callback(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/train.py", line 307, in train
config, environment=environment
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/train.py", line 239, in run
], options=run_options)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 671, in run
run_metadata=run_metadata)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1148, in run
run_metadata=run_metadata)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1239, in run
raise six.reraise(*original_exc_info)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1224, in run
return self._sess.run(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1296, in run
run_metadata=run_metadata)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", line 1076, in run
return self._sess.run(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 887, in run
run_metadata_ptr)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1110, in _run
feed_dict_tensor, options, run_metadata)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1286, in _do_run
run_metadata)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1308, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3,4] vs. [9,4]
[[{{node losses/RCNNLoss/sub_1}} = Sub[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](losses/RCNNLos
atherV2)]]
Caused by op u'losses/RCNNLoss/sub_1', defined at:
File "/root/venv2/bin/lumi", line 11, in <module>
sys.exit(cli())
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 722, in __call__
return self.main(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 697, in main
rv = self.invoke(ctx)
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 1066, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 895, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/root/venv2/local/lib/python2.7/site-packages/click/core.py", line 535, in invoke
return callback(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/train.py", line 307, in train
config, environment=environment
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/train.py", line 67, in run
total_loss = model.loss(prediction_dict)
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/models/fasterrcnn/fasterrcnn.py", line 192, in loss
prediction_dict['classification_prediction']
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/models/fasterrcnn/rcnn.py", line 391, in loss
sigma=self._l1_sigma
File "/root/venv2/local/lib/python2.7/site-packages/luminoth/utils/losses.py", line 22, in smooth_l1_loss
diff = bbox_prediction - bbox_target
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 862, in binary_op_wrapper
return func(x, y, name=name)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 8318, in sub
"Sub", x=x, y=y, name=name)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3272, in create_op
op_def=op_def)
File "/root/venv2/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1768, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [3,4] vs. [9,4]
[[{{node losses/RCNNLoss/sub_1}} = Sub[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](losses/RCNNLos
atherV2)]]
(venv2) root@3e5d7b5d1a41:~#