SciSharp
diff --git a/src/TensorFlowNET.Core/Layers/Layer.cs b/src/TensorFlowNET.Core/Layers/Layer.cs
Lines changed: 4 additions & 2 deletions
diff --git a/src/TensorFlowNET.Core/Operations/NnOps/BasicLSTMCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/BasicLSTMCell.cs
Lines changed: 121 additions & 6 deletions
diff --git a/src/TensorFlowNET.Core/Operations/NnOps/BasicRNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/BasicRNNCell.cs
Lines changed: 1 addition & 1 deletion
diff --git a/src/TensorFlowNET.Core/Operations/NnOps/LSTMStateTuple.cs b/src/TensorFlowNET.Core/Operations/NnOps/LSTMStateTuple.cs
Lines changed: 8 additions & 14 deletions
diff --git a/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs b/src/TensorFlowNET.Core/Operations/NnOps/RNNCell.cs
Lines changed: 18 additions & 14 deletions
@@ -65,7 +65,9 @@ public Tensor[] __call__(Tensor inputs,
             variable_scope scope_context_manager = null;
             if (built)
             {
-
+                scope_context_manager = tf.variable_scope(_scope,
+                    reuse: true,
+                    auxiliary_name_scope: false);
             }
             else
             {
@@ -181,7 +183,7 @@ protected override string _name_scope()
             return _current_scope.original_name_scope;
         }
 
-        private void _set_scope(VariableScope scope = null)
+        protected void _set_scope(VariableScope scope = null)
         {
             if (_scope == null)
             {
 
@@ -14,12 +14,17 @@ namespace Tensorflow
     /// Basic LSTM recurrent network cell.
     /// The implementation is based on: http://arxiv.org/abs/1409.2329.
     /// </summary>
-    public class BasicLSTMCell : LayerRnnCell
+    public class BasicLstmCell : LayerRnnCell
     {
         int _num_units;
         float _forget_bias;
         bool _state_is_tuple;
         IActivation _activation;
+        LSTMStateTuple _state;
+        VariableV1 _kernel;
+        VariableV1 _bias;
+        string _WEIGHTS_VARIABLE_NAME = "kernel";
+        string _BIAS_VARIABLE_NAME = "bias";
 
         /// <summary>
         /// Initialize the basic LSTM cell.
@@ -31,7 +36,7 @@ public class BasicLSTMCell : LayerRnnCell
         /// <param name="reuse"></param>
         /// <param name="name"></param>
         /// <param name="dtype"></param>
-        public BasicLSTMCell(int num_units, float forget_bias = 1.0f, bool state_is_tuple = true,
+        public BasicLstmCell(int num_units, float forget_bias = 1.0f, bool state_is_tuple = true,
             IActivation activation = null, bool? reuse = null, string name = null,
             TF_DataType dtype = TF_DataType.DtInvalid) : base(_reuse: reuse, name: name, dtype: dtype)
         {
@@ -44,13 +49,123 @@ public BasicLSTMCell(int num_units, float forget_bias = 1.0f, bool state_is_tupl
                 _activation = tf.nn.tanh();
         }
 
-        public LSTMStateTuple state_size
+        protected override void build(TensorShape input_shape) // Creates the kernel and bias variables, then marks the cell as built.
+        {
+            var input_depth = input_shape.dims.Last(); // size of the last input dimension
+            var h_depth = _num_units;
+            _kernel = add_weight(_WEIGHTS_VARIABLE_NAME,
+                shape: new[] { input_depth + h_depth, 4 * _num_units }); // 4 * _num_units columns: call() splits the gate inputs four ways
+            _bias = add_weight(_BIAS_VARIABLE_NAME,
+                shape: new[] { 4 * _num_units },
+                initializer: tf.zeros_initializer);
+            built = true;
+        }
+
+        public Tensor[] __call__(Tensor inputs, LSTMStateTuple state) // Overload that accepts the previous LSTM state as a tuple.
+        {
+            _state = state; // stashed in a field; call() reads c and h from _state, not from its own state parameter
+            return base.__call__(inputs); // NOTE(review): the state is not forwarded as an argument, only via the field
+        }
+
+        /// <summary>
+        /// Computes the new cell and hidden state of the LSTM cell from the inputs and the previous state held in the _state field.
+        /// </summary>
+        /// <param name="inputs">Input tensor; concatenated with the previous hidden state h before the kernel matmul.</param>
+        /// <param name="training">Not used by this implementation.</param>
+        /// <param name="state">Not read by this implementation; the previous state is taken from the _state field instead.</param>
+        /// <returns>An array { new_c, new_h } when _state_is_tuple is true; the non-tuple path currently throws NotImplementedException.</returns>
+        protected override Tensor[] call(Tensor inputs, Tensor training = null, Tensor state = null)
+        {
+            var one = constant_op.constant(1, dtype: dtypes.int32); // int32 constant 1, used as the split axis below
+            // Parameters of gates are concatenated into one multiply for efficiency.
+            Tensor c = null;
+            Tensor h = null;
+            if(_state_is_tuple)
+                (c, h) = ((Tensor)_state.c, (Tensor)_state.h); // NOTE(review): _state is assigned in __call__(inputs, state); confirm it cannot be null here
+            else
+            {
+                // array_ops.split(value: state, num_or_size_splits: 2, axis: one);
+                throw new NotImplementedException("BasicLstmCell call"); // non-tuple state is not supported yet
+            }
+            var gate_inputs = math_ops.matmul(array_ops.concat(new[] { inputs, h }, 1), _kernel as RefVariable);
+            gate_inputs = nn_ops.bias_add(gate_inputs, _bias as RefVariable);
+
+            // i = input_gate, j = new_input, f = forget_gate, o = output_gate
+            var tensors = array_ops.split(value: gate_inputs, num_or_size_splits: 4, axis: one);
+            var (i, j, f, o) = (tensors[0], tensors[1], tensors[2], tensors[3]);
+
+            var forget_bias_tensor = constant_op.constant(_forget_bias, dtype: f.dtype); // forget bias as a constant with f's dtype
+            // Note that using `add` and `multiply` instead of `+` and `*` gives a
+            // performance improvement. So using those at the cost of readability.
+            var new_c = gen_math_ops.add(
+                math_ops.multiply(c, math_ops.sigmoid(gen_math_ops.add(f, forget_bias_tensor))),
+                math_ops.multiply(math_ops.sigmoid(i), _activation.Activate(j)));
+
+            var new_h = math_ops.multiply(_activation.Activate(new_c), math_ops.sigmoid(o));
+
+
+            if (_state_is_tuple)
+                return new[] { new_c, new_h };
+            else
+                return new[] { array_ops.concat(new[] { new_c, new_h }, 1) }; // NOTE(review): unreachable today, the non-tuple path throws above
+        }
+
+        public override object get_initial_state(Tensor inputs = null, Tensor batch_size = null, TF_DataType dtype = TF_DataType.DtInvalid)
+        {
+            if (inputs != null)
+                throw new NotImplementedException("get_initial_state input is not null"); // deriving the state from inputs is not implemented
+
+            return zero_state(batch_size, dtype); // the LSTMStateTuple built by zero_state, returned as object
+        }
+
+        /// <summary>
+        /// Return zero-filled state tensor(s), built inside a "{type name}ZeroState" name scope.
+        /// </summary>
+        /// <param name="batch_size">Batch size tensor, forwarded to _zero_state_tensors.</param>
+        /// <param name="dtype">Element type passed to array_ops.zeros for each state tensor.</param>
+        /// <returns>The LSTMStateTuple produced by _zero_state_tensors for this cell's state_size.</returns>
+        private LSTMStateTuple zero_state(Tensor batch_size, TF_DataType dtype)
+        {
+            LSTMStateTuple output = null;
+            tf_with(ops.name_scope($"{GetType().Name}ZeroState", values: new { batch_size }), delegate
+            {
+                output = _zero_state_tensors(state_size, batch_size, dtype); // throws NotImplementedException unless state_size is an LSTMStateTuple
+            });
+
+            return output;
+        }
+
+        private LSTMStateTuple _zero_state_tensors(object state_size, Tensor batch_size, TF_DataType dtype) // Builds one zero tensor per entry of an LSTMStateTuple state_size.
+        {
+            if (state_size is LSTMStateTuple state_size_tuple)
+            {
+                var outputs = state_size_tuple.Flatten()
+                    .Select(x => (int)x) // entries are cast from object to int
+                    .Select(s =>
+                    {
+                        var c = rnn_cell_impl._concat(batch_size, s);
+                        var size = array_ops.zeros(c, dtype: dtype); // zero tensor whose shape argument is c
+
+                        var c_static = rnn_cell_impl._concat(batch_size, s, @static: true);
+                        size.set_shape(c_static); // shape set from the @static: true variant of the same _concat call
+
+                        return size;
+                    }).ToArray();
+
+                return new LSTMStateTuple(outputs[0], outputs[1]); // Flatten yields { c, h }, so outputs[0] is c and outputs[1] is h
+            }
+
+            throw new NotImplementedException("_zero_state_tensors"); // any state_size that is not an LSTMStateTuple is unsupported
+        }
+
+        public override object state_size // Either an LSTMStateTuple of sizes or a single int, depending on _state_is_tuple.
         {
             get
             {
-                return _state_is_tuple ? 
-                    new LSTMStateTuple(_num_units, _num_units) : 
-                    (LSTMStateTuple)(2 * _num_units);
+                if (_state_is_tuple)
+                    return new LSTMStateTuple(_num_units, _num_units); // c and h each have _num_units
+                else
+                    return 2 * _num_units; // plain int returned as object
             }
         }
     }
 
@@ -26,7 +26,7 @@ public class BasicRnnCell : LayerRnnCell
         int _num_units;
         Func<Tensor, string, Tensor> _activation;
 
-        public override LSTMStateTuple state_size => _num_units;
+        public override object state_size => _num_units;
         public override int output_size => _num_units;
         public VariableV1 _kernel;
         string _WEIGHTS_VARIABLE_NAME = "kernel";
 
@@ -12,30 +12,24 @@ namespace Tensorflow.Operations
     /// 
     /// Only used when `state_is_tuple=True`.
     /// </summary>
-    public class LSTMStateTuple
+    public class LSTMStateTuple : ICanBeFlattened
     {
-        int c;
-        int h;
-
-        public LSTMStateTuple(int c)
-        {
-            this.c = c;
-        }
+        public object c; // holds an int or a Tensor, depending on which constructor was used
+        public object h; // holds an int or a Tensor, depending on which constructor was used
 
         public LSTMStateTuple(int c, int h)
         {
             this.c = c;
             this.h = h;
         }
 
-        public static implicit operator int(LSTMStateTuple tuple)
+        public LSTMStateTuple(Tensor c, Tensor h) // tensor-valued variant of the tuple
         {
-            return tuple.c;
+            this.c = c;
+            this.h = h;
         }
 
-        public static implicit operator LSTMStateTuple(int c)
-        {
-            return new LSTMStateTuple(c);
-        }
+        public object[] Flatten() // returns the two members in the order { c, h }
+            => new[] { c, h };
     }
 }
@@ -49,7 +49,7 @@ public abstract class RnnCell : Layers.Layer
         /// difference between TF and Keras RNN cell.
         /// </summary>
         protected bool _is_tf_rnn_cell = false;
-        public virtual LSTMStateTuple state_size { get; }
+        public virtual object state_size { get; }
 
         public virtual int output_size { get; }
 
@@ -64,7 +64,7 @@ public RnnCell(bool trainable = true,
             _is_tf_rnn_cell = true;
         }
 
-        public virtual Tensor get_initial_state(Tensor inputs = null, Tensor batch_size = null, TF_DataType dtype = TF_DataType.DtInvalid)
+        public virtual object get_initial_state(Tensor inputs = null, Tensor batch_size = null, TF_DataType dtype = TF_DataType.DtInvalid)
         {
             if (inputs != null)
                 throw new NotImplementedException("get_initial_state input is not null");
@@ -78,32 +78,36 @@ public virtual Tensor get_initial_state(Tensor inputs = null, Tensor batch_size
         /// <param name="batch_size">Batch size tensor, forwarded to _zero_state_tensors.</param>
         /// <param name="dtype">Element type forwarded to _zero_state_tensors.</param>
         /// <returns>The tensor produced by _zero_state_tensors for this cell's state_size.</returns>
-        public Tensor zero_state(Tensor batch_size, TF_DataType dtype)
+        private Tensor zero_state(Tensor batch_size, TF_DataType dtype) // NOTE(review): visibility narrowed from public to private; confirm no external callers remain
         {
             Tensor output = null;
-            var state_size = this.state_size;
-            tf_with(ops.name_scope($"{this.GetType().Name}ZeroState", values: new { batch_size }), delegate
+            tf_with(ops.name_scope($"{GetType().Name}ZeroState", values: new { batch_size }), delegate
             {
                 output = _zero_state_tensors(state_size, batch_size, dtype);
             });
 
             return output;
         }
 
-        private Tensor _zero_state_tensors(int state_size, Tensor batch_size, TF_DataType dtype)
+        private Tensor _zero_state_tensors(object state_size, Tensor batch_size, TF_DataType dtype) // state_size is now object; only an int value is handled below
         {
-            var output = nest.map_structure(s =>
+            if(state_size is int state_size_int)
             {
-                var c = rnn_cell_impl._concat(batch_size, s);
-                var size = array_ops.zeros(c, dtype: dtype);
+                var output = nest.map_structure(s =>
+                {
+                    var c = rnn_cell_impl._concat(batch_size, s);
+                    var size = array_ops.zeros(c, dtype: dtype); // zero tensor whose shape argument is c
 
-                var c_static = rnn_cell_impl._concat(batch_size, s, @static: true);
-                size.set_shape(c_static);
+                    var c_static = rnn_cell_impl._concat(batch_size, s, @static: true);
+                    size.set_shape(c_static); // shape set from the @static: true variant of the same _concat call
 
-                return size;
-            }, state_size);
+                    return size;
+                }, state_size_int);
 
-            return output;
+                return output;
+            }
+
+            throw new NotImplementedException("_zero_state_tensors"); // any state_size that is not an int is unsupported here
         }
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -65,7 +65,9 @@ public Tensor[] __call__(Tensor inputs,`
`65`	`65`	`variable_scope scope_context_manager = null;`
`66`	`66`	`if (built)`
`67`	`67`	`{`
`68`		`-`
	`68`	`+ scope_context_manager = tf.variable_scope(_scope,`
	`69`	`+ reuse: true,`
	`70`	`+ auxiliary_name_scope: false);`
`69`	`71`	`}`
`70`	`72`	`else`
`71`	`73`	`{`
`@@ -181,7 +183,7 @@ protected override string _name_scope()`
`181`	`183`	`return _current_scope.original_name_scope;`
`182`	`184`	`}`
`183`	`185`
`184`		`- private void _set_scope(VariableScope scope = null)`
	`186`	`+ protected void _set_scope(VariableScope scope = null)`
`185`	`187`	`{`
`186`	`188`	`if (_scope == null)`
`187`	`189`	`{`
Original file line number	Diff line number	Diff line change
`@@ -12,30 +12,24 @@ namespace Tensorflow.Operations`
`12`	`12`	`///`
`13`	`13`	/// Only used when `state_is_tuple=True`.
`14`	`14`	`/// </summary>`
`15`		`- public class LSTMStateTuple`
	`15`	`+ public class LSTMStateTuple : ICanBeFlattened`
`16`	`16`	`{`
`17`		`- int c;`
`18`		`- int h;`
`19`		`-`
`20`		`- public LSTMStateTuple(int c)`
`21`		`- {`
`22`		`- this.c = c;`
`23`		`- }`
	`17`	`+ public object c;`
	`18`	`+ public object h;`
`24`	`19`
`25`	`20`	`public LSTMStateTuple(int c, int h)`
`26`	`21`	`{`
`27`	`22`	`this.c = c;`
`28`	`23`	`this.h = h;`
`29`	`24`	`}`
`30`	`25`
`31`		`- public static implicit operator int(LSTMStateTuple tuple)`
	`26`	`+ public LSTMStateTuple(Tensor c, Tensor h)`
`32`	`27`	`{`
`33`		`- return tuple.c;`
	`28`	`+ this.c = c;`
	`29`	`+ this.h = h;`
`34`	`30`	`}`
`35`	`31`
`36`		`- public static implicit operator LSTMStateTuple(int c)`
`37`		`- {`
`38`		`- return new LSTMStateTuple(c);`
`39`		`- }`
	`32`	`+ public object[] Flatten()`
	`33`	`+ => new[] { c, h };`
`40`	`34`	`}`
`41`	`35`	`}`