AdamOptimizer, reduce_prod

Oceania2018 · Oceania2018 · commit adae3aa6ffff · 2019-03-15T07:17:45.000-05:00
diff --git a/src/TensorFlowNET.Core/Framework/common_shapes.py.cs b/src/TensorFlowNET.Core/Framework/common_shapes.py.cs
@@ -29,5 +29,10 @@ public static Tensor _broadcast_shape_helper(Tensor shape_x, Tensor shape_y)
         {
             throw new NotFiniteNumberException();
         }
+
+        /// <summary>
+        /// Returns the value of the <c>rank</c> property of <paramref name="tensor"/>.
+        /// </summary>
+        /// <param name="tensor">Tensor whose rank is read.</param>
+        /// <returns>The tensor's rank as a nullable int.</returns>
+        public static int? rank(Tensor tensor) => tensor.rank;
     }
 }
diff --git a/src/TensorFlowNET.Core/Gradients/math_grad.py.cs b/src/TensorFlowNET.Core/Gradients/math_grad.py.cs
@@ -57,6 +57,24 @@ public static (Tensor, Tensor) _MulGrad(Operation op, Tensor grad)
             return (reshape1, reshape2);
         }
 
+        /// <summary>
+        /// Gradient for the Mean op: the Sum gradient divided by the ratio of
+        /// input element count to output element count.
+        /// </summary>
+        /// <param name="op">The Mean operation being differentiated.</param>
+        /// <param name="grad">Incoming gradient with respect to the op's output.</param>
+        /// <returns>
+        /// The gradient for <c>op.inputs[0]</c>, and null for the second input.
+        /// </returns>
+        public static (Tensor, Tensor) _MeanGrad(Operation op, Tensor grad)
+        {
+            var sum_grad = _SumGrad(op, grad).Item1;
+
+            // The scaling factor is computed in-graph from the runtime shapes:
+            // prod(input shape) / max(prod(output shape), 1).
+            var input_shape = array_ops.shape(op.inputs[0]);
+            var output_shape = array_ops.shape(op.outputs[0]);
+            var factor = _safe_shape_div(math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape));
+
+            // Cast the factor to the gradient's dtype before dividing.
+            return (math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), null);
+        }
+
+        /// <summary>
+        /// Floor-divides <paramref name="x"/> by <paramref name="y"/> after clamping
+        /// <paramref name="y"/> to a minimum of 1.
+        /// </summary>
+        /// <param name="x">Dividend tensor.</param>
+        /// <param name="y">Divisor tensor; entries below 1 are replaced by 1.</param>
+        /// <returns>The result of <c>floordiv(x, maximum(y, 1))</c>.</returns>
+        private static Tensor _safe_shape_div(Tensor x, Tensor y)
+        {
+            // Clamp first so the divisor is never below 1.
+            var divisor = gen_math_ops.maximum(y, 1);
+            return math_ops.floordiv(x, divisor);
+        }
+
         public static (Tensor, Tensor) _SubGrad(Operation op, Tensor grad)
         {
             var x = op.inputs[0];
@@ -81,12 +99,25 @@ public static bool _ShapesFullySpecifiedAndEqual(Tensor x, Tensor y, Tensor grad
 
         public static (Tensor, Tensor) _SumGrad(Operation op, Tensor grad)
         {
-            if (op.inputs[0].NDims > -1)
-            {
+            var input_0_shape = op.inputs[0]._shape_tuple();
+            Tensor input_shape = null;
 
+            if (input_0_shape != null)
+            {
+                var axes = tensor_util.constant_value(op.inputs[1]);
+                if(!(axes is null))
+                {
+                    var rank = axes.shape.Rank;
+                    grad = array_ops.reshape(grad, new int[] { 1 });
+                    if (!input_0_shape.Contains(-1))
+                        input_shape = constant_op.constant(input_0_shape);
+                    else
+                        input_shape = array_ops.shape(op.inputs[0]);
+                    return (gen_array_ops.tile(grad, input_shape), null);
+                }
             }
 
-            var input_shape = array_ops.shape(op.inputs[0]);
+            input_shape = array_ops.shape(op.inputs[0]);
             ops.colocate_with(input_shape);
             var output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1]);
             var tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims);
@@ -95,11 +126,6 @@ public static (Tensor, Tensor) _SumGrad(Operation op, Tensor grad)
             return (gen_array_ops.tile(grad, tile_scaling), null);
         }
 
-        public static Tensor _safe_shape_div(Tensor x, Tensor y)
-        {
-            return math_ops.floordiv(x, gen_math_ops.maximum(y, 1));
-        }
-
         public static (Tensor, Tensor) _RealDivGrad(Operation op, Tensor grad)
         {
             var x = op.inputs[0];
diff --git a/src/TensorFlowNET.Core/Keras/Layers/Dense.cs b/src/TensorFlowNET.Core/Keras/Layers/Dense.cs
@@ -63,7 +63,7 @@ protected override Tensor call(Tensor inputs, Tensor training = null)
             var rank = inputs.rank;
             if(rank > 2)
             {
-                throw new NotImplementedException("");
+                throw new NotImplementedException("call rank > 2");
             }
             else
             {
diff --git a/src/TensorFlowNET.Core/Operations/array_ops.py.cs b/src/TensorFlowNET.Core/Operations/array_ops.py.cs
@@ -82,7 +82,7 @@ public static Tensor rank(Tensor input, string name = null)
         public static Tensor ones_like<T>(T tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool optimize = true)
             => ones_like_impl(tensor, dtype, name, optimize);
 
-        public static Tensor reshape(Tensor tensor, Tensor shape, string name = null)
+        /// <summary>
+        /// Reshapes <paramref name="tensor"/> to <paramref name="shape"/> by delegating
+        /// to <c>gen_array_ops.reshape</c>.
+        /// </summary>
+        /// <param name="tensor">Value to reshape.</param>
+        /// <param name="shape">Target shape.</param>
+        /// <param name="name">Optional name for the operation; forwarded to the generated op.</param>
+        /// <returns>The output of the Reshape op.</returns>
+        public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)
         {
-            return gen_array_ops.reshape(tensor, shape, null);
+            // Forward the caller-supplied name instead of discarding it.
+            return gen_array_ops.reshape(tensor, shape, name);
         }
diff --git a/src/TensorFlowNET.Core/Operations/gen_array_ops.cs b/src/TensorFlowNET.Core/Operations/gen_array_ops.cs
@@ -116,7 +116,7 @@ public static (Tensor, Tensor) broadcast_gradient_args(Tensor s0, Tensor s1, str
             return (_op.outputs[0], _op.outputs[1]);
         }
 
-        public static Tensor reshape(Tensor tensor, Tensor shape, string name = null)
+        public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)
         {
             var _op = _op_def_lib._apply_op_helper("Reshape", name, new { tensor, shape });
             return _op.outputs[0];
diff --git a/src/TensorFlowNET.Core/Operations/gen_math_ops.cs b/src/TensorFlowNET.Core/Operations/gen_math_ops.cs
@@ -20,16 +20,16 @@ public static class gen_math_ops
         /// <param name="keep_dims"> An optional `bool`. Defaults to `False`. If true, retain reduced dimensions with length 1.</param>
         /// <param name="name"> A name for the operation (optional).</param>
         /// <returns> A `Tensor`. Has the same type as `input`.</returns>
-        public static Tensor mean(Tensor input, Tensor axis, bool keep_dims= false, string name = null)
+        public static Tensor mean<T1, T2>(T1 input, T2 axis, bool keep_dims= false, string name = null)
         {
             var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims });
 
             return _op.outputs[0];
         }
 
-        public static Tensor mean(Tensor input, int[] axis, bool keep_dims = false, string name = null)
+        /// <summary>
+        /// Builds a "Prod" op from the given input and reduction indices.
+        /// </summary>
+        /// <param name="input">Value passed as the op's <c>input</c> argument.</param>
+        /// <param name="axis">Value passed as the op's <c>reduction_indices</c> argument.</param>
+        /// <param name="keep_dims">Value passed as the op's <c>keep_dims</c> argument.</param>
+        /// <param name="name">A name for the operation (optional).</param>
+        /// <returns>The first output of the created op.</returns>
+        public static Tensor prod<T1, T2>(T1 input, T2 axis, bool keep_dims = false, string name = null)
         {
-            var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims, name });
+            var op_args = new { input, reduction_indices = axis, keep_dims };
+            var _op = _op_def_lib._apply_op_helper("Prod", name, args: op_args);
 
             return _op.outputs[0];
         }
@@ -186,7 +186,7 @@ public static Tensor maximum<T1, T2>(T1 x, T2 y, string name = null)
             return _op.outputs[0];
         }
 
-        public static Tensor _max(Tensor input, int[] axis, bool keep_dims=false, string name = null)
+        public static Tensor _max<Tx, Ty>(Tx input, Ty axis, bool keep_dims=false, string name = null)
         {
             var _op = _op_def_lib._apply_op_helper("Max", name, new { input, reduction_indices = axis, keep_dims });
 
diff --git a/src/TensorFlowNET.Core/Operations/math_ops.py.cs b/src/TensorFlowNET.Core/Operations/math_ops.py.cs
@@ -2,6 +2,7 @@
 using System;
 using System.Collections.Generic;
 using System.Text;
+using Tensorflow.Framework;
 
 namespace Tensorflow
 {
@@ -39,9 +40,41 @@ public static Tensor cast(Tensor x, TF_DataType dtype = TF_DataType.DtInvalid, s
         public static Tensor reduce_mean(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null)
         {
             var r = _ReductionDims(input_tensor, axis);
-            var m = gen_math_ops.mean(input_tensor, (int[]) r, keepdims, name);
-            return _may_reduce_to_scalar(keepdims,axis, m);
+            // An explicit axis is handed to the Mean op as given; otherwise the
+            // dimensions produced by _ReductionDims are used.
+            var m = axis == null
+                ? gen_math_ops.mean(input_tensor, r, keepdims, name)
+                : gen_math_ops.mean(input_tensor, axis, keepdims, name);
+            return _may_reduce_to_scalar(keepdims, axis, m);
+        }
+
+        /// <summary>
+        /// Computes the product of elements across dimensions of a tensor.
+        /// </summary>
+        /// <param name="input_tensor">Tensor to reduce.</param>
+        /// <param name="axis">
+        /// Dimensions to reduce, passed to the Prod op as given. When null, the
+        /// dimensions returned by _ReductionDims are used instead.
+        /// </param>
+        /// <param name="keepdims">Forwarded to the Prod op as keep_dims and to _may_reduce_to_scalar.</param>
+        /// <param name="name">Optional name for the operation.</param>
+        /// <returns>The Prod op output after _may_reduce_to_scalar.</returns>
+        public static Tensor reduce_prod(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null)
+        {
+            var r = _ReductionDims(input_tensor, axis);
+            var product = axis != null
+                ? gen_math_ops.prod(input_tensor, axis, keepdims, name)
+                : gen_math_ops.prod(input_tensor, r, keepdims, name);
+            return _may_reduce_to_scalar(keepdims, axis, product);
         }
+        
         /// <summary>
         /// Returns (x - y)(x - y) element-wise.
         /// </summary>
@@ -134,7 +167,10 @@ public static Tensor reduce_logsumexp(Tensor input_tensor, int[] axis = null, bo
 
         public static Tensor reduce_max(Tensor input_tensor, int[] axis = null, bool keepdims = false, string name = null)
         {
-            return _may_reduce_to_scalar(keepdims, axis, gen_math_ops._max(input_tensor, (int[])_ReductionDims(input_tensor, axis), keepdims, name));
+            var r = _ReductionDims(input_tensor, axis);
+            var max = (axis != null) ? gen_math_ops._max(input_tensor, axis, keepdims, name) :
+                gen_math_ops._max(input_tensor, r, keepdims, name);
+            return _may_reduce_to_scalar(keepdims, axis, max);
         }
 
         /// <summary>
@@ -197,18 +233,19 @@ private static Tensor _ReductionDims(Tensor x, Tensor axis)
             }
         }
         
-        private static object _ReductionDims(Tensor x, int[] axis)
+        private static Tensor _ReductionDims(Tensor x, int[] axis)
         {
             if (axis != null)
             {
-                return axis;
+                // should return axis. or check before.
+                return null;
             }
             else
             {
-                var rank = array_ops.rank(x);
+                var rank = common_shapes.rank(x);
                 if (rank != null)
                 {
-                   return constant_op.constant(np.arange(rank), TF_DataType.TF_INT32);
+                   return constant_op.constant(np.arange(rank.Value), TF_DataType.TF_INT32);
                 }
                 return range(0, rank, 1);
             }
@@ -303,5 +340,20 @@ public static Tensor conj(Tensor x, string name = null)
                 return x;
             });
         }
+
+        /// <summary>
+        /// Divides <paramref name="x"/> by <paramref name="y"/>; a thin wrapper that
+        /// delegates to <c>_truediv_python3</c>.
+        /// </summary>
+        /// <param name="x">Numerator tensor.</param>
+        /// <param name="y">Denominator tensor.</param>
+        /// <param name="name">Optional name for the operation.</param>
+        /// <returns>The tensor returned by <c>_truediv_python3</c>.</returns>
+        public static Tensor truediv(Tensor x, Tensor y, string name = null)
+        {
+            return _truediv_python3(x, y, name);
+        }
+
+        /// <summary>
+        /// Emits a real_div op for <paramref name="x"/> / <paramref name="y"/> inside a
+        /// "truediv" name scope.
+        /// </summary>
+        /// <remarks>
+        /// No dtype validation or casting is done here; both inputs are passed
+        /// straight to <c>gen_math_ops.real_div</c>.
+        /// </remarks>
+        /// <param name="x">Numerator tensor.</param>
+        /// <param name="y">Denominator tensor.</param>
+        /// <param name="name">Optional name; used to open the name scope.</param>
+        /// <returns>The output of the real_div op.</returns>
+        public static Tensor _truediv_python3(Tensor x, Tensor y, string name = null)
+        {
+            return with(ops.name_scope(name, "truediv", new { x, y }), scope =>
+            {
+                // The op is named after the scope that was opened.
+                name = scope;
+
+                return gen_math_ops.real_div(x, y, name: name);
+            });
+        }
     }
 }
diff --git a/src/TensorFlowNET.Core/Tensors/Tensor.cs b/src/TensorFlowNET.Core/Tensors/Tensor.cs
@@ -74,7 +74,8 @@ public long[] shape
 
         public int[] _shape_tuple()
         {
-            return null;
+            if (shape == null) return null;
+            return shape.Select(x => (int)x).ToArray();
         }
 
         public TensorShape getShape()
diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs
@@ -51,6 +51,15 @@ public static NDArray MakeNdarray(TensorProto tensor)
             if (tensor.TensorContent.Length > 0)
                 return np.frombuffer(tensor.TensorContent.ToByteArray(), tensor_dtype)
                     .reshape(shape);
+            else if (tensor.Dtype == DataType.DtHalf || tensor.Dtype == DataType.DtBfloat16)
+                ;
+            else if (tensor.Dtype == DataType.DtFloat)
+                ;
+            else if (new DataType[] { DataType.DtInt32, DataType.DtUint8 }.Contains(tensor.Dtype))
+                if (tensor.IntVal.Count == 1)
+                    return np.repeat(np.array(tensor.IntVal[0]), Convert.ToInt32(num_elements))
+                        .reshape(shape);
+
             throw new NotImplementedException("MakeNdarray");
         }
 
diff --git a/src/TensorFlowNET.Core/Train/AdamOptimizer.cs b/src/TensorFlowNET.Core/Train/AdamOptimizer.cs
@@ -0,0 +1,25 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Tensorflow.Train
+{
+    /// <summary>
+    /// Optimizer that implements the Adam algorithm.
+    /// http://arxiv.org/abs/1412.6980
+    /// </summary>
+    public class AdamOptimizer : Optimizer
+    {
+        // Hyper-parameters captured at construction; nothing in this class reads them yet.
+        private float _beta1;
+        private float _beta2;
+        private float _epsilon;
+
+        /// <summary>
+        /// Creates the optimizer and stores its hyper-parameters.
+        /// </summary>
+        /// <param name="learning_rate">Learning rate, forwarded to the base Optimizer.</param>
+        /// <param name="beta1">Stored as-is; presumably the first-moment decay rate from the Adam paper (confirm when the update step is added).</param>
+        /// <param name="beta2">Stored as-is; presumably the second-moment decay rate from the Adam paper (confirm when the update step is added).</param>
+        /// <param name="epsilon">Stored as-is; presumably the numerical-stability constant from the Adam paper (confirm when the update step is added).</param>
+        /// <param name="use_locking">Forwarded to the base Optimizer.</param>
+        /// <param name="name">Optimizer name, forwarded to the base Optimizer; defaults to "Adam".</param>
+        public AdamOptimizer(float learning_rate, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1e-8f, bool use_locking = false, string name = "Adam")
+            : base(learning_rate, use_locking, name)
+        {
+            (_beta1, _beta2, _epsilon) = (beta1, beta2, epsilon);
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs b/src/TensorFlowNET.Core/Train/GradientDescentOptimizer.cs
@@ -2,7 +2,7 @@
 using System.Collections.Generic;
 using System.Text;
 
-namespace Tensorflow
+namespace Tensorflow.Train
 {
     public class GradientDescentOptimizer : Optimizer
     {
diff --git a/src/TensorFlowNET.Core/Train/Optimizer.cs b/src/TensorFlowNET.Core/Train/Optimizer.cs
@@ -34,6 +34,7 @@ public Optimizer(float learning_rate, bool use_locking, string name = null)
 
             Name = name;
             _use_locking = use_locking;
+            LearningRate = learning_rate;
             // Dictionary of slots.
             _slots = new Dictionary<string, object>();
             _non_slot_dict = new Dictionary<string, object>();
@@ -49,6 +50,7 @@ public Optimizer(float learning_rate, bool use_locking, string name = null)
         /// was not `None`, that operation also increments `global_step`.
         /// </returns>
         public Operation minimize(Tensor loss, 
+            RefVariable global_step = null,
             GateGradientType gate_gradients = GateGradientType.GATE_OP,
             bool colocate_gradients_with_ops = false)
         {
diff --git a/src/TensorFlowNET.Core/Train/tf.optimizers.cs b/src/TensorFlowNET.Core/Train/tf.optimizers.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Text;
+using Tensorflow.Train;
 
 namespace Tensorflow
 {
@@ -11,6 +12,8 @@ public static class train
         {
             public static Optimizer GradientDescentOptimizer(float learning_rate) => new GradientDescentOptimizer(learning_rate);
 
+            /// <summary>
+            /// Creates an AdamOptimizer with the given learning rate; every other
+            /// constructor parameter keeps its default value.
+            /// </summary>
+            /// <param name="learning_rate">Learning rate passed to the AdamOptimizer constructor.</param>
+            /// <returns>The new optimizer, typed as the base Optimizer.</returns>
+            public static Optimizer AdamOptimizer(float learning_rate)
+            {
+                return new AdamOptimizer(learning_rate);
+            }
+
             public static Saver Saver() => new Saver();
 
             public static string write_graph(Graph graph, string logdir, string name, bool as_text = true) => graph_io.write_graph(graph, logdir, name, as_text);
diff --git a/src/TensorFlowNET.Core/ops.py.cs b/src/TensorFlowNET.Core/ops.py.cs
@@ -349,6 +349,7 @@ public static Func<Operation, Tensor, Tensor[]> get_gradient_function(Operation
         {
             if (op.inputs == null) return null;
 
+            // map tensorflow\python\ops\math_grad.py
             return (oper, out_grads) =>
             {
                 // Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'");
@@ -364,6 +365,9 @@ public static Func<Operation, Tensor, Tensor[]> get_gradient_function(Operation
                     case "Mul":
                         var mul = math_grad._MulGrad(oper, out_grads);
                         return new Tensor[] { mul.Item1, mul.Item2 };
+                    case "Mean":
+                        var mean = math_grad._MeanGrad(oper, out_grads);
+                        return new Tensor[] { mean.Item1, mean.Item2 };
                     case "Sum":
                         var sum = math_grad._SumGrad(oper, out_grads);
                         return new Tensor[] { sum.Item1, sum.Item2 };
diff --git a/test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs b/test/TensorFlowNET.Examples/TextClassification/cnn_models/VdCnn.cs
@@ -119,10 +119,11 @@ public VdCnn(int alphabet_size, int document_max_len, int num_class)
                 var y_one_hot = tf.one_hot(y, num_class);
                 loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits: logits, labels: y_one_hot));
 
-                var update_ops = tf.get_collection(ops.GraphKeys.UPDATE_OPS) as List<Operation>;
-                with(tf.control_dependencies(update_ops.ToArray()), delegate
+                var update_ops = tf.get_collection(ops.GraphKeys.UPDATE_OPS) as List<object>;
+                with(tf.control_dependencies(update_ops.Select(x => (Operation)x).ToArray()), delegate
                 {
-
+                    var adam = tf.train.AdamOptimizer(learning_rate);
+                    adam.minimize(loss, global_step: global_step);
                 });
             });
         }

Original file line number	Diff line number	Diff line change
`@@ -29,5 +29,10 @@ public static Tensor _broadcast_shape_helper(Tensor shape_x, Tensor shape_y)`
`29`	`29`	`{`
`30`	`30`	`throw new NotFiniteNumberException();`
`31`	`31`	`}`
	`32`	`+`
	`33`	`+ public static int? rank(Tensor tensor)`
	`34`	`+ {`
	`35`	`+ return tensor.rank;`
	`36`	`+ }`
`32`	`37`	`}`
`33`	`38`	`}`
Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ protected override Tensor call(Tensor inputs, Tensor training = null)`
`63`	`63`	`var rank = inputs.rank;`
`64`	`64`	`if(rank > 2)`
`65`	`65`	`{`
`66`		`- throw new NotImplementedException("");`
	`66`	`+ throw new NotImplementedException("call rank > 2");`
`67`	`67`	`}`
`68`	`68`	`else`
`69`	`69`	`{`
Original file line number	Diff line number	Diff line change
`@@ -82,7 +82,7 @@ public static Tensor rank(Tensor input, string name = null)`
`82`	`82`	`public static Tensor ones_like<T>(T tensor, TF_DataType dtype = TF_DataType.DtInvalid, string name = null, bool optimize = true)`
`83`	`83`	`=> ones_like_impl(tensor, dtype, name, optimize);`
`84`	`84`
`85`		`- public static Tensor reshape(Tensor tensor, Tensor shape, string name = null)`
	`85`	`+ public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)`
`86`	`86`	`{`
`87`	`87`	`return gen_array_ops.reshape(tensor, shape, null);`
`88`	`88`	`}`
Original file line number	Diff line number	Diff line change
`@@ -116,7 +116,7 @@ public static (Tensor, Tensor) broadcast_gradient_args(Tensor s0, Tensor s1, str`
`116`	`116`	`return (_op.outputs[0], _op.outputs[1]);`
`117`	`117`	`}`
`118`	`118`
`119`		`- public static Tensor reshape(Tensor tensor, Tensor shape, string name = null)`
	`119`	`+ public static Tensor reshape<T1, T2>(T1 tensor, T2 shape, string name = null)`
`120`	`120`	`{`
`121`	`121`	`var _op = _op_def_lib._apply_op_helper("Reshape", name, new { tensor, shape });`
`122`	`122`	`return _op.outputs[0];`
Original file line number	Diff line number	Diff line change
`@@ -20,16 +20,16 @@ public static class gen_math_ops`
`20`	`20`	/// <param name="keep_dims"> An optional `bool`. Defaults to `False`. If true, retain reduced dimensions with length 1.</param>
`21`	`21`	`/// <param name="name"> A name for the operation (optional).</param>`
`22`	`22`	/// <returns> A `Tensor`. Has the same type as `input`.</returns>
`23`		`- public static Tensor mean(Tensor input, Tensor axis, bool keep_dims= false, string name = null)`
	`23`	`+ public static Tensor mean<T1, T2>(T1 input, T2 axis, bool keep_dims= false, string name = null)`
`24`	`24`	`{`
`25`	`25`	`var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims });`
`26`	`26`
`27`	`27`	`return _op.outputs[0];`
`28`	`28`	`}`
`29`	`29`
`30`		`- public static Tensor mean(Tensor input, int[] axis, bool keep_dims = false, string name = null)`
	`30`	`+ public static Tensor prod<T1, T2>(T1 input, T2 axis, bool keep_dims = false, string name = null)`
`31`	`31`	`{`
`32`		`- var _op = _op_def_lib._apply_op_helper("Mean", name, args: new { input, reduction_indices = axis, keep_dims = keep_dims, name });`
	`32`	`+ var _op = _op_def_lib._apply_op_helper("Prod", name, args: new { input, reduction_indices = axis, keep_dims });`
`33`	`33`
`34`	`34`	`return _op.outputs[0];`
`35`	`35`	`}`
`@@ -186,7 +186,7 @@ public static Tensor maximum<T1, T2>(T1 x, T2 y, string name = null)`
`186`	`186`	`return _op.outputs[0];`
`187`	`187`	`}`
`188`	`188`
`189`		`- public static Tensor _max(Tensor input, int[] axis, bool keep_dims=false, string name = null)`
	`189`	`+ public static Tensor _max<Tx, Ty>(Tx input, Ty axis, bool keep_dims=false, string name = null)`
`190`	`190`	`{`
`191`	`191`	`var _op = _op_def_lib._apply_op_helper("Max", name, new { input, reduction_indices = axis, keep_dims });`
`192`	`192`
Original file line number	Diff line number	Diff line change
`@@ -74,7 +74,8 @@ public long[] shape`
`74`	`74`
`75`	`75`	`public int[] _shape_tuple()`
`76`	`76`	`{`
`77`		`- return null;`
	`77`	`+ if (shape == null) return null;`
	`78`	`+ return shape.Select(x => (int)x).ToArray();`
`78`	`79`	`}`
`79`	`80`
`80`	`81`	`public TensorShape getShape()`