Skip to content

Commit bad610d

Browse files
committed
_TopKGrad, _SoftmaxCrossEntropyWithLogitsGrad
1 parent cd77358 commit bad610d

File tree

10 files changed

+174
-18
lines changed

10 files changed

+174
-18
lines changed

src/TensorFlowNET.Core/Gradients/gradients_impl.py.cs

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,17 @@ public static Tensor[] _GradientsHelper(Tensor[] ys,
131131
// for ops that do not have gradients.
132132
var grad_fn = ops.get_gradient_function(op);
133133

134+
foreach(var (i, out_grad) in enumerate(out_grads))
135+
{
136+
if(out_grad == null)
137+
{
138+
if (loop_state != null)
139+
;
140+
else
141+
out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i);
142+
}
143+
}
144+
134145
with(ops.name_scope(op.name + "_grad"), scope1 =>
135146
{
136147
string name1 = scope1;
@@ -240,28 +251,27 @@ private static bool _IsPartitionedCall(Operation op)
240251
private static Tensor[] _AggregatedGrads(Dictionary<string, Tensor[][]> grads, Operation op, string gradient_uid, object loop_state, int aggregation_method = 0)
241252
{
242253
var out_grads = _GetGrads(grads, op);
243-
for(int i = 0; i < out_grads.Length; i++)
254+
var return_grads = new Tensor[out_grads.Length];
255+
256+
foreach(var (i, out_grad) in enumerate(out_grads))
244257
{
245-
var out_grad = out_grads[i];
246-
if(loop_state != null)
258+
if (loop_state != null)
247259
{
248260

249261
}
250262

251-
// Grads have to be Tensors or IndexedSlices
252-
253263
// Aggregate multiple gradients, and convert [] to None.
254-
if(out_grad != null)
264+
if (out_grad != null)
255265
{
256-
if(out_grad.Length < 2)
266+
if (out_grad.Length < 2)
257267
{
258268
string used = "nop";
259-
return new Tensor[] { out_grad[0] };
269+
return_grads[i] = out_grad[0];
260270
}
261271
}
262272
}
263273

264-
return null;
274+
return return_grads;
265275
}
266276

267277
/// <summary>

src/TensorFlowNET.Core/Gradients/nn_grad.py.cs

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Linq;
34
using System.Text;
45
using Tensorflow.Operations;
56

@@ -13,16 +14,17 @@ public class nn_grad
1314
/// <param name="op"></param>
1415
/// <param name="grad"></param>
1516
/// <returns></returns>
16-
/// <summary>
/// Gradient for the BiasAdd op: the incoming gradient flows through
/// unchanged to the value input, and gen_nn_ops.bias_add_grad produces
/// the gradient for the bias input.
/// </summary>
/// <param name="op">The forward BiasAdd operation.</param>
/// <param name="grads">Incoming gradients; only grads[0] is used.</param>
/// <returns>Gradients w.r.t. the value input and the bias input.</returns>
public static Tensor[] _BiasAddGrad(Operation op, Tensor[] grads)
{
    var outBackprop = grads[0];
    // "data_format" may be absent on the op; get_attr then yields null.
    var dataFormat = op.get_attr("data_format")?.ToString();
    var biasGrad = gen_nn_ops.bias_add_grad(out_backprop: outBackprop, data_format: dataFormat);
    return new Tensor[] { outBackprop, biasGrad };
}
2224

23-
/// <summary>
/// Gradient for the Relu op, computed by gen_nn_ops.relu_grad from the
/// incoming gradient and the forward activation (op.outputs[0]).
/// </summary>
/// <param name="op">The forward Relu operation.</param>
/// <param name="grads">Incoming gradients; only grads[0] is used.</param>
/// <returns>A single-element array with the gradient w.r.t. the Relu input.</returns>
public static Tensor[] _ReluGrad(Operation op, Tensor[] grads)
{
    var reluGrad = gen_nn_ops.relu_grad(grads[0], op.outputs[0]);
    return new Tensor[] { reluGrad };
}
2729

2830
/// <summary>
@@ -37,8 +39,57 @@ public static Tensor[] _SoftmaxCrossEntropyWithLogitsGrad(Operation op, Tensor[]
3739
var grad_loss = grads[0];
3840
var grad_grad = grads[1];
3941
var softmax_grad = op.outputs[1];
42+
var grad = _BroadcastMul(grad_loss, softmax_grad);
4043

41-
throw new NotImplementedException("_SoftmaxCrossEntropyWithLogitsGrad");
44+
var logits = op.inputs[0];
45+
if(grad_grad != null && !IsZero(grad_grad))
46+
{
47+
throw new NotImplementedException("_SoftmaxCrossEntropyWithLogitsGrad");
48+
}
49+
50+
return new Tensor[]
51+
{
52+
grad,
53+
_BroadcastMul(grad_loss, -nn_ops.log_softmax(logits))
54+
};
55+
}
56+
57+
/// <summary>
/// Returns true when `g` is statically known to be all zeros, i.e. when it
/// was produced by a "Zeros" or "ZerosLike" op. Any other producer is not
/// handled yet.
/// </summary>
private static bool IsZero(Tensor g)
{
    switch (g.op.type)
    {
        case "ZerosLike":
        case "Zeros":
            return true;
        default:
            // NOTE(review): presumably a full port would also inspect
            // constant fill values here — confirm against TF's nn_grad.py.
            throw new NotImplementedException("IsZero");
    }
}
64+
65+
/// <summary>
/// Multiplies `vec` (expanded with a trailing dimension of size 1) by
/// `mat`, relying on broadcasting in the * operator.
/// </summary>
private static Tensor _BroadcastMul(Tensor vec, Tensor mat)
{
    var column = array_ops.expand_dims(vec, -1);
    return column * mat;
}
70+
71+
/// <summary>
/// Return the gradients for TopK. Only the index-flattening prologue is
/// ported so far; the scatter of the flattened gradient back into the
/// input shape is not implemented yet.
/// </summary>
/// <param name="op">The forward TopK/TopKV2 operation.</param>
/// <param name="grads">Incoming gradients; grads[0] is w.r.t. the values output.</param>
/// <returns>Gradients w.r.t. the TopK inputs (not yet implemented).</returns>
public static Tensor[] _TopKGrad(Operation op, Tensor[] grads)
{
    var grad = grads[0];
    // grads[1] (w.r.t. the indices output) is read but intentionally unused.
    var _ = grads[1];

    var in_shape = array_ops.shape(op.inputs[0]);
    var ind_shape = array_ops.shape(op.outputs[1]);

    // int32 is not supported on GPU hence up-casting
    var ind_lastdim = array_ops.gather(
        math_ops.cast(ind_shape, TF_DataType.TF_INT64),
        array_ops.size(ind_shape) - 1);

    // Flatten indices to 2D.
    var ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack(new object[] { -1, ind_lastdim }));

    throw new NotImplementedException("nn_grad._TopKGrad");
}
4394
}
4495
}

src/TensorFlowNET.Core/Gradients/ops.gradient_function_mapping.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,18 @@ public static Func<Operation, Tensor[], Tensor[]> get_gradient_function(Operatio
1414
// map tensorflow\python\ops\math_grad.py
1515
return (oper, out_grads) =>
1616
{
17-
Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'");
17+
// Console.WriteLine($"get_gradient_function: {oper.type} '{oper.name}'");
1818

1919
switch (oper.type)
2020
{
2121
case "Add":
2222
return math_grad._AddGrad(oper, out_grads);
23+
case "BiasAdd":
24+
return nn_grad._BiasAddGrad(oper, out_grads);
2325
case "Identity":
2426
return math_grad._IdGrad(oper, out_grads);
27+
case "MatMul":
28+
return math_grad._MatMulGrad(oper, out_grads);
2529
case "Mul":
2630
return math_grad._MulGrad(oper, out_grads);
2731
case "Mean":
@@ -36,8 +40,13 @@ public static Func<Operation, Tensor[], Tensor[]> get_gradient_function(Operatio
3640
return math_grad._RealDivGrad(oper, out_grads);
3741
case "Reshape":
3842
return array_grad._ReshapeGrad(oper, out_grads);
43+
case "Relu":
44+
return nn_grad._ReluGrad(oper, out_grads);
3945
case "SoftmaxCrossEntropyWithLogits":
4046
return nn_grad._SoftmaxCrossEntropyWithLogitsGrad(oper, out_grads);
47+
case "TopK":
48+
case "TopKV2":
49+
return nn_grad._TopKGrad(oper, out_grads);
4150
default:
4251
throw new NotImplementedException($"get_gradient_function {oper.type}");
4352
}

src/TensorFlowNET.Core/Operations/NnOps/gen_nn_ops.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,16 @@ public static Tensor[] _fused_batch_norm(Tensor x,
9494
return _op.outputs;
9595
}
9696

97+
/// <summary>
/// Adds a "LogSoftmax" op to the graph for the given logits.
/// </summary>
/// <param name="logits">Input tensor.</param>
/// <param name="name">Optional name for the created op.</param>
/// <returns>The single output tensor of the LogSoftmax op.</returns>
public static Tensor log_softmax(Tensor logits, string name = null)
{
    var op = _op_def_lib._apply_op_helper("LogSoftmax", name: name, args: new { logits });
    return op.outputs[0];
}
106+
97107
public static Tensor max_pool(Tensor input,
98108
int[] ksize,
99109
int[] strides,

src/TensorFlowNET.Core/Operations/Operation.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,10 @@ public object get_attr(string name)
185185
if (oneof_value == "type")
186186
return x.Type;
187187

188-
return x.GetType().GetProperty(oneof_value).GetValue(x);
188+
object result = x.GetType().GetProperty(oneof_value).GetValue(x);
189+
if (result is Google.Protobuf.ByteString byteString)
190+
return byteString.ToStringUtf8();
191+
return result;
189192
}
190193

191194
public TF_AttrMetadata GetAttributeMetadata(string attr_name, Status s)

src/TensorFlowNET.Core/Operations/array_ops.py.cs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ private static Tensor _constant_if_small<T>(T value, Shape shape, TF_DataType dt
4646
}
4747
}
4848

49-
public static Tensor _autopacking_helper(Tensor[] list_or_tuple, TF_DataType dtype, string name)
49+
public static Tensor _autopacking_helper(object[] list_or_tuple, TF_DataType dtype, string name)
5050
{
5151
var must_pack = false;
52-
var converted_elems = new List<Tensor>();
52+
var converted_elems = new List<object>();
5353
return with(ops.name_scope(name), scope =>
5454
{
5555
foreach (var (i, elem) in enumerate(list_or_tuple))
@@ -58,7 +58,27 @@ public static Tensor _autopacking_helper(Tensor[] list_or_tuple, TF_DataType dty
5858
must_pack = true;
5959
}
6060

61-
return gen_array_ops.pack(converted_elems.ToArray(), name: scope);
61+
if(must_pack)
62+
{
63+
var elems_as_tensors = new List<Tensor>();
64+
foreach (var (i, elem) in enumerate(converted_elems))
65+
{
66+
if (elem is Tensor tensor)
67+
elems_as_tensors.Add(tensor);
68+
else
69+
{
70+
var elem_tensor = constant_op.constant(elem, dtype: dtype, name: i.ToString());
71+
elems_as_tensors.Add(elem_tensor);
72+
}
73+
}
74+
75+
return gen_array_ops.pack(elems_as_tensors.ToArray(), name: scope);
76+
}
77+
else
78+
{
79+
// return converted_elems.ToArray();
80+
throw new NotImplementedException("_autopacking_helper.converted_elems");
81+
}
6282
});
6383
}
6484

@@ -355,5 +375,15 @@ public static Tensor transpose(Tensor a, int[] perm = null, string name = "trans
355375

356376
public static Tensor slice<Tb, Ts>(Tensor input, Tb[] begin, Ts[] size, string name = null)
357377
=> gen_array_ops.slice(input, begin, size, name: name);
378+
379+
/// <summary>
/// Stacks `values` along `axis`. Only axis == 0 is supported so far, in
/// which case the list is handed to ops.convert_to_tensor in one shot.
/// </summary>
/// <param name="values">The values to stack (e.g. an object[] of tensors/constants).</param>
/// <param name="axis">Axis to stack along; only 0 is implemented.</param>
/// <param name="name">Name for the resulting tensor.</param>
public static Tensor stack(object values, int axis = 0, string name = "stack")
{
    if (axis != 0)
        throw new NotImplementedException("array_ops.stack");

    // If the input is a constant list, it can be converted to a constant op
    return ops.convert_to_tensor(values, name: name);
}
387+
358388
}
359389
}

src/TensorFlowNET.Core/Operations/control_flow_ops.py.cs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using System.Linq;
44
using System.Text;
55
using Tensorflow.Operations;
6+
using util = Tensorflow.control_flow_util;
67

78
namespace Tensorflow
89
{
@@ -226,5 +227,18 @@ public static (Tensor, Tensor) @switch(Tensor data,
226227
return gen_control_flow_ops.@switch(data, pred, name: name);
227228
});
228229
}
230+
231+
/// <summary>
/// Builds a zeros tensor matching op.outputs[index]. The Switch/loop case
/// and resource-dtype outputs are not ported yet.
/// </summary>
/// <param name="op">Operation whose output is being zeroed.</param>
/// <param name="index">Index of the output to mirror.</param>
public static Tensor ZerosLikeOutsideLoop(Operation op, int index)
{
    var val = op.outputs[index];

    // Switch ops belong to control-flow contexts; that path is unported.
    if (util.IsSwitch(op))
        throw new NotImplementedException("ZerosLikeOutsideLoop");

    // Resource tensors cannot be zeroed with zeros_like here yet.
    if (val.dtype == TF_DataType.TF_RESOURCE)
        throw new NotImplementedException("ZerosLikeOutsideLoop");

    return array_ops.zeros_like(val, optimize: false);
}
229243
}
230244
}

src/TensorFlowNET.Core/Operations/control_flow_util.py.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,15 @@ public static bool IsLoopExit(Operation op)
1515
{
1616
return op.type == "Exit" || op.type == "RefExit";
1717
}
18+
19+
/// <summary>
/// Return true if `op` is a Switch (a "Switch" or "RefSwitch" op).
/// </summary>
/// <param name="op">Operation to inspect.</param>
/// <returns>True when the op type is Switch or RefSwitch.</returns>
public static bool IsSwitch(Operation op)
{
    switch (op.type)
    {
        case "Switch":
        case "RefSwitch":
            return true;
        default:
            return false;
    }
}
1828
}
1929
}

src/TensorFlowNET.Core/Operations/nn_ops.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,23 @@ public static Tensor bias_add(Tensor value,
4242
});
4343
}
4444

45+
/// <summary>
/// Computes log-softmax activations by delegating to the shared _softmax
/// helper with gen_nn_ops.log_softmax as the compute op.
/// </summary>
/// <param name="logits">Input tensor.</param>
/// <param name="axis">Dimension to operate over; defaults to the last.</param>
/// <param name="name">Optional op name.</param>
public static Tensor log_softmax(Tensor logits, int axis = -1, string name = null)
    => _softmax(logits, gen_nn_ops.log_softmax, axis, name);
49+
50+
/// <summary>
/// Shared helper for softmax-style ops. Applies `compute_op` directly when
/// `dim` is the last dimension (the only case the kernels handle here);
/// other axes are not ported yet — presumably they would require moving
/// `dim` to the end first (TODO confirm against TF's nn_ops.py).
/// </summary>
/// <param name="logits">Input tensor (converted via ops.convert_to_tensor).</param>
/// <param name="compute_op">Op builder taking (logits, name).</param>
/// <param name="dim">Dimension to operate over; -1 means last.</param>
/// <param name="name">Optional op name.</param>
public static Tensor _softmax(Tensor logits, Func<Tensor, string, Tensor> compute_op, int dim = -1, string name = null)
{
    logits = ops.convert_to_tensor(logits);

    var rank = logits.shape.Length;
    var lastDim = dim == -1 || dim == rank - 1;
    if (lastDim)
        return compute_op(logits, name);

    throw new NotImplementedException("_softmax helper");
}
61+
4562
public static Tensor softmax_cross_entropy_with_logits_v2_helper(Tensor labels,
4663
Tensor logits,
4764
int axis = -1,

src/TensorFlowNET.Core/ops.py.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,8 @@ public static Tensor internal_convert_to_tensor(object value, TF_DataType dtype
426426
return constant_op.constant(doubleVal, dtype: dtype, name: name);
427427
case RefVariable varVal:
428428
return varVal._TensorConversionFunction(as_ref: as_ref);
429+
case object[] objects:
430+
return array_ops._autopacking_helper(objects, dtype: dtype, name: name);
429431
default:
430432
throw new NotImplementedException($"internal_convert_to_tensor: Can't convert {value.GetType().Name} to Tensor");
431433
}

0 commit comments

Comments
 (0)