@@ -44,6 +44,22 @@ public Tensor substr<T>(T input, int pos, int len,
4444 => tf . Context . ExecuteOp ( "Substr" , name , new ExecuteOpArgs ( input , pos , len )
4545 . SetAttributes ( new { unit = @uint } ) ) ;
4646
/// <summary>
/// Computes the length of each string given in the input tensor by
/// executing the "StringLength" op.
/// </summary>
/// <param name="input">Tensor passed to the op as its single input.</param>
/// <param name="name">Optional name handed to the op execution; may be null.</param>
/// <param name="unit">
/// Value forwarded as the op's "unit" attribute. Defaults to "BYTE".
/// NOTE(review): the op presumably also accepts "UTF8_CHAR" - confirm against the op definition.
/// </param>
/// <returns>The tensor produced by the "StringLength" op.</returns>
public Tensor string_length(Tensor input, string name = null, string unit = "BYTE")
{
    // Build the argument bundle first so the ExecuteOp call stays short.
    var op_args = new ExecuteOpArgs(input)
    {
        // Reads the "unit" attribute back from an existing op when attributes are requested.
        GetGradientAttrs = node => new
        {
            unit = node.get_attr<string>("unit")
        }
    }.SetAttributes(new { unit });

    return tf.Context.ExecuteOp("StringLength", name, op_args);
}
62+
4763 public RaggedTensor string_split_v2 ( Tensor input , string sep = "" , int maxsplit = - 1 , string name = null )
4864 {
4965 return tf_with ( ops . name_scope ( name , "StringSplit" ) , scope =>
@@ -69,5 +85,49 @@ public RaggedTensor string_split_v2(Tensor input, string sep = "", int maxsplit
6985 validate : false ) ;
7086 } ) ;
7187 }
88+
/// <summary>
/// Decodes each string in <paramref name="input"/> into a sequence of code points
/// and also returns the start offset of every decoded character.
/// </summary>
/// <param name="input">Tensor of strings to decode.</param>
/// <param name="input_encoding">Forwarded as the op's "input_encoding" attribute.</param>
/// <param name="errors">
/// Forwarded as the op's "errors" attribute. Defaults to "replace", matching the
/// default of TensorFlow's unicode_decode_with_offsets.
/// </param>
/// <param name="replacement_char">Forwarded as the op's "replacement_char" attribute; defaults to 0xFFFD.</param>
/// <param name="replace_control_characters">Forwarded as the op's "replace_control_characters" attribute.</param>
/// <param name="name">Optional name used for the name scope and for the underlying op.</param>
/// <returns>
/// A pair (codepoints, byte_start_offsets), both ragged tensors built from the same row splits.
/// </returns>
public (RaggedTensor, RaggedTensor) unicode_decode_with_offsets(Tensor input, string input_encoding, string errors = "replace",
    int replacement_char = 0xFFFD, bool replace_control_characters = false, string name = null)
{
    return tf_with(ops.name_scope(name, "UnicodeDecodeWithOffsets"), scope =>
    {
        // The helper already yields the pair in (codepoints, offsets) order,
        // so it can be returned directly without unpacking and re-packing.
        return _unicode_decode(input, input_encoding, errors,
            replacement_char, replace_control_characters,
            with_offsets: true, name: name);
    });
}
100+
/// <summary>
/// Shared implementation for the unicode decode entry points. Executes either the
/// "UnicodeDecodeWithOffsets" or the "UnicodeDecode" op and wraps the flat outputs
/// into ragged tensors.
/// </summary>
/// <param name="input">Tensor of strings passed to the op as its single input.</param>
/// <param name="input_encoding">Forwarded as the op's "input_encoding" attribute.</param>
/// <param name="errors">Forwarded as the op's "errors" attribute.</param>
/// <param name="replacement_char">Forwarded as the op's "replacement_char" attribute.</param>
/// <param name="replace_control_characters">Forwarded as the op's "replace_control_characters" attribute.</param>
/// <param name="with_offsets">
/// When true the offsets-producing op is used and the second tuple item is populated;
/// when false only code points are decoded and the second tuple item is null.
/// </param>
/// <param name="name">Optional name handed to the op execution; may be null.</param>
/// <returns>
/// (codepoints, offsets). <c>offsets</c> is null when <paramref name="with_offsets"/> is false.
/// </returns>
(RaggedTensor, RaggedTensor) _unicode_decode(Tensor input, string input_encoding, string errors, int replacement_char,
    bool replace_control_characters, bool with_offsets, string name = null)
{
    // Both ops take the same attributes; they differ only in whether a third
    // output (the character start offsets) is produced.
    var op_type = with_offsets ? "UnicodeDecodeWithOffsets" : "UnicodeDecode";

    var flat_result = tf.Context.ExecuteOp(op_type, name, new ExecuteOpArgs(input)
    {
        GetGradientAttrs = op => new
        {
            input_encoding = op.get_attr<string>("input_encoding"),
            errors = op.get_attr<string>("errors"),
            replacement_char = op.get_attr<int>("replacement_char"),
            replace_control_characters = op.get_attr<bool>("replace_control_characters"),
            Tsplits = op.get_attr<TF_DataType>("Tsplits")
        }
    }.SetAttributes(new
    {
        input_encoding,
        errors,
        replacement_char,
        replace_control_characters
    }));

    // Output 0 holds the row splits shared by every ragged result; output 1 the code points.
    var row_splits = flat_result[0];
    var codepoints = RaggedTensor.from_row_splits(flat_result[1], row_splits, validate: false);

    // Output 2 (start offsets) only exists for the offsets-producing op.
    RaggedTensor offsets = null;
    if (with_offsets)
    {
        offsets = RaggedTensor.from_row_splits(flat_result[2], row_splits, validate: false);
    }

    return (codepoints, offsets);
}
72132 }
73133}
0 commit comments