@@ -94,8 +94,6 @@ public DatasetPass load_data(
9494 var fileBytes = File . ReadAllBytes ( path ) ;
9595 var ( x_train , x_test ) = LoadX ( fileBytes ) ;
9696 var ( labels_train , labels_test ) = LoadY ( fileBytes ) ;
97- x_test . astype ( np . int32 ) ;
98- labels_test . astype ( np . int32 ) ;
9997
10098 var indices = np . arange < int > ( len ( x_train ) ) ;
10199 np . random . shuffle ( indices , seed ) ;
@@ -107,100 +105,113 @@ public DatasetPass load_data(
107105 x_test = x_test [ indices ] ;
108106 labels_test = labels_test [ indices ] ;
109107
108+ var x_train_array = ( int [ , ] ) x_train . ToMultiDimArray < int > ( ) ;
109+ var x_test_array = ( int [ , ] ) x_test . ToMultiDimArray < int > ( ) ;
110+ var labels_train_array = ( long [ ] ) labels_train . ToArray < long > ( ) ;
111+ var labels_test_array = ( long [ ] ) labels_test . ToArray < long > ( ) ;
112+
110113 if ( start_char != null )
111114 {
112- int [ , ] new_x_train = new int [ x_train . shape [ 0 ] , x_train . shape [ 1 ] + 1 ] ;
113- for ( var i = 0 ; i < x_train . shape [ 0 ] ; i ++ )
115+ int [ , ] new_x_train_array = new int [ x_train_array . GetLength ( 0 ) , x_train_array . GetLength ( 1 ) + 1 ] ;
116+ for ( var i = 0 ; i < x_train_array . GetLength ( 0 ) ; i ++ )
114117 {
115- new_x_train [ i , 0 ] = ( int ) start_char ;
116- for ( var j = 0 ; j < x_train . shape [ 1 ] ; j ++ )
118+ new_x_train_array [ i , 0 ] = ( int ) start_char ;
119+ for ( var j = 0 ; j < x_train_array . GetLength ( 1 ) ; j ++ )
117120 {
118- new_x_train [ i , j + 1 ] = x_train [ i ] [ j ] ;
121+ if ( x_train_array [ i , j ] == 0 )
122+ break ;
123+ new_x_train_array [ i , j + 1 ] = x_train_array [ i , j ] ;
119124 }
120125 }
121- int [ , ] new_x_test = new int [ x_test . shape [ 0 ] , x_test . shape [ 1 ] + 1 ] ;
122- for ( var i = 0 ; i < x_test . shape [ 0 ] ; i ++ )
126+ int [ , ] new_x_test_array = new int [ x_test_array . GetLength ( 0 ) , x_test_array . GetLength ( 1 ) + 1 ] ;
127+ for ( var i = 0 ; i < x_test_array . GetLength ( 0 ) ; i ++ )
123128 {
124- new_x_test [ i , 0 ] = ( int ) start_char ;
125- for ( var j = 0 ; j < x_test . shape [ 1 ] ; j ++ )
129+ new_x_test_array [ i , 0 ] = ( int ) start_char ;
130+ for ( var j = 0 ; j < x_test_array . GetLength ( 1 ) ; j ++ )
126131 {
127- new_x_test [ i , j + 1 ] = x_test [ i ] [ j ] ;
132+ if ( x_test_array [ i , j ] == 0 )
133+ break ;
134+ new_x_test_array [ i , j + 1 ] = x_test_array [ i , j ] ;
128135 }
129136 }
130- x_train = new NDArray ( new_x_train ) ;
131- x_test = new NDArray ( new_x_test ) ;
137+ x_train_array = new_x_train_array ;
138+ x_test_array = new_x_test_array ;
132139 }
133140 else if ( index_from != 0 )
134141 {
135- for ( var i = 0 ; i < x_train . shape [ 0 ] ; i ++ )
142+ for ( var i = 0 ; i < x_train_array . GetLength ( 0 ) ; i ++ )
136143 {
137- for ( var j = 0 ; j < x_train . shape [ 1 ] ; j ++ )
144+ for ( var j = 0 ; j < x_train_array . GetLength ( 1 ) ; j ++ )
138145 {
139- if ( x_train [ i , j ] != 0 )
140- x_train [ i , j ] += index_from ;
146+ if ( x_train_array [ i , j ] == 0 )
147+ break ;
148+ x_train_array [ i , j ] += index_from ;
141149 }
142150 }
143- for ( var i = 0 ; i < x_test . shape [ 0 ] ; i ++ )
151+ for ( var i = 0 ; i < x_test_array . GetLength ( 0 ) ; i ++ )
144152 {
145- for ( var j = 0 ; j < x_test . shape [ 1 ] ; j ++ )
153+ for ( var j = 0 ; j < x_test_array . GetLength ( 1 ) ; j ++ )
146154 {
147- if ( x_test [ i , j ] != 0 )
148- x_test [ i , j ] += index_from ;
155+ if ( x_test_array [ i , j ] == 0 )
156+ break ;
157+ x_test [ i , j ] += index_from ;
149158 }
150159 }
151160 }
152161
153- if ( maxlen ! = null )
162+ if ( maxlen = = null )
154163 {
155- ( x_train , labels_train ) = data_utils . _remove_long_seq ( ( int ) maxlen , x_train , labels_train ) ;
156- ( x_test , labels_test ) = data_utils . _remove_long_seq ( ( int ) maxlen , x_test , labels_test ) ;
157- if ( x_train . size == 0 || x_test . size == 0 )
158- throw new ValueError ( "After filtering for sequences shorter than maxlen=" +
159- $ "{ maxlen } , no sequence was kept. Increase maxlen.") ;
164+ maxlen = max ( x_train_array . GetLength ( 1 ) , x_test_array . GetLength ( 1 ) ) ;
160165 }
166+ ( x_train , labels_train ) = data_utils . _remove_long_seq ( ( int ) maxlen , x_train_array , labels_train_array ) ;
167+ ( x_test , labels_test ) = data_utils . _remove_long_seq ( ( int ) maxlen , x_test_array , labels_test_array ) ;
168+ if ( x_train . size == 0 || x_test . size == 0 )
169+ throw new ValueError ( "After filtering for sequences shorter than maxlen=" +
170+ $ "{ maxlen } , no sequence was kept. Increase maxlen.") ;
161171
162172 var xs = np . concatenate ( new [ ] { x_train , x_test } ) ;
163173 var labels = np . concatenate ( new [ ] { labels_train , labels_test } ) ;
174+ var xs_array = ( int [ , ] ) xs . ToMultiDimArray < int > ( ) ;
164175
165- if ( num_words == null )
176+ if ( num_words == null )
166177 {
167178 num_words = 0 ;
168- for ( var i = 0 ; i < xs . shape [ 0 ] ; i ++ )
169- for ( var j = 0 ; j < xs . shape [ 1 ] ; j ++ )
170- num_words = max ( ( int ) num_words , ( int ) xs [ i ] [ j ] ) ;
179+ for ( var i = 0 ; i < xs_array . GetLength ( 0 ) ; i ++ )
180+ for ( var j = 0 ; j < xs_array . GetLength ( 1 ) ; j ++ )
181+ num_words = max ( ( int ) num_words , ( int ) xs_array [ i , j ] ) ;
171182 }
172183
173184 // by convention, use 2 as OOV word
174185 // reserve 'index_from' (=3 by default) characters:
175186 // 0 (padding), 1 (start), 2 (OOV)
176187 if ( oov_char != null )
177188 {
178- int [ , ] new_xs = new int [ xs . shape [ 0 ] , xs . shape [ 1 ] ] ;
179- for ( var i = 0 ; i < xs . shape [ 0 ] ; i ++ )
189+ int [ , ] new_xs_array = new int [ xs_array . GetLength ( 0 ) , xs_array . GetLength ( 1 ) ] ;
190+ for ( var i = 0 ; i < xs_array . GetLength ( 0 ) ; i ++ )
180191 {
181- for ( var j = 0 ; j < xs . shape [ 1 ] ; j ++ )
192+ for ( var j = 0 ; j < xs_array . GetLength ( 1 ) ; j ++ )
182193 {
183- if ( ( int ) xs [ i ] [ j ] == 0 || skip_top <= ( int ) xs [ i ] [ j ] && ( int ) xs [ i ] [ j ] < num_words )
184- new_xs [ i , j ] = ( int ) xs [ i ] [ j ] ;
194+ if ( xs_array [ i , j ] == 0 || skip_top <= xs_array [ i , j ] && xs_array [ i , j ] < num_words )
195+ new_xs_array [ i , j ] = xs_array [ i , j ] ;
185196 else
186- new_xs [ i , j ] = ( int ) oov_char ;
197+ new_xs_array [ i , j ] = ( int ) oov_char ;
187198 }
188199 }
189- xs = new NDArray ( new_xs ) ;
200+ xs = new NDArray ( new_xs_array ) ;
190201 }
191202 else
192203 {
193- int [ , ] new_xs = new int [ xs . shape [ 0 ] , xs . shape [ 1 ] ] ;
194- for ( var i = 0 ; i < xs . shape [ 0 ] ; i ++ )
204+ int [ , ] new_xs_array = new int [ xs_array . GetLength ( 0 ) , xs_array . GetLength ( 1 ) ] ;
205+ for ( var i = 0 ; i < xs_array . GetLength ( 0 ) ; i ++ )
195206 {
196207 int k = 0 ;
197- for ( var j = 0 ; j < xs . shape [ 1 ] ; j ++ )
208+ for ( var j = 0 ; j < xs_array . GetLength ( 1 ) ; j ++ )
198209 {
199- if ( ( int ) xs [ i ] [ j ] == 0 || skip_top <= ( int ) xs [ i ] [ j ] && ( int ) xs [ i ] [ j ] < num_words )
200- new_xs [ i , k ++ ] = ( int ) xs [ i ] [ j ] ;
210+ if ( xs_array [ i , j ] == 0 || skip_top <= xs_array [ i , j ] && xs_array [ i , j ] < num_words )
211+ new_xs_array [ i , k ++ ] = xs_array [ i , j ] ;
201212 }
202213 }
203- xs = new NDArray ( new_xs ) ;
214+ xs = new NDArray ( new_xs_array ) ;
204215 }
205216
206217 var idx = len ( x_train ) ;
0 commit comments