@@ -125,95 +125,93 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
125125 BLASFUNC (xerbla )(ERROR_NAME , & info , sizeof (ERROR_NAME ));
126126 return ;
127127 }
128+
128129#ifdef NEW_IMATCOPY
129- if (* lda == * ldb && * cols == * rows ) {
130+ if (* lda == * ldb ) {
130131 if ( order == BlasColMajor )
131132 {
132133
133134 if ( trans == BlasNoTrans )
134135 {
135136 IMATCOPY_K_CN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
137+ return ;
136138 }
137139 if ( trans == BlasConj )
138140 {
139141 IMATCOPY_K_CNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
142+ return ;
140143 }
141- if ( trans == BlasTrans )
144+ if ( trans == BlasTrans && * rows == * cols )
142145 {
143146 IMATCOPY_K_CT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
147+ return ;
144148 }
145- if ( trans == BlasTransConj )
149+ if ( trans == BlasTransConj && * rows == * cols )
146150 {
147151 IMATCOPY_K_CTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
152+ return ;
148153 }
154+
149155 }
150156 else
151157 {
152158
153159 if ( trans == BlasNoTrans )
154160 {
155161 IMATCOPY_K_RN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
162+ return ;
156163 }
157164 if ( trans == BlasConj )
158165 {
159166 IMATCOPY_K_RNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
167+ return ;
160168 }
161- if ( trans == BlasTrans )
169+ if ( trans == BlasTrans && * rows == * cols )
162170 {
163171 IMATCOPY_K_RT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
172+ return ;
164173 }
165- if ( trans == BlasTransConj )
174+ if ( trans == BlasTransConj && * rows == * cols )
166175 {
167176 IMATCOPY_K_RTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda );
177+ return ;
168178 }
179+
169180 }
170- return ;
171181 }
172182#endif
173183
174- if ( * lda > * ldb )
175- msize = (size_t )(* lda ) * (* ldb ) * sizeof (FLOAT ) * 2 ;
176- else
177- msize = (size_t )(* ldb ) * (* ldb ) * sizeof (FLOAT ) * 2 ;
178-
179- b = malloc (msize );
180- if ( b == NULL )
181- {
182- printf ("Memory alloc failed in zimatcopy\n" );
183- exit (1 );
184- }
184+ msize = (size_t )(* rows ) * (* cols ) * sizeof (FLOAT ) * 2 ;
185185
186+ b = malloc (msize );
187+ if ( b == NULL )
188+ {
189+ printf ("Memory alloc failed in zimatcopy\n" );
190+ exit (1 );
191+ }
186192
187193 if ( order == BlasColMajor )
188194 {
189195
190196 if ( trans == BlasNoTrans )
191197 {
192- OMATCOPY_K_CN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
193- OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
194- free (b );
195- return ;
198+ OMATCOPY_K_CN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
199+ OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
196200 }
197- if ( trans == BlasConj )
201+ else if ( trans == BlasConj )
198202 {
199- OMATCOPY_K_CNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
200- OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
201- free (b );
202- return ;
203+ OMATCOPY_K_CNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
204+ OMATCOPY_K_CN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
203205 }
204- if ( trans == BlasTrans )
206+ else if ( trans == BlasTrans )
205207 {
206- OMATCOPY_K_CT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
207- OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
208- free (b );
209- return ;
208+ OMATCOPY_K_CT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
209+ OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
210210 }
211- if ( trans == BlasTransConj )
211+ else if ( trans == BlasTransConj )
212212 {
213- OMATCOPY_K_CTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
214- OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
215- free (b );
216- return ;
213+ OMATCOPY_K_CTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
214+ OMATCOPY_K_CN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
217215 }
218216
219217 }
@@ -222,34 +220,27 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
222220
223221 if ( trans == BlasNoTrans )
224222 {
225- OMATCOPY_K_RN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
226- OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
227- free (b );
228- return ;
223+ OMATCOPY_K_RN (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
224+ OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
229225 }
230- if ( trans == BlasConj )
226+ else if ( trans == BlasConj )
231227 {
232- OMATCOPY_K_RNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
233- OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
234- free (b );
235- return ;
228+ OMATCOPY_K_RNC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * cols );
229+ OMATCOPY_K_RN (* rows , * cols , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * cols , a , * ldb );
236230 }
237- if ( trans == BlasTrans )
231+ else if ( trans == BlasTrans )
238232 {
239- OMATCOPY_K_RT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
240- OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
241- free (b );
242- return ;
233+ OMATCOPY_K_RT (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
234+ OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
243235 }
244- if ( trans == BlasTransConj )
236+ else if ( trans == BlasTransConj )
245237 {
246- OMATCOPY_K_RTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * ldb );
247- OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * ldb , a , * ldb );
248- free (b );
249- return ;
238+ OMATCOPY_K_RTC (* rows , * cols , alpha [0 ], alpha [1 ], a , * lda , b , * rows );
239+ OMATCOPY_K_RN (* cols , * rows , (FLOAT ) 1.0 , (FLOAT ) 0.0 , b , * rows , a , * ldb );
250240 }
251241
252242 }
243+
253244 free (b );
254245 return ;
255246
0 commit comments