@@ -6078,204 +6078,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
60786078}
60796079
60806080
6081- /* Function vect_permute_store_chain.
6082-
6083- Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
6084- a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
6085- the data correctly for the stores. Return the final references for stores
6086- in RESULT_CHAIN.
6087-
6088- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
6089- The input is 4 vectors each containing 8 elements. We assign a number to
6090- each element, the input sequence is:
6091-
6092- 1st vec: 0 1 2 3 4 5 6 7
6093- 2nd vec: 8 9 10 11 12 13 14 15
6094- 3rd vec: 16 17 18 19 20 21 22 23
6095- 4th vec: 24 25 26 27 28 29 30 31
6096-
6097- The output sequence should be:
6098-
6099- 1st vec: 0 8 16 24 1 9 17 25
6100- 2nd vec: 2 10 18 26 3 11 19 27
6101- 3rd vec: 4 12 20 28 5 13 21 29
6102- 4th vec: 6 14 22 30 7 15 23 31
6103-
6104- i.e., we interleave the contents of the four vectors in their order.
6105-
6106- We use interleave_high/low instructions to create such output. The input of
6107- each interleave_high/low operation is two vectors:
6108- 1st vec 2nd vec
6109- 0 1 2 3 4 5 6 7
6110- the even elements of the result vector are obtained left-to-right from the
6111- high/low elements of the first vector. The odd elements of the result are
6112- obtained left-to-right from the high/low elements of the second vector.
6113- The output of interleave_high will be: 0 4 1 5
6114- and of interleave_low: 2 6 3 7
6115-
6116-
6117- The permutation is done in log LENGTH stages. In each stage interleave_high
6118- and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
6119- where the first argument is taken from the first half of DR_CHAIN and the
6120- second argument from its second half.
6121- In our example,
6122-
6123- I1: interleave_high (1st vec, 3rd vec)
6124- I2: interleave_low (1st vec, 3rd vec)
6125- I3: interleave_high (2nd vec, 4th vec)
6126- I4: interleave_low (2nd vec, 4th vec)
6127-
6128- The output for the first stage is:
6129-
6130- I1: 0 16 1 17 2 18 3 19
6131- I2: 4 20 5 21 6 22 7 23
6132- I3: 8 24 9 25 10 26 11 27
6133- I4: 12 28 13 29 14 30 15 31
6134-
6135- The output of the second stage, i.e. the final result is:
6136-
6137- I1: 0 8 16 24 1 9 17 25
6138- I2: 2 10 18 26 3 11 19 27
6139- I3: 4 12 20 28 5 13 21 29
6140- I4: 6 14 22 30 7 15 23 31. */
6141-
6142- void
6143- vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
6144- unsigned int length,
6145- stmt_vec_info stmt_info,
6146- gimple_stmt_iterator *gsi,
6147- vec<tree> *result_chain)
6148- {
6149- tree vect1, vect2, high, low;
6150- gimple *perm_stmt;
6151- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6152- tree perm_mask_low, perm_mask_high;
6153- tree data_ref;
6154- tree perm3_mask_low, perm3_mask_high;
6155- unsigned int i, j, n, log_length = exact_log2 (length); /* log_length is only read by the power-of-2 branch below. */
6156-
6157- result_chain->quick_grow (length);
6158- memcpy (result_chain->address (), dr_chain.address (),
6159- length * sizeof (tree)); /* Seed RESULT_CHAIN with a copy of DR_CHAIN. */
6160-
6161- if (length == 3 )
6162- {
6163- /* vect_grouped_store_supported ensures that this is constant. */
6164- unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
6165- unsigned int j0 = 0 , j1 = 0 , j2 = 0 ; /* Source-lane counters for the 1st/2nd/3rd input vector; declared outside the J loop, so they keep counting across its three iterations. */
6166-
6167- vec_perm_builder sel (nelt, nelt, 1 );
6168- sel.quick_grow (nelt);
6169- vec_perm_indices indices;
6170- for (j = 0 ; j < 3 ; j++) /* One iteration per output vector. */
6171- {
6172- int nelt0 = ((3 - j) * nelt) % 3 ; /* nelt0/nelt1/nelt2: offset inside each group of 3 output lanes that receives a lane of the 1st/2nd/3rd input vector. */
6173- int nelt1 = ((3 - j) * nelt + 1 ) % 3 ;
6174- int nelt2 = ((3 - j) * nelt + 2 ) % 3 ;
6175-
6176- for (i = 0 ; i < nelt; i++)
6177- {
6178- if (3 * i + nelt0 < nelt)
6179- sel[3 * i + nelt0] = j0++;
6180- if (3 * i + nelt1 < nelt)
6181- sel[3 * i + nelt1] = nelt + j1++;
6182- if (3 * i + nelt2 < nelt)
6183- sel[3 * i + nelt2] = 0 ; /* Placeholder: this lane is overwritten by the perm3_mask_high permute below. */
6184- }
6185- indices.new_vector (sel, 2 , nelt);
6186- perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6187-
6188- for (i = 0 ; i < nelt; i++)
6189- {
6190- if (3 * i + nelt0 < nelt)
6191- sel[3 * i + nelt0] = 3 * i + nelt0; /* Identity index: keep the lane already placed by the first permute. */
6192- if (3 * i + nelt1 < nelt)
6193- sel[3 * i + nelt1] = 3 * i + nelt1;
6194- if (3 * i + nelt2 < nelt)
6195- sel[3 * i + nelt2] = nelt + j2++;
6196- }
6197- indices.new_vector (sel, 2 , nelt);
6198- perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6199-
6200- vect1 = dr_chain[0 ];
6201- vect2 = dr_chain[1 ];
6202-
6203- /* Create interleaving stmt:
6204- low = VEC_PERM_EXPR <vect1, vect2,
6205- {j, nelt, *, j + 1, nelt + j + 1, *,
6206- j + 2, nelt + j + 2, *, ...}> */
6207- data_ref = make_temp_ssa_name (vectype, NULL , " vect_shuffle3_low" );
6208- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
6209- vect2, perm3_mask_low);
6210- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6211-
6212- vect1 = data_ref;
6213- vect2 = dr_chain[2 ];
6214- /* Create interleaving stmt (vect1 is now the result of the first permute):
6215- high = VEC_PERM_EXPR <vect1, vect2,
6216- {0, 1, nelt + j, 3, 4, nelt + j + 1,
6217- 6, 7, nelt + j + 2, ...}> */
6218- data_ref = make_temp_ssa_name (vectype, NULL , " vect_shuffle3_high" );
6219- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
6220- vect2, perm3_mask_high);
6221- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6222- (*result_chain)[j] = data_ref; /* Output vector J of the 3-way interleave. */
6223- }
6224- }
6225- else
6226- {
6227- /* If length is not equal to 3 then only power of 2 is supported. */
6228- gcc_assert (pow2p_hwi (length));
6229-
6230- /* The encoding has 2 interleaved stepped patterns, each described by 3 elements, hence the 6 entries filled in below. */
6231- poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
6232- vec_perm_builder sel (nelt, 2 , 3 );
6233- sel.quick_grow (6 );
6234- for (i = 0 ; i < 3 ; i++)
6235- {
6236- sel[i * 2 ] = i;
6237- sel[i * 2 + 1 ] = i + nelt;
6238- }
6239- vec_perm_indices indices (sel, 2 , nelt);
6240- perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6241-
6242- for (i = 0 ; i < 6 ; i++)
6243- sel[i] += exact_div (nelt, 2 ); /* Offset every index by nelt/2 to turn the "high" selector into the "low" one. */
6244- indices.new_vector (sel, 2 , nelt);
6245- perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6246-
6247- for (i = 0 , n = log_length; i < n; i++) /* One pass per interleaving stage. */
6248- {
6249- for (j = 0 ; j < length/2 ; j++)
6250- {
6251- vect1 = dr_chain[j];
6252- vect2 = dr_chain[j+length/2 ];
6253-
6254- /* Create interleaving stmt:
6255- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
6256- ...}> */
6257- high = make_temp_ssa_name (vectype, NULL , " vect_inter_high" );
6258- perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
6259- vect2, perm_mask_high);
6260- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6261- (*result_chain)[2 *j] = high;
6262-
6263- /* Create interleaving stmt:
6264- low = VEC_PERM_EXPR <vect1, vect2,
6265- {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
6266- ...}> */
6267- low = make_temp_ssa_name (vectype, NULL , " vect_inter_low" );
6268- perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
6269- vect2, perm_mask_low);
6270- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6271- (*result_chain)[2 *j+1 ] = low;
6272- }
6273- memcpy (dr_chain.address (), result_chain->address (),
6274- length * sizeof (tree)); /* This stage's outputs become the next stage's inputs; note that DR_CHAIN is overwritten in place. */
6275- }
6276- }
6277- }
6278-
62796081/* Function vect_setup_realignment
62806082
62816083 This function is called when vectorizing an unaligned load using
0 commit comments