@@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
156156 * count to the generic event atomically:
157157 */
158158 prev_raw_count = local64_read (& hwc -> prev_count );
159- if (local64_cmpxchg (& hwc -> prev_count , prev_raw_count ,
160- new_raw_count ) != prev_raw_count )
159+ if (! local64_try_cmpxchg (& hwc -> prev_count ,
160+ & prev_raw_count , new_raw_count ) )
161161 return 0 ;
162162
163163 /*
@@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event)
247247 return - ENOENT ;
248248}
249249
250+ /*
251+ * Grouping of IBS events is not possible since IBS can have only
252+ * one event active at any point in time.
+ *
+ * Returns 0 when @event is its own group leader, or when neither the
+ * group leader nor any sibling visited below uses the same pmu as @event.
+ * Returns -EINVAL as soon as another group member on the same pmu is found.
+ * perf_ibs_init() calls this and propagates a non-zero result unchanged.
253+ */
254+ static int validate_group (struct perf_event * event )
255+ {
256+ struct perf_event * sibling ;
257+
258+ if (event -> group_leader == event ) /* event is its own group leader: accepted without further checks */
259+ return 0 ;
260+
261+ if (event -> group_leader -> pmu == event -> pmu ) /* the leader already uses this pmu */
262+ return - EINVAL ;
263+
264+ for_each_sibling_event (sibling , event -> group_leader ) { /* presumably walks the leader's sibling list - helper not visible here */
265+ if (sibling -> pmu == event -> pmu ) /* some sibling already uses this pmu */
266+ return - EINVAL ;
267+ }
268+ return 0 ;
269+ }
270+
250271static int perf_ibs_init (struct perf_event * event )
251272{
252273 struct hw_perf_event * hwc = & event -> hw ;
253274 struct perf_ibs * perf_ibs ;
254275 u64 max_cnt , config ;
276+ int ret ;
255277
256278 perf_ibs = get_ibs_pmu (event -> attr .type );
257279 if (!perf_ibs )
@@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event)
265287 if (config & ~perf_ibs -> config_mask )
266288 return - EINVAL ;
267289
290+ ret = validate_group (event );
291+ if (ret )
292+ return ret ;
293+
268294 if (hwc -> sample_period ) {
269295 if (config & perf_ibs -> cnt_mask )
270296 /* raw max_cnt may not be set */
@@ -702,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
702728 return op_data2 -> data_src_lo ;
703729}
704730
705- static void perf_ibs_get_mem_lvl (union ibs_op_data2 * op_data2 ,
706- union ibs_op_data3 * op_data3 ,
707- struct perf_sample_data * data )
731+ #define L (x ) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT)) /* LVL value x, always OR-ed with the LVL HIT value. NOTE(review): the space before '(' looks like a diff-rendering artifact; a function-like macro needs L(x) with no space - confirm against the real file */
732+ #define LN (x ) PERF_MEM_S(LVLNUM, x) /* LVLNUM value x */
733+ #define REM PERF_MEM_S(REMOTE, REMOTE) /* REMOTE field set to REMOTE */
734+ #define HOPS (x ) PERF_MEM_S(HOPS, x) /* HOPS value x */
735+ /* Lookup used by perf_ibs_get_mem_lvl() when IBS_CAPS_ZEN4 is not set in ibs_caps: indexed by ibs_data_src, yields the pre-built data-source bits. Indices without an initializer stay 0, which the caller treats as "no DataSrc" and jumps to the MAB check. NOTE(review): the visible code does not bound-check the index against 8 - confirm the DataSrc field width. */
736+ static u64 g_data_src [8 ] = {
737+ [IBS_DATA_SRC_LOC_CACHE ] = L (L3 ) | L (REM_CCE1 ) | LN (ANY_CACHE ) | HOPS (0 ),
738+ [IBS_DATA_SRC_DRAM ] = L (LOC_RAM ) | LN (RAM ),
739+ [IBS_DATA_SRC_REM_CACHE ] = L (REM_CCE2 ) | LN (ANY_CACHE ) | REM | HOPS (1 ),
740+ [IBS_DATA_SRC_IO ] = L (IO ) | LN (IO ),
741+ };
742+ /* Bitmask of DataSrc values whose result is changed when op_data2->rmt_node is set; only DRAM in this table. */
743+ #define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM)
744+ #define RMT_NODE_APPLICABLE (x ) (RMT_NODE_BITS & (1 << x)) /* non-zero iff bit x is part of RMT_NODE_BITS; x is expanded unparenthesized */
745+ /* Lookup used by perf_ibs_get_mem_lvl() when IBS_CAPS_ZEN4 is set in ibs_caps: indexed by ibs_data_src. Indices without an initializer stay 0, which the caller treats as "no DataSrc" and jumps to the MAB check. NOTE(review): the visible code does not bound-check the index against 32 - confirm the DataSrc field width. */
746+ static u64 g_zen4_data_src [32 ] = {
747+ [IBS_DATA_SRC_EXT_LOC_CACHE ] = L (L3 ) | LN (L3 ),
748+ [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ] = L (REM_CCE1 ) | LN (ANY_CACHE ) | REM | HOPS (0 ),
749+ [IBS_DATA_SRC_EXT_DRAM ] = L (LOC_RAM ) | LN (RAM ),
750+ [IBS_DATA_SRC_EXT_FAR_CCX_CACHE ] = L (REM_CCE2 ) | LN (ANY_CACHE ) | REM | HOPS (1 ),
751+ [IBS_DATA_SRC_EXT_PMEM ] = LN (PMEM ), /* LVLNUM only: no LVL/HIT bits are set for this source */
752+ [IBS_DATA_SRC_EXT_IO ] = L (IO ) | LN (IO ),
753+ [IBS_DATA_SRC_EXT_EXT_MEM ] = LN (CXL ), /* LVLNUM only: no LVL/HIT bits are set for this source */
754+ };
755+ /* Bitmask of DataSrc values whose result is changed when op_data2->rmt_node is set: DRAM is replaced by the REM_RAM1 encoding, PMEM and EXT_MEM get REM | HOPS(1) OR-ed in. */
756+ #define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \
757+ (1 << IBS_DATA_SRC_EXT_PMEM) | \
758+ (1 << IBS_DATA_SRC_EXT_EXT_MEM))
759+ #define ZEN4_RMT_NODE_APPLICABLE (x ) (ZEN4_RMT_NODE_BITS & (1 << x)) /* non-zero iff bit x is part of ZEN4_RMT_NODE_BITS; x is expanded unparenthesized */
760+
761+ static __u64 perf_ibs_get_mem_lvl (union ibs_op_data2 * op_data2 ,
762+ union ibs_op_data3 * op_data3 ,
763+ struct perf_sample_data * data )
708764{
709765 union perf_mem_data_src * data_src = & data -> data_src ;
710766 u8 ibs_data_src = perf_ibs_data_src (op_data2 );
711767
712768 data_src -> mem_lvl = 0 ;
769+ data_src -> mem_lvl_num = 0 ;
713770
714771 /*
715772 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
716773 * memory accesses. So, check DcUcMemAcc bit early.
717774 */
718- if (op_data3 -> dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO ) {
719- data_src -> mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT ;
720- return ;
721- }
775+ if (op_data3 -> dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO )
776+ return L (UNC ) | LN (UNC );
722777
723778 /* L1 Hit */
724- if (op_data3 -> dc_miss == 0 ) {
725- data_src -> mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT ;
726- return ;
727- }
779+ if (op_data3 -> dc_miss == 0 )
780+ return L (L1 ) | LN (L1 );
728781
729782 /* L2 Hit */
730783 if (op_data3 -> l2_miss == 0 ) {
731784 /* Erratum #1293 */
732785 if (boot_cpu_data .x86 != 0x19 || boot_cpu_data .x86_model > 0xF ||
733- !(op_data3 -> sw_pf || op_data3 -> dc_miss_no_mab_alloc )) {
734- data_src -> mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT ;
735- return ;
736- }
786+ !(op_data3 -> sw_pf || op_data3 -> dc_miss_no_mab_alloc ))
787+ return L (L2 ) | LN (L2 );
737788 }
738789
739790 /*
@@ -743,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
743794 if (data_src -> mem_op != PERF_MEM_OP_LOAD )
744795 goto check_mab ;
745796
746- /* L3 Hit */
747797 if (ibs_caps & IBS_CAPS_ZEN4 ) {
748- if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ) {
749- data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT ;
750- return ;
751- }
752- } else {
753- if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE ) {
754- data_src -> mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
755- PERF_MEM_LVL_HIT ;
756- return ;
757- }
758- }
798+ u64 val = g_zen4_data_src [ibs_data_src ];
759799
760- /* A peer cache in a near CCX */
761- if (ibs_caps & IBS_CAPS_ZEN4 &&
762- ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ) {
763- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT ;
764- return ;
765- }
800+ if (!val )
801+ goto check_mab ;
766802
767- /* A peer cache in a far CCX */
768- if (ibs_caps & IBS_CAPS_ZEN4 ) {
769- if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE ) {
770- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT ;
771- return ;
803+ /* HOPS_1 because IBS doesn't provide remote socket detail */
804+ if (op_data2 -> rmt_node && ZEN4_RMT_NODE_APPLICABLE (ibs_data_src )) {
805+ if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM )
806+ val = L (REM_RAM1 ) | LN (RAM ) | REM | HOPS (1 );
807+ else
808+ val |= REM | HOPS (1 );
772809 }
773- } else {
774- if (ibs_data_src == IBS_DATA_SRC_REM_CACHE ) {
775- data_src -> mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT ;
776- return ;
777- }
778- }
779810
780- /* DRAM */
781- if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM ) {
782- if (op_data2 -> rmt_node == 0 )
783- data_src -> mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT ;
784- else
785- data_src -> mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT ;
786- return ;
787- }
811+ return val ;
812+ } else {
813+ u64 val = g_data_src [ibs_data_src ];
788814
789- /* PMEM */
790- if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM ) {
791- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_PMEM ;
792- if (op_data2 -> rmt_node ) {
793- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
794- /* IBS doesn't provide Remote socket detail */
795- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
796- }
797- return ;
798- }
815+ if (!val )
816+ goto check_mab ;
799817
800- /* Extension Memory */
801- if (ibs_caps & IBS_CAPS_ZEN4 &&
802- ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM ) {
803- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_CXL ;
804- if (op_data2 -> rmt_node ) {
805- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
806- /* IBS doesn't provide Remote socket detail */
807- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
818+ /* HOPS_1 because IBS doesn't provide remote socket detail */
819+ if (op_data2 -> rmt_node && RMT_NODE_APPLICABLE (ibs_data_src )) {
820+ if (ibs_data_src == IBS_DATA_SRC_DRAM )
821+ val = L (REM_RAM1 ) | LN (RAM ) | REM | HOPS (1 );
822+ else
823+ val |= REM | HOPS (1 );
808824 }
809- return ;
810- }
811825
812- /* IO */
813- if (ibs_data_src == IBS_DATA_SRC_EXT_IO ) {
814- data_src -> mem_lvl = PERF_MEM_LVL_IO ;
815- data_src -> mem_lvl_num = PERF_MEM_LVLNUM_IO ;
816- if (op_data2 -> rmt_node ) {
817- data_src -> mem_remote = PERF_MEM_REMOTE_REMOTE ;
818- /* IBS doesn't provide Remote socket detail */
819- data_src -> mem_hops = PERF_MEM_HOPS_1 ;
820- }
821- return ;
826+ return val ;
822827 }
823828
824829check_mab :
@@ -829,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
829834 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
830835 * MAB only when IBS fails to provide DataSrc.
831836 */
832- if (op_data3 -> dc_miss_no_mab_alloc ) {
833- data_src -> mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT ;
834- return ;
835- }
837+ if (op_data3 -> dc_miss_no_mab_alloc )
838+ return L (LFB ) | LN (LFB );
836839
837- data_src -> mem_lvl = PERF_MEM_LVL_NA ;
840+ /* Don't set HIT with NA */
841+ return PERF_MEM_S (LVL , NA ) | LN (NA );
838842}
839843
840844static bool perf_ibs_cache_hit_st_valid (void )
@@ -924,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
924928 union ibs_op_data2 * op_data2 ,
925929 union ibs_op_data3 * op_data3 )
926930{
927- perf_ibs_get_mem_lvl (op_data2 , op_data3 , data );
931+ union perf_mem_data_src * data_src = & data -> data_src ;
932+
933+ data_src -> val |= perf_ibs_get_mem_lvl (op_data2 , op_data3 , data );
928934 perf_ibs_get_mem_snoop (op_data2 , data );
929935 perf_ibs_get_tlb_lvl (op_data3 , data );
930936 perf_ibs_get_mem_lock (op_data3 , data );
0 commit comments