@@ -176,6 +176,28 @@ struct psi_group psi_system = {
 	.pcpu = &system_group_pcpu,
 };
 
+static DEFINE_PER_CPU(seqcount_t, psi_seq);
+
+static inline void psi_write_begin(int cpu)
+{
+	write_seqcount_begin(per_cpu_ptr(&psi_seq, cpu));
+}
+
+static inline void psi_write_end(int cpu)
+{
+	write_seqcount_end(per_cpu_ptr(&psi_seq, cpu));
+}
+
+static inline u32 psi_read_begin(int cpu)
+{
+	return read_seqcount_begin(per_cpu_ptr(&psi_seq, cpu));
+}
+
+static inline bool psi_read_retry(int cpu, u32 seq)
+{
+	return read_seqcount_retry(per_cpu_ptr(&psi_seq, cpu), seq);
+}
+
 static void psi_avgs_work(struct work_struct *work);
 
 static void poll_timer_fn(struct timer_list *t);
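These helpers replace the old per-group, per-CPU seqcount (groupc->seq) with one shared per-CPU sequence counter: all of a CPU's PSI group state is now published under the same psi_seq. For orientation, here is a minimal sketch of the seqcount pattern they wrap, with made-up names (demo_seq, demo_data, demo_update, demo_read), assuming a kernel context and an externally serialized writer, as PSI writers are by the runqueue lock:

#include <linux/percpu.h>
#include <linux/seqlock.h>

static DEFINE_PER_CPU(seqcount_t, demo_seq);
static DEFINE_PER_CPU(u64, demo_data);

/* Writer: the caller must already hold whatever serializes writers on
 * this CPU (for PSI, the runqueue lock); plain seqcount_t writers must
 * not run concurrently. */
static void demo_update(int cpu, u64 val)
{
	write_seqcount_begin(per_cpu_ptr(&demo_seq, cpu));
	*per_cpu_ptr(&demo_data, cpu) = val;
	write_seqcount_end(per_cpu_ptr(&demo_seq, cpu));
}

/* Reader: retry the copy until the sequence count shows that no writer
 * ran in the middle of it. */
static u64 demo_read(int cpu)
{
	u64 val;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(per_cpu_ptr(&demo_seq, cpu));
		val = *per_cpu_ptr(&demo_data, cpu);
	} while (read_seqcount_retry(per_cpu_ptr(&demo_seq, cpu), seq));

	return val;
}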
@@ -186,7 +208,7 @@ static void group_init(struct psi_group *group)
 
 	group->enabled = true;
 	for_each_possible_cpu(cpu)
-		seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
+		seqcount_init(per_cpu_ptr(&psi_seq, cpu));
 	group->avg_last_update = sched_clock();
 	group->avg_next_update = group->avg_last_update + psi_period;
 	mutex_init(&group->avgs_lock);
@@ -266,14 +288,14 @@ static void get_recent_times(struct psi_group *group, int cpu,
 
 	/* Snapshot a coherent view of the CPU state */
 	do {
-		seq = read_seqcount_begin(&groupc->seq);
+		seq = psi_read_begin(cpu);
 		now = cpu_clock(cpu);
 		memcpy(times, groupc->times, sizeof(groupc->times));
 		state_mask = groupc->state_mask;
 		state_start = groupc->state_start;
 		if (cpu == current_cpu)
 			memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
-	} while (read_seqcount_retry(&groupc->seq, seq));
+	} while (psi_read_retry(cpu, seq));
 
 	/* Calculate state time deltas against the previous snapshot */
 	for (s = 0; s < NR_PSI_STATES; s++) {
@@ -772,30 +794,20 @@ static void record_times(struct psi_group_cpu *groupc, u64 now)
 	groupc->times[PSI_NONIDLE] += delta;
 }
 
+#define for_each_group(iter, group) \
+	for (typeof(group) iter = group; iter; iter = iter->parent)
+
 static void psi_group_change(struct psi_group *group, int cpu,
 			     unsigned int clear, unsigned int set,
-			     bool wake_clock)
+			     u64 now, bool wake_clock)
 {
 	struct psi_group_cpu *groupc;
 	unsigned int t, m;
 	u32 state_mask;
-	u64 now;
 
 	lockdep_assert_rq_held(cpu_rq(cpu));
 	groupc = per_cpu_ptr(group->pcpu, cpu);
 
-	/*
-	 * First we update the task counts according to the state
-	 * change requested through the @clear and @set bits.
-	 *
-	 * Then if the cgroup PSI stats accounting enabled, we
-	 * assess the aggregate resource states this CPU's tasks
-	 * have been in since the last change, and account any
-	 * SOME and FULL time these may have resulted in.
-	 */
-	write_seqcount_begin(&groupc->seq);
-	now = cpu_clock(cpu);
-
 	/*
 	 * Start with TSK_ONCPU, which doesn't have a corresponding
 	 * task count - it's just a boolean flag directly encoded in
@@ -847,7 +859,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
 
 		groupc->state_mask = state_mask;
 
-		write_seqcount_end(&groupc->seq);
 		return;
 	}
 
@@ -868,8 +879,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
 
 	groupc->state_mask = state_mask;
 
-	write_seqcount_end(&groupc->seq);
-
 	if (state_mask & group->rtpoll_states)
 		psi_schedule_rtpoll_work(group, 1, false);
 
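The for_each_group() macro introduced above declares its iterator inside the for statement via typeof(), which is why the converted callers in the following hunks can drop their local struct psi_group *group declarations and the do/while parent walks. A small userspace demonstration of the same construct, not from the patch, with a toy struct node standing in for struct psi_group:

#include <stdio.h>

/* Same shape as the macro added in the patch: the iterator is declared
 * inside the for statement, so callers need no separate variable. */
#define for_each_group(iter, group) \
	for (typeof(group) iter = group; iter; iter = iter->parent)

struct node {
	const char *name;
	struct node *parent;
};

int main(void)
{
	struct node root = { "root", NULL };
	struct node mid  = { "mid",  &root };
	struct node leaf = { "leaf", &mid };

	/* Walks leaf -> mid -> root, mirroring the psi_group parent walk. */
	for_each_group(n, &leaf)
		printf("%s\n", n->name);

	return 0;
}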
@@ -904,24 +913,29 @@ static void psi_flags_change(struct task_struct *task, int clear, int set)
 void psi_task_change(struct task_struct *task, int clear, int set)
 {
 	int cpu = task_cpu(task);
-	struct psi_group *group;
+	u64 now;
 
 	if (!task->pid)
 		return;
 
 	psi_flags_change(task, clear, set);
 
-	group = task_psi_group(task);
-	do {
-		psi_group_change(group, cpu, clear, set, true);
-	} while ((group = group->parent));
+	psi_write_begin(cpu);
+	now = cpu_clock(cpu);
+	for_each_group(group, task_psi_group(task))
+		psi_group_change(group, cpu, clear, set, now, true);
+	psi_write_end(cpu);
 }
 
 void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		     bool sleep)
 {
-	struct psi_group *group, *common = NULL;
+	struct psi_group *common = NULL;
 	int cpu = task_cpu(prev);
+	u64 now;
+
+	psi_write_begin(cpu);
+	now = cpu_clock(cpu);
 
 	if (next->pid) {
 		psi_flags_change(next, 0, TSK_ONCPU);
@@ -930,16 +944,15 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		 * ancestors with @prev, those will already have @prev's
 		 * TSK_ONCPU bit set, and we can stop the iteration there.
 		 */
-		group = task_psi_group(next);
-		do {
-			if (per_cpu_ptr(group->pcpu, cpu)->state_mask &
-			    PSI_ONCPU) {
+		for_each_group(group, task_psi_group(next)) {
+			struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
+
+			if (groupc->state_mask & PSI_ONCPU) {
 				common = group;
 				break;
 			}
-
-			psi_group_change(group, cpu, 0, TSK_ONCPU, true);
-		} while ((group = group->parent));
+			psi_group_change(group, cpu, 0, TSK_ONCPU, now, true);
+		}
 	}
 
 	if (prev->pid) {
@@ -972,12 +985,11 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 
 		psi_flags_change(prev, clear, set);
 
-		group = task_psi_group(prev);
-		do {
+		for_each_group(group, task_psi_group(prev)) {
 			if (group == common)
 				break;
-			psi_group_change(group, cpu, clear, set, wake_clock);
-		} while ((group = group->parent));
+			psi_group_change(group, cpu, clear, set, now, wake_clock);
+		}
 
 		/*
 		 * TSK_ONCPU is handled up to the common ancestor. If there are
@@ -987,20 +999,21 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		 */
 		if ((prev->psi_flags ^ next->psi_flags) & ~TSK_ONCPU) {
 			clear &= ~TSK_ONCPU;
-			for (; group; group = group->parent)
-				psi_group_change(group, cpu, clear, set, wake_clock);
+			for_each_group(group, common)
+				psi_group_change(group, cpu, clear, set, now, wake_clock);
 		}
 	}
+	psi_write_end(cpu);
 }
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
 {
 	int cpu = task_cpu(curr);
-	struct psi_group *group;
 	struct psi_group_cpu *groupc;
 	s64 delta;
 	u64 irq;
+	u64 now;
 
 	if (static_branch_likely(&psi_disabled) || !irqtime_enabled())
 		return;
@@ -1009,8 +1022,7 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
 		return;
 
 	lockdep_assert_rq_held(rq);
-	group = task_psi_group(curr);
-	if (prev && task_psi_group(prev) == group)
+	if (prev && task_psi_group(prev) == task_psi_group(curr))
 		return;
 
 	irq = irq_time_read(cpu);
@@ -1019,25 +1031,22 @@ void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_st
 		return;
 	rq->psi_irq_time = irq;
 
-	do {
-		u64 now;
+	psi_write_begin(cpu);
+	now = cpu_clock(cpu);
 
+	for_each_group(group, task_psi_group(curr)) {
 		if (!group->enabled)
 			continue;
 
 		groupc = per_cpu_ptr(group->pcpu, cpu);
 
-		write_seqcount_begin(&groupc->seq);
-		now = cpu_clock(cpu);
-
 		record_times(groupc, now);
 		groupc->times[PSI_IRQ_FULL] += delta;
 
-		write_seqcount_end(&groupc->seq);
-
 		if (group->rtpoll_states & (1 << PSI_IRQ_FULL))
 			psi_schedule_rtpoll_work(group, 1, false);
-	} while ((group = group->parent));
+	}
+	psi_write_end(cpu);
 }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
@@ -1225,12 +1234,14 @@ void psi_cgroup_restart(struct psi_group *group)
 		return;
 
 	for_each_possible_cpu(cpu) {
-		struct rq *rq = cpu_rq(cpu);
-		struct rq_flags rf;
+		u64 now;
 
-		rq_lock_irq(rq, &rf);
-		psi_group_change(group, cpu, 0, 0, true);
-		rq_unlock_irq(rq, &rf);
+		guard(rq_lock_irq)(cpu_rq(cpu));
+
+		psi_write_begin(cpu);
+		now = cpu_clock(cpu);
+		psi_group_change(group, cpu, 0, 0, now, true);
+		psi_write_end(cpu);
 	}
 }
 #endif /* CONFIG_CGROUPS */
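One further detail in the last hunk: guard(rq_lock_irq)(cpu_rq(cpu)) uses the kernel's scope-based cleanup helpers from <linux/cleanup.h>, which is why the explicit rq_unlock_irq() call and the rq_flags variable disappear; the runqueue lock is dropped automatically when the loop body's scope ends. A minimal sketch of the same idea with an ordinary spinlock guard (illustrative, not from the patch; demo_lock, demo_counter and demo_inc are made-up names):

#include <linux/cleanup.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static int demo_counter;

static void demo_inc(void)
{
	/* Takes demo_lock here; the guard releases it automatically when
	 * this scope ends, including on any early return. */
	guard(spinlock)(&demo_lock);
	demo_counter++;
}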