@@ -482,6 +482,8 @@ struct ring_buffer_per_cpu {
         unsigned long                  nr_pages;
         unsigned int                   current_context;
         struct list_head               *pages;
+        /* pages generation counter, incremented when the list changes */
+        unsigned long                  cnt;
         struct buffer_page             *head_page;     /* read from head */
         struct buffer_page             *tail_page;     /* write to tail */
         struct buffer_page             *commit_page;   /* committed pages */
@@ -1475,40 +1477,87 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
         RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
 }

+static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer,
+                           struct list_head *list)
+{
+        if (RB_WARN_ON(cpu_buffer,
+                       rb_list_head(rb_list_head(list->next)->prev) != list))
+                return false;
+
+        if (RB_WARN_ON(cpu_buffer,
+                       rb_list_head(rb_list_head(list->prev)->next) != list))
+                return false;
+
+        return true;
+}
+
 /**
  * rb_check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
  * As a safety measure we check to make sure the data pages have not
  * been corrupted.
- *
- * Callers of this function need to guarantee that the list of pages doesn't get
- * modified during the check. In particular, if it's possible that the function
- * is invoked with concurrent readers which can swap in a new reader page then
- * the caller should take cpu_buffer->reader_lock.
  */
 static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
-        struct list_head *head = rb_list_head(cpu_buffer->pages);
-        struct list_head *tmp;
+        struct list_head *head, *tmp;
+        unsigned long buffer_cnt;
+        unsigned long flags;
+        int nr_loops = 0;

-        if (RB_WARN_ON(cpu_buffer,
-                       rb_list_head(rb_list_head(head->next)->prev) != head))
+        /*
+         * Walk the linked list underpinning the ring buffer and validate all
+         * its next and prev links.
+         *
+         * The check acquires the reader_lock to avoid concurrent processing
+         * with code that could be modifying the list. However, the lock cannot
+         * be held for the entire duration of the walk, as this would make the
+         * time when interrupts are disabled non-deterministic, dependent on the
+         * ring buffer size. Therefore, the code releases and re-acquires the
+         * lock after checking each page. The ring_buffer_per_cpu.cnt variable
+         * is then used to detect if the list was modified while the lock was
+         * not held, in which case the check needs to be restarted.
+         *
+         * The code attempts to perform the check at most three times before
+         * giving up. This is acceptable because this is only a self-validation
+         * to detect problems early on. In practice, the list modification
+         * operations are fairly spaced, and so this check typically succeeds at
+         * most on the second try.
+         */
+again:
+        if (++nr_loops > 3)
                 return;

-        if (RB_WARN_ON(cpu_buffer,
-                       rb_list_head(rb_list_head(head->prev)->next) != head))
-                return;
+        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+        head = rb_list_head(cpu_buffer->pages);
+        if (!rb_check_links(cpu_buffer, head))
+                goto out_locked;
+        buffer_cnt = cpu_buffer->cnt;
+        tmp = head;
+        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);

-        for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
-                if (RB_WARN_ON(cpu_buffer,
-                               rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
-                        return;
+        while (true) {
+                raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);

-                if (RB_WARN_ON(cpu_buffer,
-                               rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
-                        return;
+                if (buffer_cnt != cpu_buffer->cnt) {
+                        /* The list was updated, try again. */
+                        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+                        goto again;
+                }
+
+                tmp = rb_list_head(tmp->next);
+                if (tmp == head)
+                        /* The iteration circled back, all is done. */
+                        goto out_locked;
+
+                if (!rb_check_links(cpu_buffer, tmp))
+                        goto out_locked;
+
+                raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
         }
+
+out_locked:
+        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 }

 /*
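
[Illustrative sketch, not part of the patch.] To make the pattern in the hunk above easier to follow outside the kernel sources, here is a minimal userspace analogue of the same idea: walk a circular doubly linked list one node per lock hold, and restart the walk if a generation counter shows the list changed while the lock was dropped. All names (struct ring, check_ring, gen) are hypothetical; a pthread mutex stands in for reader_lock and the rb_list_head() flag masking is omitted.

#include <pthread.h>
#include <stdbool.h>

struct node {
        struct node *next, *prev;
};

struct ring {
        pthread_mutex_t lock;   /* stands in for reader_lock */
        struct node *pages;     /* head of the circular list */
        unsigned long gen;      /* bumped on every list change, like cnt */
};

/* Validate one node's links; caller holds ring->lock. */
static bool check_links(const struct node *n)
{
        return n->next->prev == n && n->prev->next == n;
}

/*
 * Walk the list one node per lock hold so the lock is never held for a
 * time proportional to the list length. Restart if the generation moved
 * while the lock was dropped; give up (treat as OK) after three tries.
 */
static bool check_ring(struct ring *r)
{
        struct node *head, *tmp;
        unsigned long gen;
        int tries = 0;
        bool ok = true;

again:
        if (++tries > 3)
                return true;

        pthread_mutex_lock(&r->lock);
        head = r->pages;
        if (!check_links(head)) {
                ok = false;
                goto out_locked;
        }
        gen = r->gen;
        tmp = head;
        pthread_mutex_unlock(&r->lock);

        for (;;) {
                pthread_mutex_lock(&r->lock);
                if (gen != r->gen) {
                        /* List changed while unlocked: restart the walk. */
                        pthread_mutex_unlock(&r->lock);
                        goto again;
                }
                tmp = tmp->next;
                if (tmp == head)
                        goto out_locked;  /* full circle, all links checked */
                if (!check_links(tmp)) {
                        ok = false;
                        goto out_locked;
                }
                pthread_mutex_unlock(&r->lock);
        }

out_locked:
        pthread_mutex_unlock(&r->lock);
        return ok;
}

Note that, as in the patch, tmp->next is only dereferenced after reconfirming under the lock that the generation has not changed, so the node is guaranteed to still be on the list at that point.
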
@@ -2384,9 +2433,9 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
  * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
  * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
+ * @order: sub-buffer order
  * @start: start of allocated range
  * @range_size: size of allocated range
- * @order: sub-buffer order
  * @key: ring buffer reader_lock_key.
  *
  * Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -2532,6 +2581,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)

         /* make sure pages points to a valid page in the ring buffer */
         cpu_buffer->pages = next_page;
+        cpu_buffer->cnt++;

         /* update head page */
         if (head_bit)
@@ -2638,6 +2688,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
                          * pointer to point to end of list
                          */
                         head_page->prev = last_page;
+                        cpu_buffer->cnt++;
                         success = true;
                         break;
                 }
@@ -2873,12 +2924,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                  */
                 synchronize_rcu();
                 for_each_buffer_cpu(buffer, cpu) {
-                        unsigned long flags;
-
                         cpu_buffer = buffer->buffers[cpu];
-                        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
                         rb_check_pages(cpu_buffer);
-                        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
                 }
                 atomic_dec(&buffer->record_disabled);
         }
@@ -4010,7 +4057,7 @@ static const char *show_irq_str(int bits)
         return type[bits];
 }

-/* Assume this is an trace event */
+/* Assume this is a trace event */
 static const char *show_flags(struct ring_buffer_event *event)
 {
         struct trace_entry *entry;
@@ -5296,6 +5343,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
         rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
         rb_inc_page(&cpu_buffer->head_page);

+        cpu_buffer->cnt++;
         local_inc(&cpu_buffer->pages_read);

         /* Finally update the reader page to the new head */
@@ -5835,12 +5883,9 @@ void
 ring_buffer_read_finish(struct ring_buffer_iter *iter)
 {
         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
-        unsigned long flags;

         /* Use this opportunity to check the integrity of the ring buffer. */
-        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
         rb_check_pages(cpu_buffer);
-        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);

         atomic_dec(&cpu_buffer->resize_disabled);
         kfree(iter->event);
@@ -6757,6 +6802,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
                 /* Install the new pages, remove the head from the list */
                 cpu_buffer->pages = cpu_buffer->new_pages.next;
                 list_del_init(&cpu_buffer->new_pages);
+                cpu_buffer->cnt++;

                 cpu_buffer->head_page
                         = list_entry(cpu_buffer->pages, struct buffer_page, list);
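
[Illustrative note.] The remaining hunks all follow the same writer-side rule: every site the patch touches that modifies the per-CPU pages list (rb_remove_pages(), rb_insert_pages(), rb_get_reader_page(), ring_buffer_subbuf_order_set()) also increments cnt, so an in-flight rb_check_pages() walk notices the change and restarts. In the userspace sketch above, that rule would amount to a one-line hypothetical helper called at each modification site:

/* Hypothetical helper for the sketch above; caller holds r->lock. */
static void ring_changed(struct ring *r)
{
        r->gen++;       /* make any concurrent check_ring() walk restart */
}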