[Lustre-discuss] Problems patching fs/jbd/checkpoint.c in RHEL4 2.6.9-67.0.4 kernel

Tue Mar 18 10:44:55 PDT 2008

My manual patching of the rejects in checkpoint.c didn't work out:
deleting 10,000 files caused a kernel panic (on any ext filesystem,
not just Lustre).

In the new checkpoint.c, two routines expected by the patch no longer
exist: __cleanup_transaction and __flush_buffer.

I can avoid the panic if I omit (don't try to manually patch) the following:

Index: linux-2.6.9/fs/jbd/checkpoint.c
===================================================================

--- linux-2.6.9.orig/fs/jbd/checkpoint.c
+++ linux-2.6.9/fs/jbd/checkpoint.c
@@ -166,6 +166,7 @@ static int __cleanup_transaction(journal
                        transaction_t *t = jh->b_transaction;
                        tid_t tid = t->t_tid;

+                       transaction->t_chp_stats.cs_forced_to_close++;
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        log_start_commit(journal, tid);
@@ -226,7 +227,7 @@ __flush_batch(journal_t *journal, struct
  */
 static int __flush_buffer(journal_t *journal, struct journal_head *jh,
                        struct buffer_head **bhs, int *batch_count,
-                       int *drop_count)
+                       int *drop_count, transaction_t *transaction)
 {
        struct buffer_head *bh = jh2bh(jh);
        int ret = 0;
@@ -247,6 +248,7 @@ static int __flush_buffer(journal_t *jou
                set_buffer_jwrite(bh);
                bhs[*batch_count] = bh;
                jbd_unlock_bh_state(bh);
+               transaction->t_chp_stats.cs_written++;
                (*batch_count)++;
                if (*batch_count == NR_BATCH) {
                        __flush_batch(journal, bhs, batch_count);
@@ -315,6 +317,8 @@ int log_do_checkpoint(journal_t *journal
                tid_t this_tid;

                transaction = journal->j_checkpoint_transactions;
+               if (transaction->t_chp_stats.cs_chp_time == 0)
+                       transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
                this_tid = transaction->t_tid;
                jh = transaction->t_checkpoint_list;
                last_jh = jh->b_cpprev;
@@ -331,7 +335,8 @@ int log_do_checkpoint(journal_t *journal
                                retry = 1;
                                break;
                        }
-                       retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+                       retry = __flush_buffer(journal, jh, bhs, &batch_count,
+                                       &drop_count, transaction);
                } while (jh != last_jh && !retry);

                if (batch_count) {

How critical are/were these patches?

Thanks,

Chris