--- ../mysql-5.0.27-orig/innobase/btr/btr0cur.c 2006-10-20 17:22:28.000000000 -0700 +++ innobase/btr/btr0cur.c 2007-03-02 10:57:56.673009568 -0800 @@ -54,7 +54,9 @@ /* When estimating number of different kay values in an index sample this many index pages */ -#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8 +#define BTR_KEY_VAL_ESTIMATE_N_PAGES_MIN 8 +#define BTR_KEY_VAL_ESTIMATE_N_PAGES_MAX 40 +#define BTR_KEY_VAL_ESTIMATE_MARGIN 20 /* Percent */ /* The structure of a BLOB part header */ /*--------------------------------------*/ @@ -2834,11 +2836,13 @@ ulint matched_fields; ulint matched_bytes; ib_longlong* n_diff; + ib_longlong last_n_diff; ulint not_empty_flag = 0; ulint total_external_size = 0; ulint i; ulint j; ulint add_on; + ulint btr_key_val_estimate_n_pages; mtr_t mtr; mem_heap_t* heap = NULL; ulint offsets_rec_[REC_OFFS_NORMAL_SIZE]; @@ -2857,7 +2861,7 @@ /* We sample some pages in the index to get an estimate */ - for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) { + for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES_MAX; i++) { rec_t* supremum; mtr_start(&mtr); @@ -2944,10 +2948,27 @@ btr_rec_get_externally_stored_len(rec, offsets_rec); mtr_commit(&mtr); + + /* At some point, we have checked enough pages so we can now + test for the delta cardinality as a percentage. + We look at the last column in the index for the + overall cardinality of the index. + */ + if (i >= BTR_KEY_VAL_ESTIMATE_N_PAGES_MIN + && n_diff[n_cols] /*No divide by zero! */ + && (( n_diff[n_cols] - last_n_diff ) / n_diff[n_cols] * 100 ) < BTR_KEY_VAL_ESTIMATE_MARGIN + ) { + break; + } + + last_n_diff = n_diff[n_cols]; } + /* Remember How Far We Got */ + btr_key_val_estimate_n_pages = i; + /* If we saw k borders between different key values on - BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many + btr_key_val_estimate_n_pages leaf pages, we can estimate how many there will be in index->stat_n_leaf_pages */ /* We must take into account that our sample actually represents @@ -2958,25 +2979,25 @@ index->stat_n_diff_key_vals[j] = (n_diff[j] * (ib_longlong)index->stat_n_leaf_pages - + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + + btr_key_val_estimate_n_pages - 1 + total_external_size + not_empty_flag) - / (BTR_KEY_VAL_ESTIMATE_N_PAGES + / (btr_key_val_estimate_n_pages + total_external_size); /* If the tree is small, smaller than < - 10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then + 10 * btr_key_val_estimate_n_pages + total_external_size, then the above estimate is ok. For bigger trees it is common that we do not see any borders between key values in the few pages - we pick. But still there may be BTR_KEY_VAL_ESTIMATE_N_PAGES + we pick. But still there may be btr_key_val_estimate_n_pages different key values, or even more. Let us try to approximate that: */ add_on = index->stat_n_leaf_pages / - (10 * (BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size)); + (10 * (btr_key_val_estimate_n_pages + total_external_size)); - if (add_on > BTR_KEY_VAL_ESTIMATE_N_PAGES) { - add_on = BTR_KEY_VAL_ESTIMATE_N_PAGES; + if (add_on > btr_key_val_estimate_n_pages) { + add_on = btr_key_val_estimate_n_pages; } index->stat_n_diff_key_vals[j] += add_on;