diff --git a/sql/field.cc b/sql/field.cc index 201f8a62cbc..03d4d89ec79 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -10523,6 +10523,67 @@ void Field::init(TABLE *table_arg) { table_name = &table_arg->alias; } +Use_field::Use_field(Field *field) : m_field(field) { + m_field->backup_field(&m_ptr, &m_null_ptr, &m_null_bit); + // Field_typed_array is accessed through its conversion field. + assert(!m_field->is_array()); + switch (field->type()) { + case MYSQL_TYPE_VARCHAR: + u.length_bytes = field->get_length_bytes(); + ((Field_varstring *)m_field)->length_bytes = HA_KEY_BLOB_LENGTH; + break; + case MYSQL_TYPE_BIT: + u.bit_len = ((Field_bit *)m_field)->bit_len; + // All bits are in the data storage. There is no uneven bits. + ((Field_bit *)m_field)->bit_len = 0; + break; + default: + break; + } +} + +Use_field::~Use_field() { + m_field->move_field(m_ptr, m_null_ptr, m_null_bit); + switch (m_field->type()) { + case MYSQL_TYPE_VARCHAR: + ((Field_varstring *)m_field)->length_bytes = u.length_bytes; + break; + case MYSQL_TYPE_BIT: + ((Field_bit *)m_field)->bit_len = u.bit_len; + break; + default: + break; + } +} + +void Use_field::move_field(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg) { + switch(m_field->type()) { + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_VECTOR: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_GEOMETRY: + case MYSQL_TYPE_JSON: { + /* + Craft a blob head to use keypart-value-bytes as overflow area. + blob-head: length_bytes overflow_pointer + keypart-value-bytes: value_length value_bytes + See also Field_blob::get_key_image(). + */ + uint16 value_length = uint2korr(ptr_arg); + uint32 pack_length = ((Field_blob *)m_field)->pack_length_no_ptr(); + store_blob_length(u.blob_head, pack_length, value_length); + *(uchar **)(u.blob_head + pack_length) = ptr_arg; + m_field->move_field(u.blob_head, null_ptr_arg, null_bit_arg); + break; + } + default: + m_field->move_field(ptr_arg, null_ptr_arg, null_bit_arg); + break; + } +} + // Byteswaps and/or truncates int16 values; used for both pack() and unpack(). static inline void handle_int16(uchar *to, const uchar *from, size_t max_length, bool low_byte_first_from, diff --git a/sql/field.h b/sql/field.h index a6f3bf23b75..e0c484c9ae6 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1411,6 +1411,12 @@ class Field { null_bit = null_bit_arg; } + void backup_field(uchar **old_ptr, uchar **old_null_ptr, uchar *old_null_bit) { + *old_ptr = ptr; + *old_null_ptr = m_null_ptr; + *old_null_bit = null_bit; + } + virtual void move_field_offset(ptrdiff_t ptr_diff) { ptr += ptr_diff; if (is_nullable()) m_null_ptr += ptr_diff; @@ -3593,6 +3599,7 @@ class Field_varstring : public Field_longstr { uint32 get_length_bytes() const override { return length_bytes; } private: + friend class Use_field; /* Store number of bytes used to store length (1 or 2) */ uint32 length_bytes; @@ -4767,6 +4774,44 @@ class Copy_field { Field *to_field() const { return m_to_field; } }; +/// RAII class to read data in KeyTupleFormat through a wrapper field. +class Use_field { + public: + /** + Constructor for Use_field. + + @param field The wrapper field. + */ + Use_field(Field *field); + + ~Use_field(); + + /** + Move the wrapper field to use given data storage and null bits storage. + + @param ptr_arg Pointer to a data storage in keypart-value-bytes format. + @param null_ptr_arg Pointer to a temporary null bits storage. + @param null_bit_arg The content of the null bits storage. + */ + void move_field(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg); + + private: + // The wrapper field. + Field *m_field; + union { + // Temporary blob head with key image as overflow storage. + uchar blob_head[4 + portable_sizeof_char_ptr]; + // Saved length_bytes for Field_varstring. + uint32 length_bytes; + // Saved bit_len for Field_bit. + uint bit_len; + } u; + // Saved ptrs and null bits storage. + uchar *m_ptr; + uchar *m_null_ptr; + uchar m_null_bit; +}; + enum_field_types get_blob_type_from_length(size_t length); size_t calc_pack_length(enum_field_types type, size_t length); diff --git a/sql/histograms/histogram.cc b/sql/histograms/histogram.cc index c470feec5c5..c9a163a82c0 100644 --- a/sql/histograms/histogram.cc +++ b/sql/histograms/histogram.cc @@ -79,6 +79,7 @@ #include "sql/mem_root_array.h" // Mem_root_array #include "sql/mysqld.h" // read_only #include "sql/psi_memory_key.h" // key_memory_histograms +#include "sql/range_optimizer/tree.h" // invert_min_max_flag #include "sql/sql_base.h" // open_and_lock_tables, close_thread_tables #include "sql/sql_bitmap.h" #include "sql/sql_class.h" // make_lex_string_root @@ -2233,6 +2234,40 @@ static bool get_temporal(Item *item, Value_map_type preferred_type, return false; } +inline const CHARSET_INFO *charset_of(Field *field) { + return field->charset(); +} + +inline enum_field_types data_type_of(Field *field) { + return field->type(); +} + +inline bool is_null(Field *field) { + return field->is_real_null(); +} + +static bool get_temporal(Field *field, Value_map_type preferred_type, + MYSQL_TIME *time_value) { + switch (preferred_type) { + case Value_map_type::DATETIME: + TIME_from_longlong_datetime_packed(time_value, field->val_date_temporal()); + break; + case Value_map_type::DATE: + TIME_from_longlong_date_packed(time_value, field->val_date_temporal()); + break; + case Value_map_type::TIME: + TIME_from_longlong_time_packed(time_value, field->val_time_temporal()); + break; + default: + /* purecov: begin deadcode */ + assert(0); + break; + /* purecov: end deadcode */ + } + + return false; +} + template double Histogram::apply_operator(const enum_operator op, const T &value) const { switch (op) { @@ -2380,6 +2415,136 @@ template bool Histogram::get_selectivity_dispatcher(Item *, const enum_operator, const TYPELIB *, double *) const; +template +bool Histogram::get_selectivity_dispatcher(Field *, const enum_operator, + const TYPELIB *, double *) const; + +bool Histogram::get_raw_range_selectivity(Field *fld, uint flag, + const uchar *minp, const uchar *maxp, + double *selectivity) const { + // Temporarily reuse the field to provide endpoint value. + Use_field use_field(fld); + uchar null_bit = 0; + + double null_selectivity = 0.0; + double min_selectivity = 0.0; + double max_selectivity = 0.0; + + /* + SEL_ROOT::store_min/max_value() store reversed endpoints if the + first key part is descending. + */ + if ((flag & DESC_FLAG)) { + flag = invert_min_max_flag(flag); + std::swap(minp, maxp); + } + + /* + Note that histogram separates null ratio from value buckets, while a field + range might include NULL, a value range, or both. So the selectivity formula + of a field range is: + + Prange = Pnull + Px + Py - Pnon_null + + where, + + 1) Px is for 'fld > x' or 'fld >= x', and x can be any value or -Inf. + 2) Py is for 'fld < y' or 'fld <= y', and y can be any value or +Inf. + 3) Pnon_null is for (3.1) 'fld > -Inf', (3.2) 'fld < +Inf' or both. + 4) Obviously, Pnull + Pnon_null = 1. + + Note that a field range, represented by endpoint values in keypart_data + format as well as flags, could be any of these forms: + + id | field range | expression | flag + ----|--------------|--------------------------|-------- + F1 | [X, Y] | fld >= X AND fld <= Y | - + F2 | (X, Y] | fld > X AND fld <=Y | NEAR_MIN + F3 | [X, Y) | fld >= X AND fld < Y | NEAR_MAX + F4 | (X, Y) | fld > X AND fld < Y | NEAR_MIN NEAR_MAX + F5 | [X, +Inf) | fld >= X | NO_MAX_RANGE + F6 | (X, +Inf) | fld > X | NO_MAX_RANGE NEAR_MIN + F7 | [NULL, +Inf) | fld IS NULL OR fld IS NOT NULL | NO_MAX_RANGE + F8 | (NULL, +Inf) | fld IS NOT NULL (**) | NO_MAX_RANGE NEAR_MIN + F9 | (-Inf, Y) (*)| fld < Y | NO_MIN_RANGE NEAR_MAX + F10 | (-Inf, Y] | fld <= Y | NO_MIN_RANGE + F11 | (NULL, Y) (*)| fld < Y | NEAR_MIN NEAR_MAX + F12 | (NULL, Y] | fld <= Y | NEAR_MIN + F13 | [NULL, Y) | fid IS NULL OR fld < Y | NEAR_MAX + F14 | [NULL, Y] | fld IS NULL OR fld <= Y | - + F15 | [NULL, NULL] | fld IS NULL | - + + (*) Although the forms for 'fld NOT NULL' and 'fld NULL' are different, + they are the same in essence. + (**) 'fld IS NOT NULL' with 'fld NOT NULL' evaluates to true, so there is no + such field range. + + Here are some observations: + + (A) An inclusive NULL at the lower end implies null selectivity. + (B) NULL at the lower end implies 'fld > -Inf'. + (C) NULL at the upper end is meaningless with respect to value range (F15) + + If we define Py = 0 for the third observation, then 'Px + Py - Pnon_null' + evaluates to 0. Then the selectivity formula applies to all forms. + */ + + // Note that we only support equality/inequality operators for ENUM and SET, + // a reduced form, 'Prange = P', is required for get_raw_selectivity(). + if ((flag & EQ_RANGE)) { + if (flag & NULL_RANGE) { + *selectivity = get_null_values_fraction(); + return false; + } + use_field.move_field(const_cast(minp) + fld->is_nullable(), + &null_bit, null_bit); + return get_raw_selectivity(fld, histograms::enum_operator::EQUALS_TO, + selectivity); + } + + if ((flag & NO_MIN_RANGE)) { // (3.1) + min_selectivity = get_non_null_values_fraction(); + } else { + if (fld->is_nullable() && *minp) { + if (!(flag & NEAR_MIN)) { // (A) + null_selectivity = get_null_values_fraction(); + } + // (B) + min_selectivity = get_non_null_values_fraction(); + } else { + histograms::enum_operator op; + if (flag & NEAR_MIN) op = histograms::enum_operator::GREATER_THAN; + else op = histograms::enum_operator::GREATER_THAN_OR_EQUAL; + + use_field.move_field(const_cast(minp) + fld->is_nullable(), + &null_bit, null_bit); + if (get_raw_selectivity(fld, op, &min_selectivity)) return true; + } + } + + if ((flag & NO_MAX_RANGE)) { // (3.2) + max_selectivity = get_non_null_values_fraction(); + } else { + if (fld->is_nullable() && *maxp) { // (C) + assert(0); // already caught by the reduced form. + assert(!(flag & NO_MIN_RANGE) && (flag & NULL_RANGE) && *minp); + max_selectivity = 0; + } else { + histograms::enum_operator op; + if ((flag & NEAR_MAX)) op = histograms::enum_operator::LESS_THAN; + else op = histograms::enum_operator::LESS_THAN_OR_EQUAL; + + use_field.move_field(const_cast(maxp) + fld->is_nullable(), + &null_bit, null_bit); + if (get_raw_selectivity(fld, op, &max_selectivity)) return true; + } + } + + *selectivity = null_selectivity + min_selectivity + max_selectivity - + get_non_null_values_fraction(); + return false; +} + bool Histogram::get_selectivity(Item **items, size_t item_count, enum_operator op, double *selectivity) const { if (get_raw_selectivity(items, item_count, op, selectivity)) return true; @@ -2426,6 +2591,94 @@ bool Histogram::get_selectivity(Item **items, size_t item_count, return false; } +bool Histogram::get_selectivity(Field *field, enum_operator op, + double *selectivity) const { + if (get_raw_selectivity(field, op, selectivity)) return true; + + const double minimum_selectivity = 0.001; + *selectivity = std::max(*selectivity, minimum_selectivity); + return false; +} + +bool Histogram::get_raw_selectivity(Field *field, enum_operator op, + double *selectivity) const { + const TYPELIB *typelib = nullptr; + if (field->real_type() == MYSQL_TYPE_ENUM || + field->real_type() == MYSQL_TYPE_SET) { + const Field_enum *field_enum = + down_cast(field); + typelib = field_enum->typelib; + } + + switch (op) { + case enum_operator::LESS_THAN: + case enum_operator::EQUALS_TO: + case enum_operator::GREATER_THAN: { + return get_selectivity_dispatcher(field, op, typelib, selectivity); + } + case enum_operator::LESS_THAN_OR_EQUAL: { + double less_than_selectivity; + double equals_to_selectivity; + if (get_selectivity_dispatcher(field, enum_operator::LESS_THAN, + typelib, &less_than_selectivity) || + get_selectivity_dispatcher(field, enum_operator::EQUALS_TO, + typelib, &equals_to_selectivity)) + return true; + + *selectivity = std::min(less_than_selectivity + equals_to_selectivity, + get_non_null_values_fraction()); + return false; + } + case enum_operator::GREATER_THAN_OR_EQUAL: { + double greater_than_selectivity; + double equals_to_selectivity; + if (get_selectivity_dispatcher(field, enum_operator::GREATER_THAN, + typelib, &greater_than_selectivity) || + get_selectivity_dispatcher(field, enum_operator::EQUALS_TO, + typelib, &equals_to_selectivity)) + return true; + + *selectivity = std::min(greater_than_selectivity + equals_to_selectivity, + get_non_null_values_fraction()); + return false; + } + case enum_operator::BETWEEN: { + double less_than_selectivity; + double greater_than_selectivity; + if (get_selectivity_dispatcher(field, enum_operator::LESS_THAN, + typelib, &less_than_selectivity) || + get_selectivity_dispatcher(field, enum_operator::GREATER_THAN, + typelib, &greater_than_selectivity)) + return true; + + *selectivity = this->get_non_null_values_fraction() - + (less_than_selectivity + greater_than_selectivity); + /* + Make sure that we don't return a value less than 0.0. This might happen + with a query like: + EXPLAIN SELECT a FROM t1 WHERE t1.a BETWEEN 3 AND 0; + */ + *selectivity = std::max(0.0, *selectivity); + return false; + } + case enum_operator::IS_NULL: + *selectivity = this->get_null_values_fraction(); + return false; + case enum_operator::IS_NOT_NULL: + *selectivity = 1.0 - this->get_null_values_fraction(); + return false; + default: { + assert(false); + return true; + } + } + + /* purecov: begin deadcode */ + assert(false); + return true; + /* purecov: end deadcode */ +} + bool Histogram::get_raw_selectivity(Item **items, size_t item_count, enum_operator op, double *selectivity) const { diff --git a/sql/histograms/histogram.h b/sql/histograms/histogram.h index 9b616088ee2..01bb5672df9 100644 --- a/sql/histograms/histogram.h +++ b/sql/histograms/histogram.h @@ -454,12 +454,21 @@ class Histogram { bool m_auto_update; /** - An internal function for getting a selectivity estimate prior to adustment. + An internal function for getting a selectivity estimate prior to adjustment. @see get_selectivity() for details. */ bool get_raw_selectivity(Item **items, size_t item_count, enum_operator op, double *selectivity) const; + /** + An internal function for getting a selectivity estimate prior to adjustment. + + @param field Provide a value to estimate 'op value' + @param op The predicate operator + @param[out] selectivity The calculated selectivity + */ + bool get_raw_selectivity(Field *field, enum_operator op, + double *selectivity) const; /** An internal function for getting the selecitvity estimation. @@ -682,6 +691,38 @@ class Histogram { bool get_selectivity(Item **items, size_t item_count, enum_operator op, double *selectivity) const; + /** + Get selectivity estimation by field value. + + This function will try and get the selectivity estimation for a predicate + on the form "OPERATOR CONSTANT", for instance "> 23", however, the constant + is provided by the field rather than being literal. Note that the field + already has type info, so it does not need COLUMN as in the other + get_selectivity() functions. + + @param field the field with user-provided constant value. + @param op the predicate operator + @param[out] selectivity the calculated selectivity + + @retval true if an error occurred + @return false if success + */ + bool get_selectivity(Field *field, enum_operator op, + double *selectivity) const; + + /** + Get selectivity estimation by field range endpoints, prior to adjustment. + + @param field Provide a temporary storage to evaluate endpoints + @param flag Describe presence and inclusion of endpoints, as + well as range properties (EQ_RANGE, NULL_RANGE). + @param minp The lower endpoint value buffer + @param maxp The upper endpoint value buffer + @param[out] selectivity The calculated selectivity + */ + bool get_raw_range_selectivity(Field *fld, uint flag, const uchar *minp, + const uchar *maxp, double *selectivity) const; + /** @return the fraction of non-null values in the histogram. */ diff --git a/sql/range_optimizer/tree.h b/sql/range_optimizer/tree.h index ca0347b4276..f875e52c417 100644 --- a/sql/range_optimizer/tree.h +++ b/sql/range_optimizer/tree.h @@ -244,6 +244,21 @@ inline uint invert_max_flag(uint max_flag) { return min_flag_out; } +/** + A helper function to invert min flags and max flags for DESC key parts. + It is needed because invert_min_flag(flag) and invert_max_flag(flag) preserve + flags of the other end. +*/ +inline uint invert_min_max_flag(uint range_flag) { + uint flag_out = + range_flag & ~(NEAR_MIN | NO_MIN_RANGE | NEAR_MAX | NO_MAX_RANGE); + if (range_flag & NEAR_MIN) flag_out |= NEAR_MAX; + if (range_flag & NO_MIN_RANGE) flag_out |= NO_MAX_RANGE; + if (range_flag & NEAR_MAX) flag_out |= NEAR_MIN; + if (range_flag & NO_MAX_RANGE) flag_out |= NO_MIN_RANGE; + return flag_out; +} + /* A construction block of the SEL_ARG-graph.