Bug #78727 Wrong column referenced from derived table when doing OrderBy
Submitted: 7 Oct 2015 0:55 Modified: 7 Oct 2015 15:18
Reporter: Ryan Griffith (OCA) Email Updates:
Status: Closed Impact on me:
None 
Category:Connector / NET Severity:S1 (Critical)
Version:6.9.6 OS:Any
Assigned to: CPU Architecture:Any

[7 Oct 2015 0:55] Ryan Griffith
Description:
When performing a Where -> GroupBy -> OrderBy, -> Take() -> ToList(), the OrderBy column is transposed in the final derived table in SQL. 

Notice column C1

How to repeat:
***************
SQL TABLES
***************

CREATE TABLE `fact_web_session_summary` (
	`id` INT(11) NOT NULL AUTO_INCREMENT,
	`date_id` INT(11) NOT NULL,
	`web_profile_id` INT(11) NOT NULL,
	`web_visitor_location_id` INT(11) NOT NULL,
	`web_platform_device_id` INT(11) NOT NULL,
	`web_source_id` INT(11) NOT NULL,
	`web_session_flags_id` INT(11) NOT NULL,
	`page_views` INT(11) NOT NULL,
	`hits` INT(11) NOT NULL,
	`users` INT(11) NOT NULL,
	`new_users` INT(11) NOT NULL,
	`sessions` INT(11) NOT NULL,
	`bounces` INT(11) NOT NULL,
	`duration` INT(11) NOT NULL
	PRIMARY KEY (`id`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB;

CREATE TABLE `dim_web_source` (
	`id` INT(11) NOT NULL AUTO_INCREMENT,
	`source` VARCHAR(200) NOT NULL,
	`medium` VARCHAR(200) NOT NULL,
	PRIMARY KEY (`id`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;

---------------------------------

***************
MODELS FOR DB CONTEXT
***************

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Test {
    public class fact_web_session_summary {

        public int id { get; set; }

        public int duration { get; set; }
        public int bounces { get; set; }
        public int sessions { get; set; }
        public int new_users { get; set; }
        public int users { get; set; }
        public int page_views { get; set; }
        public int hits { get; set; }

        public int web_source_id { get; set; }
        public virtual dim_web_source web_source { get; set; }
		
		public int date_id { get; set; }
        public virtual dim_date date { get; set; }
    }
}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Test {
    public class dim_web_source {
        public int id { get; set; }
        public string source { get; set; }
        public string medium { get; set; }

        public virtual ICollection<fact_web_session_summary> web_session_summaries { get; set; }
    }
}

--------------------------------

***************
LINQ STATEMENT
***************

var startDate = Convert.ToDateTime("01/01/2015");
var endDate = Convert.ToDateTime("10/01/2015");

dbContext.fact_web_session_summary
.Where(o => o.dim_date.date >= startDate && o.dim_date <= endDate)
.GroupBy(o => o.web_source.source)
.Select(o => new {
	Source = o.Key,
	Sessions = o.Sum(t => t.sessions),
	NewUsers = o.Sum(t => t.new_users),
	PageViews = o.Sum(t => t.page_views),
	Bounces = o.Sum(t => t.bounces),
	Duration = o.Sum(t => t.duration),
	Hits = o.Sum(t => t.hits)
})
.OrderByDescending(o => o.Sessions)
.Take(10)
.ToList()
.Select(o => new Itc.MetricPortal.WebStats.Metric.ViewModels.SummaryViewModel() {
	KeyName = o.Source,
	Sessions = o.Sessions,
	NewUsers = o.NewUsers,
	PageViews = o.PageViews,
	Bounces = o.Bounces,
	Duration = o.Duration,
	Hits = o.Hits
});
				
				
				
				
				
				
				
				
				
				
-------------------------

***************
SQL OUTPUT
***************

SELECT
`Project1`.`C7` AS `C1`, 
`Project1`.`source`, 
`Project1`.`C1` AS `C2`, 
`Project1`.`C2` AS `C3`, 
`Project1`.`C3` AS `C4`, 
`Project1`.`C4` AS `C5`, 
`Project1`.`C5` AS `C6`, 
`Project1`.`C6` AS `C7`
FROM (SELECT
`GroupBy1`.`A1` AS `C1`, 
`GroupBy1`.`A2` AS `C2`, 
`GroupBy1`.`A3` AS `C3`, 
`GroupBy1`.`A4` AS `C4`, 
`GroupBy1`.`A5` AS `C5`, 
`GroupBy1`.`A6` AS `C6`, 
`GroupBy1`.`K1` AS `source`, 
1 AS `C7`
FROM (SELECT
`Extent3`.`source` AS `K1`, 
SUM(`Extent1`.`sessions`) AS `A1`, 
SUM(`Extent1`.`new_users`) AS `A2`, 
SUM(`Extent1`.`page_views`) AS `A3`, 
SUM(`Extent1`.`bounces`) AS `A4`, 
SUM(`Extent1`.`duration`) AS `A5`, 
SUM(`Extent1`.`hits`) AS `A6`
FROM `fact_web_session_summary` AS `Extent1` 
INNER JOIN `dim_date` AS `Extent2` ON `Extent1`.`date_id` = `Extent2`.`date_id` 
INNER JOIN `dim_web_source` AS `Extent3` ON `Extent1`.`web_source_id` = `Extent3`.`id`
 WHERE (`Extent2`.`date` >= '2015-01-01') AND (`Extent2`.`date` <= '2015-10-01')
 GROUP BY 
`Extent3`.`source`) AS `GroupBy1`) AS `Project1`
 ORDER BY 
`Project1`.`C1` DESC LIMIT 10
[7 Oct 2015 0:57] Ryan Griffith
I had forgotten to include dim_date:

using System;
using System.Collections.Generic;

namespace Test
{
    public partial class dim_date
    {
        public dim_date() {

        }

        public int date_id { get; set; }
        public System.DateTime date { get; set; }

    }
}
[7 Oct 2015 4:57] Ryan Griffith
This is broken core functionality. This is critical in my opinion.
[7 Oct 2015 15:07] Ryan Griffith
I've noticed that if I change the order by to another summarized column, say,

.OrderByDescending(o => o.PageViews)

The output SQL is still wrong, and is in fact ordered by the column just preceding PageViews, which is NewUsers. It seems there is an offset
[7 Oct 2015 15:18] Ryan Griffith
YOU CAN DISREGARD THIS BUG:

This bug is not an issue with the .Net connector. This is actually an issue with MariaDB 10.1.7. I was confused because of the very similar column names in the derived tables vs. the final projection and did not realize that the final projection was grouped by the derived table's original column name.