diff --git a/sqltoolsservice.sln b/sqltoolsservice.sln index e46e2091..11efaa0e 100644 --- a/sqltoolsservice.sln +++ b/sqltoolsservice.sln @@ -114,6 +114,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "azure-pipelines", "azure-pi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.Kusto.ServiceLayer.UnitTests", "test\Microsoft.Kusto.ServiceLayer.UnitTests\Microsoft.Kusto.ServiceLayer.UnitTests.csproj", "{AFCDED82-B659-4BE1-86ED-0F4F8BC661AE}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.InsightsGenerator", "src\Microsoft.InsightsGenerator\Microsoft.InsightsGenerator.csproj", "{7F2659DB-92C8-4823-AFB9-88BC1B6D959F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.InsightsGenerator.UnitTests", "test\Microsoft.InsightsGenerator.UnitTests\Microsoft.InsightsGenerator.UnitTests.csproj", "{BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -265,6 +269,18 @@ Global {AFCDED82-B659-4BE1-86ED-0F4F8BC661AE}.Integration|Any CPU.Build.0 = Debug|Any CPU {AFCDED82-B659-4BE1-86ED-0F4F8BC661AE}.Release|Any CPU.ActiveCfg = Release|Any CPU {AFCDED82-B659-4BE1-86ED-0F4F8BC661AE}.Release|Any CPU.Build.0 = Release|Any CPU + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F}.Integration|Any CPU.ActiveCfg = Debug|Any CPU + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F}.Integration|Any CPU.Build.0 = Debug|Any CPU + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F}.Release|Any CPU.Build.0 = Release|Any CPU + {BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}.Debug|Any CPU.Build.0 = Debug|Any CPU + 
{BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}.Integration|Any CPU.ActiveCfg = Debug|Any CPU + {BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}.Integration|Any CPU.Build.0 = Debug|Any CPU + {BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -296,6 +312,8 @@ Global {0EC2B30C-0652-49AE-9594-85B3C3E9CA21} = {AB9CA2B8-6F70-431C-8A1D-67479D8A7BE4} {E0C941C8-91F2-4BE1-8B79-AC88EDB78729} = {2BBD7364-054F-4693-97CD-1C395E3E84A9} {AFCDED82-B659-4BE1-86ED-0F4F8BC661AE} = {AB9CA2B8-6F70-431C-8A1D-67479D8A7BE4} + {7F2659DB-92C8-4823-AFB9-88BC1B6D959F} = {2BBD7364-054F-4693-97CD-1C395E3E84A9} + {BB7FF5B5-84E3-4F4B-A2A7-2CC4C75632E9} = {AB9CA2B8-6F70-431C-8A1D-67479D8A7BE4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {B31CDF4B-2851-45E5-8C5F-BE97125D9DD8} diff --git a/src/Microsoft.InsightsGenerator/DataArray.cs b/src/Microsoft.InsightsGenerator/DataArray.cs new file mode 100644 index 00000000..b5382f9b --- /dev/null +++ b/src/Microsoft.InsightsGenerator/DataArray.cs @@ -0,0 +1,26 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+//
+
+namespace Microsoft.InsightsGenerator
+{
+    public class DataArray
+    {
+        public enum DataType
+        {
+            Number,
+            String,
+            DateTime
+        }
+
+        public string[] ColumnNames { get; set; }
+
+        public string[] TransformedColumnNames { get; set; }
+
+        public DataType[] ColumnDataType { get; set; }
+
+        public object[][] Cells { get; set; }
+    }
+}
+
diff --git a/src/Microsoft.InsightsGenerator/DataTransformation.cs b/src/Microsoft.InsightsGenerator/DataTransformation.cs
new file mode 100644
index 00000000..1a68271e
--- /dev/null
+++ b/src/Microsoft.InsightsGenerator/DataTransformation.cs
@@ -0,0 +1,214 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+using System;
+using System.Collections.Generic;
+
+namespace Microsoft.InsightsGenerator
+{
+    /// <summary>
+    /// Classifies the columns of a <see cref="DataArray"/> and assigns each column a
+    /// role label: "input_t_N", "input_g_0", "slicer_N" or "output_N".
+    /// </summary>
+    public class DataTransformer
+    {
+        /// <summary>
+        /// Facts gathered about one column while scanning the cells.
+        /// </summary>
+        private class ColumnInfo
+        {
+            public int ColumnIndex { get; set; }
+
+            public int DistinctValues { get; set; }
+
+            public DataArray.DataType DataType { get; set; }
+        }
+
+        /// <summary>
+        /// Sets <see cref="DataArray.TransformedColumnNames"/> and
+        /// <see cref="DataArray.ColumnDataType"/> on <paramref name="array"/>.
+        /// </summary>
+        /// <param name="array">The data to label; it is modified in place.</param>
+        /// <returns>
+        /// The instance that was passed in. It is returned unchanged when it is null,
+        /// has no rows, or has a null first row.
+        /// </returns>
+        public DataArray Transform(DataArray array)
+        {
+            if (array == null || array.Cells == null || array.Cells.Length == 0 || array.Cells[0] == null)
+            {
+                return array;
+            }
+
+            DataArray.DataType[] columnDataType;
+            array.TransformedColumnNames = GetColumnLabels(array, out columnDataType);
+            array.ColumnDataType = columnDataType;
+            return array;
+        }
+
+        /// <summary>
+        /// Builds one label and one data type per column of the first row.
+        /// </summary>
+        /// <param name="array">The data to inspect; its first row must not be null.</param>
+        /// <param name="columnDataType">Receives the data type chosen for each column.</param>
+        /// <returns>
+        /// The labels, indexed by column. With at least one DateTime column, DateTime columns
+        /// become "input_t_N" and String columns "slicer_N". Otherwise the first String column
+        /// with the most distinct values becomes "input_g_0" and the remaining String columns
+        /// "slicer_N". Number columns are always "output_N".
+        /// </returns>
+        private string[] GetColumnLabels(DataArray array, out DataArray.DataType[] columnDataType)
+        {
+            // Size both results by the width of the first row. ColumnNames is not needed
+            // for labelling, so a null or differently sized ColumnNames must not fault here.
+            int columnCount = array.Cells[0].Length;
+            columnDataType = new DataArray.DataType[columnCount];
+
+            Dictionary<DataArray.DataType, List<ColumnInfo>> columnInfo = new Dictionary<DataArray.DataType, List<ColumnInfo>>();
+            for (int column = 0; column < columnCount; ++column)
+            {
+                int distinctValues;
+                DataArray.DataType dataType = GetColumnType(array, column, out distinctValues);
+                columnDataType[column] = dataType;
+
+                List<ColumnInfo> columnsOfType;
+                if (!columnInfo.TryGetValue(dataType, out columnsOfType))
+                {
+                    columnsOfType = new List<ColumnInfo>();
+                    columnInfo.Add(dataType, columnsOfType);
+                }
+
+                columnsOfType.Add(new ColumnInfo()
+                {
+                    ColumnIndex = column,
+                    DistinctValues = distinctValues,
+                    DataType = dataType
+                });
+            }
+
+            List<ColumnInfo> dateColumns;
+            List<ColumnInfo> stringColumns;
+            List<ColumnInfo> numberColumns;
+            columnInfo.TryGetValue(DataArray.DataType.DateTime, out dateColumns);
+            columnInfo.TryGetValue(DataArray.DataType.String, out stringColumns);
+            columnInfo.TryGetValue(DataArray.DataType.Number, out numberColumns);
+
+            string[] labels = new string[columnCount];
+            if (dateColumns != null)
+            {
+                for (int i = 0; i < dateColumns.Count; ++i)
+                {
+                    labels[dateColumns[i].ColumnIndex] = "input_t_" + i;
+                }
+
+                if (stringColumns != null)
+                {
+                    for (int i = 0; i < stringColumns.Count; ++i)
+                    {
+                        labels[stringColumns[i].ColumnIndex] = "slicer_" + i;
+                    }
+                }
+            }
+            else if (stringColumns != null)
+            {
+                // The first String column with the highest distinct count is the group input.
+                int groupPosition = 0;
+                for (int i = 1; i < stringColumns.Count; ++i)
+                {
+                    if (stringColumns[i].DistinctValues > stringColumns[groupPosition].DistinctValues)
+                    {
+                        groupPosition = i;
+                    }
+                }
+
+                labels[stringColumns[groupPosition].ColumnIndex] = "input_g_0";
+
+                // Every other String column is a slicer, numbered consecutively from 0.
+                int slicerNumber = 0;
+                for (int i = 0; i < stringColumns.Count; ++i)
+                {
+                    if (i != groupPosition)
+                    {
+                        labels[stringColumns[i].ColumnIndex] = "slicer_" + slicerNumber;
+                        ++slicerNumber;
+                    }
+                }
+            }
+
+            if (numberColumns != null)
+            {
+                for (int i = 0; i < numberColumns.Count; ++i)
+                {
+                    labels[numberColumns[i].ColumnIndex] = "output_" + i;
+                }
+            }
+
+            return labels;
+        }
+
+        /// <summary>
+        /// Counts the distinct values in a column and decides the column's data type.
+        /// </summary>
+        /// <param name="array">The data to inspect.</param>
+        /// <param name="column">Zero-based index of the column.</param>
+        /// <param name="distinctValues">Receives the number of distinct values found in the column.</param>
+        /// <returns>
+        /// The entry of <see cref="DataArray.ColumnDataType"/> for the column when one is
+        /// provided; otherwise a type inferred from the first non-null value in the column.
+        /// </returns>
+        private DataArray.DataType GetColumnType(DataArray array, int column, out int distinctValues)
+        {
+            // Count distinct values. Rows that are null or too short to contain this
+            // column are skipped instead of faulting on jagged input.
+            HashSet<object> values = new HashSet<object>();
+            object firstValue = null;
+            for (int row = 0; row < array.Cells.Length; ++row)
+            {
+                object[] cells = array.Cells[row];
+                if (cells == null || column >= cells.Length)
+                {
+                    continue;
+                }
+
+                values.Add(cells[column]);
+                if (firstValue == null)
+                {
+                    firstValue = cells[column];
+                }
+            }
+            distinctValues = values.Count;
+
+            // return the provided type if available
+            if (array.ColumnDataType != null && array.ColumnDataType.Length > column)
+            {
+                return array.ColumnDataType[column];
+            }
+
+            // A column without any non-null value has nothing to parse; treat it as text.
+            if (firstValue == null)
+            {
+                return DataArray.DataType.String;
+            }
+
+            // determine the type from the first non-null value in the column
+            string firstValueString = firstValue.ToString();
+
+            long longValue;
+            double doubleValue;
+            if (long.TryParse(firstValueString, out longValue) || double.TryParse(firstValueString, out doubleValue))
+            {
+                return DataArray.DataType.Number;
+            }
+
+            DateTime dateValue;
+            if (DateTime.TryParse(firstValueString, out dateValue))
+            {
+                return DataArray.DataType.DateTime;
+            }
+
+            return DataArray.DataType.String;
+        }
+    }
+}
diff --git a/src/Microsoft.InsightsGenerator/Microsoft.InsightsGenerator.csproj b/src/Microsoft.InsightsGenerator/Microsoft.InsightsGenerator.csproj
new file mode 100644
index 00000000..6776c9e9
--- /dev/null
+++ b/src/Microsoft.InsightsGenerator/Microsoft.InsightsGenerator.csproj
@@ -0,0 +1,21 @@
+
+
+    netstandard2.0
+    false
+    false
+    false
+    false
+    false
+    $(DefineConstants);TRACE
+    true
+    portable
+
+
+
+
+
+
+    PreserveNewest
+
+
+
diff --git a/src/Microsoft.InsightsGenerator/Properties/AssemblyInfo.cs b/src/Microsoft.InsightsGenerator/Properties/AssemblyInfo.cs
new file mode 100644
index 00000000..aa9bd429
--- /dev/null
+++ b/src/Microsoft.InsightsGenerator/Properties/AssemblyInfo.cs
@@ -0,0 +1,44 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Microsoft InsightsGenerator")] +[assembly: AssemblyDescription("Provides Microsoft InsightsGenerator services.")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("Microsoft")] +[assembly: AssemblyProduct("SMicrosoft InsightsGenerator")] +[assembly: AssemblyCopyright("� Microsoft Corporation. All rights reserved.")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("5b6bd4c4-7352-4762-9ad2-578b3fbd1685")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] +[assembly: AssemblyInformationalVersion("1.0.0.0")] + +[assembly: InternalsVisibleTo("Microsoft.InsightsGenerator.UnitTests")] diff --git a/src/Microsoft.InsightsGenerator/RulesEngine.cs b/src/Microsoft.InsightsGenerator/RulesEngine.cs new file mode 100644 index 00000000..2c333018 --- /dev/null +++ b/src/Microsoft.InsightsGenerator/RulesEngine.cs @@ -0,0 +1,212 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; + +namespace Microsoft.InsightsGenerator +{ + public class RulesEngine + { + public static List