From 84f62a3e85ce0a18a131a136d390132f8c1e7a96 Mon Sep 17 00:00:00 2001 From: Sebastian Pfliegel Date: Wed, 1 Aug 2018 21:15:11 +0200 Subject: [PATCH] Added more options to SaveResultsAsCsvRequestParams (#669) Fixes Microsoft/sqlopsstudio#203 Options: LineSeperator TextIdentifier Encoding --- .../Microsoft.SqlTools.ServiceLayer.csproj | 8 +- .../Contracts/SaveResultsRequest.cs | 17 +- .../DataStorage/SaveAsCsvFileStreamWriter.cs | 58 +++++-- ...oft.SqlTools.ServiceLayer.UnitTests.csproj | 2 +- .../SaveAsCsvFileStreamWriterTests.cs | 163 +++++++++++++++++- 5 files changed, 226 insertions(+), 22 deletions(-) diff --git a/src/Microsoft.SqlTools.ServiceLayer/Microsoft.SqlTools.ServiceLayer.csproj b/src/Microsoft.SqlTools.ServiceLayer/Microsoft.SqlTools.ServiceLayer.csproj index 9015751c..b3c040f3 100644 --- a/src/Microsoft.SqlTools.ServiceLayer/Microsoft.SqlTools.ServiceLayer.csproj +++ b/src/Microsoft.SqlTools.ServiceLayer/Microsoft.SqlTools.ServiceLayer.csproj @@ -19,11 +19,13 @@ - - + + - + + + diff --git a/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/Contracts/SaveResultsRequest.cs b/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/Contracts/SaveResultsRequest.cs index 0b49fc36..25bcf9ef 100644 --- a/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/Contracts/SaveResultsRequest.cs +++ b/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/Contracts/SaveResultsRequest.cs @@ -79,9 +79,24 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.Contracts public bool IncludeHeaders { get; set; } /// - /// Delimeter for separating data items in CSV + /// Delimiter for separating data items in CSV /// public string Delimiter { get; set; } + + /// + /// either CR, CRLF or LF to separate rows in CSV + /// + public string LineSeperator { get; set; } + + /// + /// Text identifier for alphanumeric columns in CSV + /// + public string TextIdentifier { get; set; } + + /// + /// Encoding of the CSV file + /// + public string Encoding { get; set; } } /// diff --git 
a/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/DataStorage/SaveAsCsvFileStreamWriter.cs b/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/DataStorage/SaveAsCsvFileStreamWriter.cs index cbed6c53..8f962267 100644 --- a/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/DataStorage/SaveAsCsvFileStreamWriter.cs +++ b/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/DataStorage/SaveAsCsvFileStreamWriter.cs @@ -48,23 +48,57 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage /// public override void WriteRow(IList row, IList columns) { - string delimiter = ","; + char delimiter = ','; if(!string.IsNullOrEmpty(saveParams.Delimiter)) { - delimiter = saveParams.Delimiter; - } + // first char in string + delimiter = saveParams.Delimiter[0]; + } + + string lineSeperator = Environment.NewLine; + if(!string.IsNullOrEmpty(saveParams.LineSeperator)) + { + lineSeperator = saveParams.LineSeperator; + } + + char textIdentifier = '"'; + if(!string.IsNullOrEmpty(saveParams.TextIdentifier)) + { + // first char in string + textIdentifier = saveParams.TextIdentifier[0]; + } + + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + int codepage; + Encoding encoding; + try + { + if(int.TryParse(saveParams.Encoding, out codepage)) + { + encoding = Encoding.GetEncoding(codepage); + } + else + { + encoding = Encoding.GetEncoding(saveParams.Encoding); + } + } + catch + { + // Fallback encoding when specified codepage is invalid + encoding = Encoding.GetEncoding("utf-8"); + } // Write out the header if we haven't already and the user chose to have it if (saveParams.IncludeHeaders && !headerWritten) { // Build the string var selectedColumns = columns.Skip(ColumnStartIndex ?? 0).Take(ColumnCount ?? columns.Count) - .Select(c => EncodeCsvField(c.ColumnName) ?? string.Empty); + .Select(c => EncodeCsvField(c.ColumnName, delimiter, textIdentifier) ?? 
string.Empty); string headerLine = string.Join(delimiter, selectedColumns); // Encode it and write it out - byte[] headerBytes = Encoding.UTF8.GetBytes(headerLine + Environment.NewLine); + byte[] headerBytes = encoding.GetBytes(headerLine + lineSeperator); FileStream.Write(headerBytes, 0, headerBytes.Length); headerWritten = true; @@ -73,11 +107,11 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage // Build the string for the row var selectedCells = row.Skip(ColumnStartIndex ?? 0) .Take(ColumnCount ?? columns.Count) - .Select(c => EncodeCsvField(c.DisplayValue)); + .Select(c => EncodeCsvField(c.DisplayValue, delimiter, textIdentifier)); string rowLine = string.Join(delimiter, selectedCells); // Encode it and write it out - byte[] rowBytes = Encoding.UTF8.GetBytes(rowLine + Environment.NewLine); + byte[] rowBytes = encoding.GetBytes(rowLine + lineSeperator); FileStream.Write(rowBytes, 0, rowBytes.Length); } @@ -98,8 +132,10 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage /// /// The field to encode /// The CSV encoded version of the original field - internal static string EncodeCsvField(string field) + internal static string EncodeCsvField(string field, char delimiter, char textIdentifier) { + string strTextIdentifier = textIdentifier.ToString(); + // Special case for nulls if (field == null) { @@ -107,16 +143,16 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage } // Whether this field has special characters which require it to be embedded in quotes - bool embedInQuotes = field.IndexOfAny(new[] {',', '\r', '\n', '"'}) >= 0 // Contains special characters + bool embedInQuotes = field.IndexOfAny(new[] { delimiter, '\r', '\n', textIdentifier }) >= 0 // Contains special characters || field.StartsWith(" ") || field.EndsWith(" ") // Start/Ends with space || field.StartsWith("\t") || field.EndsWith("\t"); // Starts/Ends with tab //Replace all quotes in the original field with double quotes - string ret = 
field.Replace("\"", "\"\""); + string ret = field.Replace(strTextIdentifier, strTextIdentifier + strTextIdentifier); if (embedInQuotes) { - ret = $"\"{ret}\""; + ret = strTextIdentifier + $"{ret}" + strTextIdentifier; } return ret; diff --git a/test/Microsoft.SqlTools.ServiceLayer.UnitTests/Microsoft.SqlTools.ServiceLayer.UnitTests.csproj b/test/Microsoft.SqlTools.ServiceLayer.UnitTests/Microsoft.SqlTools.ServiceLayer.UnitTests.csproj index e41d6c18..2d886e68 100644 --- a/test/Microsoft.SqlTools.ServiceLayer.UnitTests/Microsoft.SqlTools.ServiceLayer.UnitTests.csproj +++ b/test/Microsoft.SqlTools.ServiceLayer.UnitTests/Microsoft.SqlTools.ServiceLayer.UnitTests.csproj @@ -13,10 +13,10 @@ + - diff --git a/test/Microsoft.SqlTools.ServiceLayer.UnitTests/QueryExecution/DataStorage/SaveAsCsvFileStreamWriterTests.cs b/test/Microsoft.SqlTools.ServiceLayer.UnitTests/QueryExecution/DataStorage/SaveAsCsvFileStreamWriterTests.cs index eb16afbc..ab6e4723 100644 --- a/test/Microsoft.SqlTools.ServiceLayer.UnitTests/QueryExecution/DataStorage/SaveAsCsvFileStreamWriterTests.cs +++ b/test/Microsoft.SqlTools.ServiceLayer.UnitTests/QueryExecution/DataStorage/SaveAsCsvFileStreamWriterTests.cs @@ -30,7 +30,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage public void EncodeCsvFieldShouldWrap(string field) { // If: I CSV encode a field that has forbidden characters in it - string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field); + string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field, ',', '\"'); // Then: It should wrap it in quotes Assert.True(Regex.IsMatch(output, "^\".*") @@ -44,7 +44,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage public void EncodeCsvFieldShouldNotWrap(string field) { // If: I CSV encode a field that does not have forbidden characters in it - string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field); + string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field, ',', 
'\"'); // Then: It should not wrap it in quotes Assert.False(Regex.IsMatch(output, "^\".*\"$")); @@ -54,7 +54,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage public void EncodeCsvFieldReplace() { // If: I CSV encode a field that has a double quote in it, - string output = SaveAsCsvFileStreamWriter.EncodeCsvField("Some\"thing"); + string output = SaveAsCsvFileStreamWriter.EncodeCsvField("Some\"thing", ',', '\"'); // Then: It should be replaced with double double quotes Assert.Equal("\"Some\"\"thing\"", output); @@ -64,7 +64,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage public void EncodeCsvFieldNull() { // If: I CSV encode a null - string output = SaveAsCsvFileStreamWriter.EncodeCsvField(null); + string output = SaveAsCsvFileStreamWriter.EncodeCsvField(null, ',', '\"'); // Then: there should be a string version of null returned Assert.Equal("NULL", output); @@ -214,10 +214,10 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage } [Fact] - public void WriteRowWithCustomDelimeters() + public void WriteRowWithCustomDelimiters() { // Setup: - // ... Create a request params that has custom delimeter say pipe("|") then this delimeter should be used + // ... Create a request params that has custom delimiter say pipe("|") then this delimiter should be used // ... Create a set of data to write // ... Create a memory location to store the data var requestParams = new SaveResultsAsCsvRequestParams @@ -261,5 +261,156 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage // Note: No need to check values, it is done as part of the previous tests } + [Fact] + public void WriteRowsWithCustomLineSeperator() + { + // Setup: + // ... Create a request params that has custom line separator then this separator should be used + // ... Create a set of data to write + // ... 
Create a memory location to store the data + var requestParams = new SaveResultsAsCsvRequestParams + { + IncludeHeaders = true + }; + List data = new List + { + new DbCellValue { DisplayValue = "item1" }, + new DbCellValue { DisplayValue = "item2" } + }; + List columns = new List + { + new DbColumnWrapper(new TestDbColumn("column1")), + new DbColumnWrapper(new TestDbColumn("column2")) + }; + + byte[] output; + string outputString; + string[] lines; + SaveAsCsvFileStreamWriter writer; + + // If: I set default separator and write a row + requestParams.LineSeperator = null; + output = new byte[8192]; + writer = new SaveAsCsvFileStreamWriter(new MemoryStream(output), requestParams); + using (writer) + { + writer.WriteRow(data, columns); + } + + // Then: + // ... It should have split the lines by system's default line separator + outputString = Encoding.UTF8.GetString(output).TrimEnd('\0', '\r', '\n'); + lines = outputString.Split(new[] { Environment.NewLine }, StringSplitOptions.None); + Assert.Equal(2, lines.Length); + + // If: I set \n (line feed) as separator and write a row + requestParams.LineSeperator = "\n"; + output = new byte[8192]; + writer = new SaveAsCsvFileStreamWriter(new MemoryStream(output), requestParams); + using (writer) + { + writer.WriteRow(data, columns); + } + + // Then: + // ... It should have split the lines by \n + outputString = Encoding.UTF8.GetString(output).TrimEnd('\0', '\r', '\n'); + lines = outputString.Split(new[] { '\n' }, StringSplitOptions.None); + Assert.Equal(2, lines.Length); + + // If: I set \r\n (carriage return + line feed) as separator and write a row + requestParams.LineSeperator = "\r\n"; + output = new byte[8192]; + writer = new SaveAsCsvFileStreamWriter(new MemoryStream(output), requestParams); + using (writer) + { + writer.WriteRow(data, columns); + } + + // Then: + // ... 
It should have split the lines by \r\n + outputString = Encoding.UTF8.GetString(output).TrimEnd('\0', '\r', '\n'); + lines = outputString.Split(new[] { "\r\n" }, StringSplitOptions.None); + Assert.Equal(2, lines.Length); + + } + + [Fact] + public void WriteRowWithCustomTextIdentifier() + { + // Setup: + // ... Create a request params that has a text identifier set say single quotation marks("'") then this text identifier should be used + // ... Create a set of data to write + // ... Create a memory location to store the data + var requestParams = new SaveResultsAsCsvRequestParams() + { + TextIdentifier = "\'", + Delimiter = ";" + }; + List data = new List + { + new DbCellValue { DisplayValue = "item;1" }, + new DbCellValue { DisplayValue = "item,2" }, + new DbCellValue { DisplayValue = "item\"3" }, + new DbCellValue { DisplayValue = "item\'4" } + }; + List columns = new List + { + new DbColumnWrapper(new TestDbColumn("column1")), + new DbColumnWrapper(new TestDbColumn("column2")), + new DbColumnWrapper(new TestDbColumn("column3")), + new DbColumnWrapper(new TestDbColumn("column4")) + }; + byte[] output = new byte[8192]; + + // If: I write a row + SaveAsCsvFileStreamWriter writer = new SaveAsCsvFileStreamWriter(new MemoryStream(output), requestParams); + using (writer) + { + writer.WriteRow(data, columns); + } + + // Then: + // ... It should have split the columns by delimiter, embedded in text identifier when field contains delimiter or the text identifier + string outputString = Encoding.UTF8.GetString(output).TrimEnd('\0', '\r', '\n'); + Assert.Equal("\'item;1\';item,2;item\"3;\'item\'\'4\'", outputString); + } + + [Fact] + public void WriteRowWithCustomEncoding() + { + // Setup: + // ... Create a request params that has a custom encoding say "Windows-1252" then this encoding should be used + // ... Create a set of data to write + // ... 
Create a memory location to store the data + var requestParams = new SaveResultsAsCsvRequestParams + { + Encoding = "Windows-1252" + }; + List data = new List + { + new DbCellValue { DisplayValue = "ü" } + }; + List columns = new List + { + new DbColumnWrapper(new TestDbColumn("column1")) + }; + byte[] output = new byte[8192]; + + // If: I write a row + SaveAsCsvFileStreamWriter writer = new SaveAsCsvFileStreamWriter(new MemoryStream(output), requestParams); + using (writer) + { + writer.WriteRow(data, columns); + } + + // Then: + // ... It should have written the umlaut using the encoding Windows-1252 + Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); + string outputString = Encoding.GetEncoding("Windows-1252").GetString(output).TrimEnd('\0', '\r', '\n'); + Assert.Equal("ü", outputString); + + } + } }