Added more options to SaveResultsAsCsvRequestParams (#669)

Fixes Microsoft/sqlopsstudio#203
Options:
LineSeperator
TextIdentifier
Encoding
This commit is contained in:
Sebastian Pfliegel
2018-08-01 21:15:11 +02:00
committed by Karl Burtram
parent d09810980a
commit 84f62a3e85
5 changed files with 226 additions and 22 deletions

View File

@@ -19,11 +19,13 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="System.Data.SqlClient" Version="4.5.0-preview2-26406-04" />
<PackageReference Include="Microsoft.SqlServer.Management.XEvent" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.Management.XEventDBScoped" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.Management.XEvent" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.Management.XEventDBScoped" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.Management.SmoMetadataProvider" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.Management.SqlScriptPublishModel" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.SqlParser" Version="140.17279.0-xplat" /> </ItemGroup>
<PackageReference Include="Microsoft.SqlServer.SqlParser" Version="140.17279.0-xplat" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="4.5.0-preview2-26406-04" />
</ItemGroup>
<ItemGroup>
<Compile Include="**\*.cs" />
</ItemGroup>

View File

@@ -79,9 +79,24 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.Contracts
public bool IncludeHeaders { get; set; }
/// <summary>
/// Delimeter for separating data items in CSV
/// Delimiter for separating data items in CSV
/// </summary>
public string Delimiter { get; set; }
/// <summary>
/// Line separator for rows in CSV: either CR, CRLF or LF
/// </summary>
public string LineSeperator { get; set; }
/// <summary>
/// Text identifier for alphanumeric columns in CSV
/// </summary>
public string TextIdentifier { get; set; }
/// <summary>
/// Encoding of the CSV file
/// </summary>
public string Encoding { get; set; }
}
/// <summary>

View File

@@ -48,23 +48,57 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage
/// </param>
public override void WriteRow(IList<DbCellValue> row, IList<DbColumnWrapper> columns)
{
string delimiter = ",";
char delimiter = ',';
if(!string.IsNullOrEmpty(saveParams.Delimiter))
{
delimiter = saveParams.Delimiter;
}
// first char in string
delimiter = saveParams.Delimiter[0];
}
string lineSeperator = Environment.NewLine;
if(!string.IsNullOrEmpty(saveParams.LineSeperator))
{
lineSeperator = saveParams.LineSeperator;
}
char textIdentifier = '"';
if(!string.IsNullOrEmpty(saveParams.TextIdentifier))
{
// first char in string
textIdentifier = saveParams.TextIdentifier[0];
}
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
int codepage;
Encoding encoding;
try
{
if(int.TryParse(saveParams.Encoding, out codepage))
{
encoding = Encoding.GetEncoding(codepage);
}
else
{
encoding = Encoding.GetEncoding(saveParams.Encoding);
}
}
catch
{
// Fallback encoding when specified codepage is invalid
encoding = Encoding.GetEncoding("utf-8");
}
// Write out the header if we haven't already and the user chose to have it
if (saveParams.IncludeHeaders && !headerWritten)
{
// Build the string
var selectedColumns = columns.Skip(ColumnStartIndex ?? 0).Take(ColumnCount ?? columns.Count)
.Select(c => EncodeCsvField(c.ColumnName) ?? string.Empty);
.Select(c => EncodeCsvField(c.ColumnName, delimiter, textIdentifier) ?? string.Empty);
string headerLine = string.Join(delimiter, selectedColumns);
// Encode it and write it out
byte[] headerBytes = Encoding.UTF8.GetBytes(headerLine + Environment.NewLine);
byte[] headerBytes = encoding.GetBytes(headerLine + lineSeperator);
FileStream.Write(headerBytes, 0, headerBytes.Length);
headerWritten = true;
@@ -73,11 +107,11 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage
// Build the string for the row
var selectedCells = row.Skip(ColumnStartIndex ?? 0)
.Take(ColumnCount ?? columns.Count)
.Select(c => EncodeCsvField(c.DisplayValue));
.Select(c => EncodeCsvField(c.DisplayValue, delimiter, textIdentifier));
string rowLine = string.Join(delimiter, selectedCells);
// Encode it and write it out
byte[] rowBytes = Encoding.UTF8.GetBytes(rowLine + Environment.NewLine);
byte[] rowBytes = encoding.GetBytes(rowLine + lineSeperator);
FileStream.Write(rowBytes, 0, rowBytes.Length);
}
@@ -98,8 +132,10 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage
/// </summary>
/// <param name="field">The field to encode</param>
/// <returns>The CSV encoded version of the original field</returns>
internal static string EncodeCsvField(string field)
internal static string EncodeCsvField(string field, char delimiter, char textIdentifier)
{
string strTextIdentifier = textIdentifier.ToString();
// Special case for nulls
if (field == null)
{
@@ -107,16 +143,16 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage
}
// Whether this field has special characters which require it to be embedded in quotes
bool embedInQuotes = field.IndexOfAny(new[] {',', '\r', '\n', '"'}) >= 0 // Contains special characters
bool embedInQuotes = field.IndexOfAny(new[] { delimiter, '\r', '\n', textIdentifier }) >= 0 // Contains special characters
|| field.StartsWith(" ") || field.EndsWith(" ") // Start/Ends with space
|| field.StartsWith("\t") || field.EndsWith("\t"); // Starts/Ends with tab
//Replace all quotes in the original field with double quotes
string ret = field.Replace("\"", "\"\"");
string ret = field.Replace(strTextIdentifier, strTextIdentifier + strTextIdentifier);
if (embedInQuotes)
{
ret = $"\"{ret}\"";
ret = strTextIdentifier + $"{ret}" + strTextIdentifier;
}
return ret;

View File

@@ -13,10 +13,10 @@
<PackageReference Include="xunit" Version="2.2.0" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.2.0" />
<PackageReference Include="System.Data.SqlClient" Version="4.5.0-preview2-26406-04" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="4.5.0-preview2-26406-04" />
<PackageReference Include="Microsoft.SqlServer.Management.SmoMetadataProvider" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.Management.SqlScriptPublishModel" Version="140.17279.0-xplat" />
<PackageReference Include="Microsoft.SqlServer.SqlParser" Version="140.17279.0-xplat" />
</ItemGroup>
<ItemGroup>
<Reference Include="Newtonsoft.Json">

View File

@@ -30,7 +30,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage
public void EncodeCsvFieldShouldWrap(string field)
{
// If: I CSV encode a field that has forbidden characters in it
string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field);
string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field, ',', '\"');
// Then: It should wrap it in quotes
Assert.True(Regex.IsMatch(output, "^\".*")
@@ -44,7 +44,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage
public void EncodeCsvFieldShouldNotWrap(string field)
{
// If: I CSV encode a field that does not have forbidden characters in it
string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field);
string output = SaveAsCsvFileStreamWriter.EncodeCsvField(field, ',', '\"');
// Then: It should not wrap it in quotes
Assert.False(Regex.IsMatch(output, "^\".*\"$"));
@@ -54,7 +54,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage
public void EncodeCsvFieldReplace()
{
// If: I CSV encode a field that has a double quote in it,
string output = SaveAsCsvFileStreamWriter.EncodeCsvField("Some\"thing");
string output = SaveAsCsvFileStreamWriter.EncodeCsvField("Some\"thing", ',', '\"');
// Then: It should be replaced with double double quotes
Assert.Equal("\"Some\"\"thing\"", output);
@@ -64,7 +64,7 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage
public void EncodeCsvFieldNull()
{
// If: I CSV encode a null
string output = SaveAsCsvFileStreamWriter.EncodeCsvField(null);
string output = SaveAsCsvFileStreamWriter.EncodeCsvField(null, ',', '\"');
// Then: there should be a string version of null returned
Assert.Equal("NULL", output);
@@ -214,10 +214,10 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage
}
[Fact]
public void WriteRowWithCustomDelimeters()
public void WriteRowWithCustomDelimiters()
{
// Setup:
// ... Create a request params that has custom delimeter say pipe("|") then this delimeter should be used
// ... Create a request params that has custom delimiter say pipe("|") then this delimiter should be used
// ... Create a set of data to write
// ... Create a memory location to store the data
var requestParams = new SaveResultsAsCsvRequestParams
@@ -261,5 +261,156 @@ namespace Microsoft.SqlTools.ServiceLayer.UnitTests.QueryExecution.DataStorage
// Note: No need to check values, it is done as part of the previous tests
}
[Fact]
public void WriteRowsWithCustomLineSeperator()
{
    // Setup:
    // ... Create request params with headers enabled, so each write emits two lines (header + row)
    // ... Create a set of data to write
    var requestParams = new SaveResultsAsCsvRequestParams
    {
        IncludeHeaders = true
    };
    List<DbCellValue> data = new List<DbCellValue>
    {
        new DbCellValue { DisplayValue = "item1" },
        new DbCellValue { DisplayValue = "item2" }
    };
    List<DbColumnWrapper> columns = new List<DbColumnWrapper>
    {
        new DbColumnWrapper(new TestDbColumn("column1")),
        new DbColumnWrapper(new TestDbColumn("column2"))
    };

    // A null entry means "no separator requested"; the expected separator is then
    // the system default (Environment.NewLine).
    string[] requestedSeparators = { null, "\n", "\r\n" };
    foreach (string requestedSeparator in requestedSeparators)
    {
        string expectedSeparator = requestedSeparator ?? Environment.NewLine;

        // If: I set the line separator and write a row into a fresh memory location
        requestParams.LineSeperator = requestedSeparator;
        byte[] output = new byte[8192];
        using (var writer = new SaveAsCsvFileStreamWriter(new MemoryStream(output), requestParams))
        {
            writer.WriteRow(data, columns);
        }

        // Then:
        // ... The header and the row should be split by the expected separator
        string outputString = Encoding.UTF8.GetString(output).TrimEnd('\0', '\r', '\n');
        string[] lines = outputString.Split(new[] { expectedSeparator }, StringSplitOptions.None);
        Assert.Equal(2, lines.Length);

        // ... No line-break characters may remain inside a line. Counting lines alone
        //     cannot tell LF from CRLF: CRLF output split on "\n" also yields two lines.
        foreach (string line in lines)
        {
            Assert.True(line.IndexOfAny(new[] { '\r', '\n' }) < 0);
        }
    }
}
[Fact]
public void WriteRowWithCustomTextIdentifier()
{
    // Setup:
    // ... Request params that use a single quotation mark as text identifier and a semicolon as delimiter
    // ... One row with four cells: one contains the delimiter, one a comma, one a double quote, one the text identifier
    // ... A memory location to store the data
    var csvParams = new SaveResultsAsCsvRequestParams()
    {
        TextIdentifier = "\'",
        Delimiter = ";"
    };
    var cells = new List<DbCellValue>
    {
        new DbCellValue { DisplayValue = "item;1" },
        new DbCellValue { DisplayValue = "item,2" },
        new DbCellValue { DisplayValue = "item\"3" },
        new DbCellValue { DisplayValue = "item\'4" }
    };
    var columnWrappers = new List<DbColumnWrapper>
    {
        new DbColumnWrapper(new TestDbColumn("column1")),
        new DbColumnWrapper(new TestDbColumn("column2")),
        new DbColumnWrapper(new TestDbColumn("column3")),
        new DbColumnWrapper(new TestDbColumn("column4"))
    };
    byte[] buffer = new byte[8192];

    // If: I write a row
    using (var csvWriter = new SaveAsCsvFileStreamWriter(new MemoryStream(buffer), csvParams))
    {
        csvWriter.WriteRow(cells, columnWrappers);
    }

    // Then:
    // ... Cells are joined by the delimiter; only the cells containing the delimiter or the
    //     text identifier are wrapped in the text identifier, and an embedded identifier is doubled
    string expected = "\'item;1\';item,2;item\"3;\'item\'\'4\'";
    string actual = Encoding.UTF8.GetString(buffer).TrimEnd('\0', '\r', '\n');
    Assert.Equal(expected, actual);
}
[Fact]
public void WriteRowWithCustomEncoding()
{
    // Setup:
    // ... Request params that ask for the Windows-1252 encoding
    // ... A single cell holding an umlaut
    // ... A memory location to store the data
    var csvParams = new SaveResultsAsCsvRequestParams
    {
        Encoding = "Windows-1252"
    };
    var cells = new List<DbCellValue>
    {
        new DbCellValue { DisplayValue = "ü" }
    };
    var columnWrappers = new List<DbColumnWrapper>
    {
        new DbColumnWrapper(new TestDbColumn("column1"))
    };
    byte[] buffer = new byte[8192];

    // If: I write a row
    using (var csvWriter = new SaveAsCsvFileStreamWriter(new MemoryStream(buffer), csvParams))
    {
        csvWriter.WriteRow(cells, columnWrappers);
    }

    // Then:
    // ... Decoding the bytes as Windows-1252 should give back the umlaut
    // (the code-page provider is registered here, only after the write)
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    Encoding windows1252 = Encoding.GetEncoding("Windows-1252");
    string decoded = windows1252.GetString(buffer).TrimEnd('\0', '\r', '\n');
    Assert.Equal("ü", decoded);
}
}
}