Feature: Writing Execute Results to Temp File (#35)

* WIP for buffering in temporary file

* Adding support for writing to disk for buffering

* WIP - Adding file reader, factory for reader/writer

* Making long list use generics and implement IEnumerable

* Reading/Writing from file is working

* Removing unused 'skipValue' logic

* More tweaks to file buffer

Adding logic for cleaning up the temp files
Adding fix for empty/null column names

* Adding comments and cleanup

* Unit tests for FileStreamWrapper

* WIP adding more unit tests, and finishing up wiring up the output writers

* Finishing up initial unit tests

* Fixing bugs with long fields

* Squashed commit of the following:

commit df0ffc12a46cb286d801d08689964eac08ad71dd
Author: Benjamin Russell <beruss@microsoft.com>
Date:   Wed Sep 7 14:45:39 2016 -0700

    Removing last bit of async for file writing.

    We're seeing an 8x improvement in file write speeds!

commit 08a4b9f32e825512ca24d5dc03ef5acbf7cc6d94
Author: Benjamin Russell <beruss@microsoft.com>
Date:   Wed Sep 7 11:23:06 2016 -0700

    Removing async wrappers

* Rolling back test code for Program.cs

* Changes as per code review

* Fixing broken unit tests

* More fixes for codereview
This commit is contained in:
Benjamin Russell
2016-09-08 17:55:11 -07:00
committed by GitHub
parent 903eab61d1
commit 8aa3d524fc
24 changed files with 4050 additions and 195 deletions

View File

@@ -1,4 +1,4 @@
//
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
//
@@ -7,46 +7,130 @@ using System;
using System.Collections.Generic;
using System.Data.Common;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.SqlTools.ServiceLayer.QueryExecution.Contracts;
using Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage;
using Microsoft.SqlTools.ServiceLayer.Utility;
namespace Microsoft.SqlTools.ServiceLayer.QueryExecution
{
public class ResultSet
public class ResultSet : IDisposable
{
public DbColumn[] Columns { get; set; }
#region Constants
public List<object[]> Rows { get; private set; }
private const int DefaultMaxCharsToStore = 65535; // 64 KB - QE default
public ResultSet()
{
Rows = new List<object[]>();
}
// xml is a special case so number of chars to store is usually greater than for other long types
private const int DefaultMaxXmlCharsToStore = 2097152; // 2 MB - QE default
#endregion
#region Member Variables
/// <summary>
/// Add a row of data to the result set using a <see cref="DbDataReader"/> that has already
/// read in a row.
/// For IDisposable pattern, whether or not object has been disposed
/// </summary>
/// <param name="reader">A <see cref="DbDataReader"/> that has already had a read performed</param>
public void AddRow(DbDataReader reader)
private bool disposed;
/// <summary>
/// The factory to use to get reading/writing handlers
/// </summary>
private readonly IFileStreamFactory fileStreamFactory;
/// <summary>
/// File stream reader that will be reused to make rapid-fire retrieval of result subsets
/// quick and low perf impact.
/// </summary>
private IFileStreamReader fileStreamReader;
/// <summary>
/// Whether or not the result set has been read in from the database
/// </summary>
private bool hasBeenRead;
/// <summary>
/// The name of the temporary file we're using to output these results in
/// </summary>
private readonly string outputFileName;
#endregion
/// <summary>
/// Creates a new result set and initializes its state
/// </summary>
/// <param name="reader">The reader from executing a query</param>
/// <param name="factory">Factory for creating a reader/writer</param>
public ResultSet(DbDataReader reader, IFileStreamFactory factory)
{
List<object> row = new List<object>();
for (int i = 0; i < reader.FieldCount; ++i)
// Sanity check to make sure we got a reader
if (reader == null)
{
row.Add(reader.GetValue(i));
throw new ArgumentNullException(nameof(reader), "Reader cannot be null");
}
Rows.Add(row.ToArray());
DataReader = new StorageDataReader(reader);
// Initialize the storage
outputFileName = factory.CreateFile();
FileOffsets = new LongList<long>();
// Store the factory
fileStreamFactory = factory;
hasBeenRead = false;
}
#region Properties
/// <summary>
/// Column metadata for this result set; assigned from the data reader when the
/// results are read to the end
/// </summary>
public DbColumnWrapper[] Columns { get; private set; }

/// <summary>
/// Storage wrapper around the <see cref="DbDataReader"/> this result set was created with
/// </summary>
private StorageDataReader DataReader { get; set; }

/// <summary>
/// Offsets into the buffer file, one entry per row, marking where each row begins
/// </summary>
private LongList<long> FileOffsets { get; set; }

/// <summary>
/// Upper bound on the number of characters stored for a field
/// </summary>
public int MaxCharsToStore => DefaultMaxCharsToStore;

/// <summary>
/// Upper bound on the number of characters stored for an XML field
/// </summary>
public int MaxXmlCharsToStore => DefaultMaxXmlCharsToStore;

/// <summary>
/// How many rows this result set contains
/// </summary>
public long RowCount { get; private set; }
#endregion
#region Public Methods
/// <summary>
/// Generates a subset of the rows from the result set
/// </summary>
/// <param name="startRow">The starting row of the results</param>
/// <param name="rowCount">How many rows to retrieve</param>
/// <returns>A subset of results</returns>
public ResultSetSubset GetSubset(int startRow, int rowCount)
public Task<ResultSetSubset> GetSubset(int startRow, int rowCount)
{
// Sanity check to make sure that the results have been read beforehand
if (!hasBeenRead || fileStreamReader == null)
{
throw new InvalidOperationException("Cannot read subset unless the results have been read from the server");
}
// Sanity check to make sure that the row and the row count are within bounds
if (startRow < 0 || startRow >= Rows.Count)
if (startRow < 0 || startRow >= RowCount)
{
throw new ArgumentOutOfRangeException(nameof(startRow), "Start row cannot be less than 0 " +
"or greater than the number of rows in the resultset");
@@ -56,13 +140,79 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution
throw new ArgumentOutOfRangeException(nameof(rowCount), "Row count must be a positive integer");
}
// Retrieve the subset of the results as per the request
object[][] rows = Rows.Skip(startRow).Take(rowCount).ToArray();
return new ResultSetSubset
return Task.Factory.StartNew(() =>
{
Rows = rows,
RowCount = rows.Length
};
// Figure out which rows we need to read back
IEnumerable<long> rowOffsets = FileOffsets.Skip(startRow).Take(rowCount);
// Iterate over the rows we need and process them into output
object[][] rows = rowOffsets.Select(rowOffset => fileStreamReader.ReadRow(rowOffset, Columns)).ToArray();
// Retrieve the subset of the results as per the request
return new ResultSetSubset
{
Rows = rows,
RowCount = rows.Length
};
});
}
/// <summary>
/// Drains the data reader, writing each row to the output file and recording the file
/// offset at which every row starts
/// </summary>
/// <param name="cancellationToken">Token passed to each asynchronous read of the data reader</param>
/// <exception cref="InvalidOperationException">The underlying reader cannot provide a column schema</exception>
public async Task ReadResultToEnd(CancellationToken cancellationToken)
{
    // The writer only lives for the duration of the read; it is disposed before a reader is opened
    using (IFileStreamWriter rowWriter = fileStreamFactory.GetWriter(outputFileName, MaxCharsToStore, MaxXmlCharsToStore))
    {
        // A column schema is mandatory: without one the result set cannot be described, so bail out
        bool schemaAvailable = DataReader.DbDataReader.CanGetColumnSchema();
        if (!schemaAvailable)
        {
            throw new InvalidOperationException("Could not retrieve column schema for result set.");
        }
        Columns = DataReader.Columns;

        // Each row starts where the previous one ended, so track a running offset
        long nextRowOffset = 0;
        while (await DataReader.ReadAsync(cancellationToken))
        {
            RowCount++;
            FileOffsets.Add(nextRowOffset);
            nextRowOffset += rowWriter.WriteRow(DataReader);
        }
    }

    // Flag the results as read, then open the reader that subset requests will reuse
    hasBeenRead = true;
    fileStreamReader = fileStreamFactory.GetReader(outputFileName);
}
#endregion
#region IDisposable Implementation
/// <summary>
/// Releases the resources held by this result set and suppresses finalization
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Performs the actual cleanup; does nothing if the object was already disposed
/// </summary>
/// <param name="disposing">
/// When true, the file stream reader (if any) is disposed and the output file name is
/// passed to the factory's DisposeFile
/// </param>
protected virtual void Dispose(bool disposing)
{
    if (!disposed)
    {
        if (disposing)
        {
            // The reader only exists once the results have been read, hence the null-conditional call
            fileStreamReader?.Dispose();
            fileStreamFactory.DisposeFile(outputFileName);
        }

        disposed = true;
    }
}
#endregion
}
}