Feature: Writing Execute Results to Temp File (#35)

* WIP for buffering in temporary file * Adding support for writing to disk for buffering * WIP - Adding file reader, factory for reader/writer * Making long list use generics and implement IEnumerable * Reading/Writing from file is working * Removing unused 'skipValue' logic * More tweaks to file buffer Adding logic for cleaning up the temp files Adding fix for empty/null column names * Adding comments and cleanup * Unit tests for FileStreamWrapper * WIP adding more unit tests, and finishing up wiring up the output writers * Finishing up initial unit tests * Fixing bugs with long fields * Squashed commit of the following: commit df0ffc12a46cb286d801d08689964eac08ad71dd Author: Benjamin Russell <beruss@microsoft.com> Date: Wed Sep 7 14:45:39 2016 -0700 Removing last bit of async for file writing. We're seeing a 8x improvement of file write speeds! commit 08a4b9f32e825512ca24d5dc03ef5acbf7cc6d94 Author: Benjamin Russell <beruss@microsoft.com> Date: Wed Sep 7 11:23:06 2016 -0700 Removing async wrappers * Rolling back test code for Program.cs * Changes as per code review * Fixing broken unit tests * More fixes for codereview
2026-01-16 09:35:36 -05:00 · 2016-09-08 17:55:11 -07:00
parent 903eab61d1
commit 8aa3d524fc
24 changed files with 4050 additions and 195 deletions
--- a/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/ResultSet.cs
+++ b/src/Microsoft.SqlTools.ServiceLayer/QueryExecution/ResultSet.cs
@@ -1,4 +1,4 @@
-//
+// 
 // Copyright (c) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 //
@@ -7,46 +7,130 @@ using System;
 using System.Collections.Generic;
 using System.Data.Common;
 using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
 using Microsoft.SqlTools.ServiceLayer.QueryExecution.Contracts;
+using Microsoft.SqlTools.ServiceLayer.QueryExecution.DataStorage;
+using Microsoft.SqlTools.ServiceLayer.Utility;

 namespace Microsoft.SqlTools.ServiceLayer.QueryExecution
 {
-    public class ResultSet
+    public class ResultSet : IDisposable
    {
-        public DbColumn[] Columns { get; set; }
+        #region Constants

-        public List<object[]> Rows { get; private set; }
+        private const int DefaultMaxCharsToStore = 65535; // 64 KB - QE default

-        public ResultSet()
-        {
-            Rows = new List<object[]>();
-        }
+        // xml is a special case so number of chars to store is usually greater than for other long types
+        private const int DefaultMaxXmlCharsToStore = 2097152; // 2 MB - QE default
+
+        #endregion
+
+        #region Member Variables

        /// <summary>
-        /// Add a row of data to the result set using a <see cref="DbDataReader"/> that has already
-        /// read in a row.
+        /// For IDisposable pattern, whether or not object has been disposed
        /// </summary>
-        /// <param name="reader">A <see cref="DbDataReader"/> that has already had a read performed</param>
-        public void AddRow(DbDataReader reader)
+        private bool disposed;
+
+        /// <summary>
+        /// The factory to use to get reading/writing handlers
+        /// </summary>
+        private readonly IFileStreamFactory fileStreamFactory;
+
+        /// <summary>
+        /// File stream reader that will be reused to make rapid-fire retrieval of result subsets
+        /// quick and low perf impact.
+        /// </summary>
+        private IFileStreamReader fileStreamReader;
+
+        /// <summary>
+        /// Whether or not the result set has been read in from the database
+        /// </summary>
+        private bool hasBeenRead;
+
+        /// <summary>
+        /// The name of the temporary file we're using to output these results in
+        /// </summary>
+        private readonly string outputFileName;
+
+        #endregion
+
+        /// <summary>
+        /// Creates a new result set that wraps the reader and obtains an output file name from the factory
+        /// </summary>
+        /// <param name="reader">The reader from executing a query; an ArgumentNullException is thrown if null</param>
+        /// <param name="factory">Factory used to create the output file and, later, its reader/writer</param>
+        public ResultSet(DbDataReader reader, IFileStreamFactory factory)
        {
-            List<object> row = new List<object>();
-            for (int i = 0; i < reader.FieldCount; ++i)
+            // Sanity check to make sure we got a reader before wrapping it
+            if (reader == null)
            {
-                row.Add(reader.GetValue(i));
+                throw new ArgumentNullException(nameof(reader), "Reader cannot be null");
            }
-            Rows.Add(row.ToArray());
+            DataReader = new StorageDataReader(reader);
+
+            // Initialize the storage. NOTE(review): factory is dereferenced here without a null check
+            outputFileName = factory.CreateFile();
+            FileOffsets = new LongList<long>();
+
+            // Store the factory so ReadResultToEnd and Dispose can use it later
+            fileStreamFactory = factory;
+            hasBeenRead = false;
        }

+        #region Properties
+
+        /// <summary>
+        /// The columns for this result set
+        /// </summary>
+        public DbColumnWrapper[] Columns { get; private set; }
+
+        /// <summary>
+        /// The reader to use for this resultset
+        /// </summary>
+        private StorageDataReader DataReader { get; set; }
+
+        /// <summary>
+        /// A list of offsets into the buffer file that correspond to where rows start
+        /// </summary>
+        private LongList<long> FileOffsets { get; set; }
+
+        /// <summary>
+        /// Maximum number of characters to store for a field
+        /// </summary>
+        public int MaxCharsToStore { get { return DefaultMaxCharsToStore; } }
+
+        /// <summary>
+        /// Maximum number of characters to store for an XML field
+        /// </summary>
+        public int MaxXmlCharsToStore { get { return DefaultMaxXmlCharsToStore; } }
+
+        /// <summary>
+        /// The number of rows for this result set
+        /// </summary>
+        public long RowCount { get; private set; }
+
+        #endregion
+
+        #region Public Methods
+
        /// <summary>
        /// Generates a subset of the rows from the result set
        /// </summary>
        /// <param name="startRow">The starting row of the results</param>
        /// <param name="rowCount">How many rows to retrieve</param>
        /// <returns>A subset of results</returns>
-        public ResultSetSubset GetSubset(int startRow, int rowCount)
+        public Task<ResultSetSubset> GetSubset(int startRow, int rowCount)
        {
+            // Sanity check to make sure that the results have been read beforehand
+            if (!hasBeenRead || fileStreamReader == null)
+            {
+                throw new InvalidOperationException("Cannot read subset unless the results have been read from the server");
+            }
+
            // Sanity check to make sure that the row and the row count are within bounds
-            if (startRow < 0 || startRow >= Rows.Count)
+            if (startRow < 0 || startRow >= RowCount)
            {
                throw new ArgumentOutOfRangeException(nameof(startRow), "Start row cannot be less than 0 " +
                                                                        "or greater than the number of rows in the resultset");
@@ -56,13 +140,79 @@ namespace Microsoft.SqlTools.ServiceLayer.QueryExecution
                throw new ArgumentOutOfRangeException(nameof(rowCount), "Row count must be a positive integer");
            }

-            // Retrieve the subset of the results as per the request
-            object[][] rows = Rows.Skip(startRow).Take(rowCount).ToArray();
-            return new ResultSetSubset
+            return Task.Factory.StartNew(() =>
            {
-                Rows = rows,
-                RowCount = rows.Length
-            };
+                // Figure out which rows we need to read back
+                IEnumerable<long> rowOffsets = FileOffsets.Skip(startRow).Take(rowCount);
+
+                // Iterate over the rows we need and process them into output
+                object[][] rows = rowOffsets.Select(rowOffset => fileStreamReader.ReadRow(rowOffset, Columns)).ToArray();
+
+                // Retrieve the subset of the results as per the request
+                return new ResultSetSubset
+                {
+                    Rows = rows,
+                    RowCount = rows.Length
+                };
+            });
        }
+
+        /// <summary>
+        /// Reads rows from the reader until none remain, writing each row to the output file
+        /// </summary>
+        /// <param name="cancellationToken">Cancellation token for cancelling the query; passed to each ReadAsync call</param>
+        public async Task ReadResultToEnd(CancellationToken cancellationToken)
+        {
+            // Open a writer for the file; the using block disposes it before a reader is requested below
+            using (IFileStreamWriter fileWriter = fileStreamFactory.GetWriter(outputFileName, MaxCharsToStore, MaxXmlCharsToStore))
+            {
+                // The column schema is required to initialize the columns; fail if it cannot be retrieved
+                if (!DataReader.DbDataReader.CanGetColumnSchema())
+                {
+                    throw new InvalidOperationException("Could not retrieve column schema for result set.");
+                }
+                Columns = DataReader.Columns;
+                long currentFileOffset = 0;
+
+                while (await DataReader.ReadAsync(cancellationToken))
+                {
+                    RowCount++;
+                    FileOffsets.Add(currentFileOffset); // record where this row starts before writing it
+                    currentFileOffset += fileWriter.WriteRow(DataReader); // advance by the value WriteRow returns
+                }
+            }
+
+            // Mark that result has been read and get the reader that GetSubset reuses
+            hasBeenRead = true;
+            fileStreamReader = fileStreamFactory.GetReader(outputFileName);
+        }
+
+        #endregion
+
+        #region IDisposable Implementation
+
+        public void Dispose() // IDisposable entry point; delegates the cleanup to Dispose(bool)
+        {
+            Dispose(true); // true: called explicitly, so the disposing branch runs
+            GC.SuppressFinalize(this); // dispose-pattern call; no finalizer is visible in this diff
+        }
+
+        protected virtual void Dispose(bool disposing) // releases the reader and output file when disposing is true
+        {
+            if (disposed) // already disposed; repeated calls do nothing
+            {
+                return;
+            }
+
+            if (disposing)
+            {
+                fileStreamReader?.Dispose(); // null if ReadResultToEnd has not assigned it
+                fileStreamFactory.DisposeFile(outputFileName); // presumably removes the temp file - confirm in IFileStreamFactory
+            }
+
+            disposed = true;
+        }
+
+        #endregion
    }
 }