From bf4911795f039b4366d5435aa0f09f09742754b6 Mon Sep 17 00:00:00 2001 From: Charles Gagnon Date: Mon, 3 Aug 2020 07:54:06 -0700 Subject: [PATCH] Add Notebook conversion logic (#1027) * Add logic to convert Notebook doc into SQL query * move classes * Add logic for SQL -> Notebook * Fix a few trimming issues --- .../Contracts/NotebookConvertContracts.cs | 6 +- .../NotebookConvert/Notebook.cs | 65 +++++++ .../NotebookConvert/NotebookConvertService.cs | 162 ++++++++++++++---- 3 files changed, 198 insertions(+), 35 deletions(-) create mode 100644 src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Notebook.cs diff --git a/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Contracts/NotebookConvertContracts.cs b/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Contracts/NotebookConvertContracts.cs index 71f85dd2..8b1b81d4 100644 --- a/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Contracts/NotebookConvertContracts.cs +++ b/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Contracts/NotebookConvertContracts.cs @@ -14,7 +14,7 @@ namespace Microsoft.SqlTools.ServiceLayer.NotebookConvert.Contracts /// /// The raw Notebook JSON content to convert /// - public string NotebookJson { get; set; } + public string Content; } @@ -23,7 +23,7 @@ namespace Microsoft.SqlTools.ServiceLayer.NotebookConvert.Contracts /// /// The raw SQL query content to display /// - public string content; + public string Content; } public class ConvertNotebookToSqlRequest @@ -50,7 +50,7 @@ namespace Microsoft.SqlTools.ServiceLayer.NotebookConvert.Contracts /// /// The raw Notebook JSON content to display /// - public string content; + public string Content; } public class ConvertSqlToNotebookRequest diff --git a/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Notebook.cs b/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Notebook.cs new file mode 100644 index 00000000..d339249b --- /dev/null +++ b/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/Notebook.cs @@ -0,0 +1,65 @@ +using 
Newtonsoft.Json;
+using System;
+using System.Collections.Generic;
+
+namespace Microsoft.SqlTools.ServiceLayer.NotebookConvert
+{
+    /// <summary>
+    /// Basic schema wrapper for parsing a Notebook document
+    /// </summary>
+    public class NotebookDocument
+    {
+        /// <summary>
+        /// Document level metadata, serialized as "metadata"
+        /// </summary>
+        [JsonProperty("metadata")]
+        public NotebookMetadata NotebookMetadata;
+
+        /// <summary>
+        /// Serialized as "nbformat_minor", defaults to 2
+        /// </summary>
+        [JsonProperty("nbformat_minor")]
+        public int NotebookFormatMinor = 2;
+
+        /// <summary>
+        /// Serialized as "nbformat", defaults to 4
+        /// </summary>
+        [JsonProperty("nbformat")]
+        public int NotebookFormat = 4;
+
+        /// <summary>
+        /// The cells of the document in display order, serialized as "cells".
+        /// Starts out as an empty list so cells can be added without a null check.
+        /// </summary>
+        [JsonProperty("cells")]
+        public IList<NotebookCell> Cells = new List<NotebookCell>();
+    }
+
+    /// <summary>
+    /// The "metadata" section of a Notebook document
+    /// </summary>
+    public class NotebookMetadata
+    {
+        /// <summary>
+        /// Serialized as "kernelspec"
+        /// </summary>
+        [JsonProperty("kernelspec")]
+        public NotebookKernelSpec KernelSpec;
+
+        /// <summary>
+        /// Serialized as "language_info"
+        /// </summary>
+        [JsonProperty("language_info")]
+        public NotebookLanguageInfo LanguageInfo;
+    }
+
+    /// <summary>
+    /// The "kernelspec" section of the Notebook metadata
+    /// </summary>
+    public class NotebookKernelSpec
+    {
+        /// <summary>
+        /// Serialized as "name"
+        /// </summary>
+        [JsonProperty("name")]
+        public string Name;
+
+        /// <summary>
+        /// Serialized as "display_name"
+        /// </summary>
+        [JsonProperty("display_name")]
+        public string DisplayName;
+
+        /// <summary>
+        /// Serialized as "language"
+        /// </summary>
+        [JsonProperty("language")]
+        public string Language;
+    }
+
+    /// <summary>
+    /// The "language_info" section of the Notebook metadata
+    /// </summary>
+    public class NotebookLanguageInfo
+    {
+        /// <summary>
+        /// Serialized as "name"
+        /// </summary>
+        [JsonProperty("name")]
+        public string Name;
+
+        /// <summary>
+        /// Serialized as "version"
+        /// </summary>
+        [JsonProperty("version")]
+        public string Version;
+    }
+
+    /// <summary>
+    /// Cell of a Notebook document
+    /// </summary>
+    public class NotebookCell
+    {
+        /// <summary>
+        /// Creates a cell of the given type with the given source lines
+        /// </summary>
+        /// <param name="cellType">The value stored in <see cref="CellType"/></param>
+        /// <param name="source">The value stored in <see cref="Source"/></param>
+        public NotebookCell(string cellType, IList<string> source)
+        {
+            this.CellType = cellType;
+            this.Source = source;
+        }
+
+        /// <summary>
+        /// The kind of cell, serialized as "cell_type". The conversion service
+        /// produces "code" and "markdown" cells.
+        /// </summary>
+        [JsonProperty("cell_type")]
+        public string CellType;
+
+        /// <summary>
+        /// The text of the cell, one entry per line, serialized as "source"
+        /// </summary>
+        [JsonProperty("source")]
+        public IList<string> Source;
+    }
+}
diff --git a/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/NotebookConvertService.cs b/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/NotebookConvertService.cs index 34ad2328..ff95447c 100644 --- a/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/NotebookConvertService.cs +++ b/src/Microsoft.SqlTools.ServiceLayer/NotebookConvert/NotebookConvertService.cs @@ -4,12 +4,18 @@ // using System; +using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; using Microsoft.SqlTools.Hosting.Protocol; using Microsoft.SqlTools.ServiceLayer.Hosting; using
Microsoft.SqlTools.ServiceLayer.NotebookConvert.Contracts; using Microsoft.SqlTools.ServiceLayer.SqlContext; using Microsoft.SqlTools.ServiceLayer.Workspace; +using Newtonsoft.Json; +using Microsoft.SqlServer.TransactSql.ScriptDom; +using System.IO; +using Microsoft.SqlTools.ServiceLayer.NotebookConvert; namespace Microsoft.SqlTools.ServiceLayer.Agent { @@ -48,7 +54,7 @@ namespace Microsoft.SqlTools.ServiceLayer.Agent this.ServiceHost.SetRequestHandler(ConvertNotebookToSqlRequest.Type, HandleConvertNotebookToSqlRequest); this.ServiceHost.SetRequestHandler(ConvertSqlToNotebookRequest.Type, HandleConvertSqlToNotebookRequest); - + } @@ -60,8 +66,12 @@ namespace Microsoft.SqlTools.ServiceLayer.Agent { try { - var result = new ConvertNotebookToSqlResult(); - result.content = parameters.NotebookJson; + var notebookDoc = JsonConvert.DeserializeObject(parameters.Content); + + var result = new ConvertNotebookToSqlResult + { + Content = ConvertNotebookDocToSql(notebookDoc) + }; await requestContext.SendResult(result); } catch (Exception e) @@ -75,41 +85,14 @@ namespace Microsoft.SqlTools.ServiceLayer.Agent { await Task.Run(async () => { - + try { var file = WorkspaceService.Instance.Workspace.GetFile(parameters.ClientUri); // Temporary notebook that we just fill in with the sql until the parsing logic is added var result = new ConvertSqlToNotebookResult { - content = $@"{{ - ""metadata"": {{ - ""kernelspec"": {{ - ""name"": ""SQL"", - ""display_name"": ""SQL"", - ""language"": ""sql"" - }}, - ""language_info"": {{ - ""name"": ""sql"", - ""version"": """" - }} - }}, - ""nbformat_minor"": 2, - ""nbformat"": 4, - ""cells"": [ - {{ - ""cell_type"": ""code"", - ""source"": [ - ""{file.Contents}"" - ], - ""metadata"": {{ - ""azdata_cell_guid"": ""477da394-51fd-45ab-8a37-387b47b2b692"" - }}, - ""outputs"": [], - ""execution_count"": null - }} - ] -}}" + Content = JsonConvert.SerializeObject(ConvertSqlToNotebook(file.Contents)) }; await requestContext.SendResult(result); } @@ -122,5 
+105,120 @@ namespace Microsoft.SqlTools.ServiceLayer.Agent
         #endregion // Convert Handlers
+
+        /// <summary>
+        /// Converts a SQL script into a Notebook document. Every multiline comment becomes
+        /// a markdown cell and the text between comments becomes a code cell.
+        /// </summary>
+        /// <param name="sql">The SQL text to convert; null is treated as an empty script</param>
+        /// <returns>A Notebook document with SQL kernel metadata and one cell per block</returns>
+        private static NotebookDocument ConvertSqlToNotebook(string sql)
+        {
+            // Notebooks use \n so convert any other newlines (\r\n and lone \r) now
+            sql = (sql ?? string.Empty).Replace("\r\n", "\n").Replace('\r', '\n');
+
+            var doc = new NotebookDocument
+            {
+                NotebookMetadata = new NotebookMetadata()
+                {
+                    KernelSpec = new NotebookKernelSpec()
+                    {
+                        Name = "SQL",
+                        DisplayName = "SQL",
+                        Language = "sql"
+                    },
+                    LanguageInfo = new NotebookLanguageInfo()
+                    {
+                        Name = "sql",
+                        Version = ""
+                    }
+                }
+            };
+
+            var parser = new TSql150Parser(false);
+            IList<TSqlParserToken> tokens;
+            using (var reader = new StringReader(sql))
+            {
+                // Only the token stream is used to locate the comments, the reported
+                // errors are not inspected
+                tokens = parser.GetTokenStream(reader, out IList<ParseError> _);
+            }
+
+            /**
+             * Split the text into separate chunks - blocks of multiline comments and blocks
+             * of everything else. We then create a markdown cell for each multiline comment and a code
+             * cell for the other blocks.
+             */
+            int currentIndex = 0;
+            foreach (var comment in tokens.Where(token => token.TokenType == TSqlTokenType.MultilineComment))
+            {
+                // The code blocks are everything since the end of the last comment block up to the
+                // start of the next comment block
+                AddCodeCellIfNotEmpty(doc, sql.Substring(currentIndex, comment.Offset - currentIndex));
+                doc.Cells.Add(GenerateMarkdownCell(StripCommentDelimiters(comment.Text)));
+                currentIndex = comment.Offset + comment.Text.Length;
+            }
+
+            // Add any remaining text in a final code block
+            AddCodeCellIfNotEmpty(doc, sql.Substring(currentIndex));
+
+            return doc;
+        }
+
+        /// <summary>
+        /// Adds a code cell for the given text unless it is empty once trimmed
+        /// </summary>
+        private static void AddCodeCellIfNotEmpty(NotebookDocument doc, string code)
+        {
+            string codeBlock = code.Trim();
+            if (!string.IsNullOrEmpty(codeBlock))
+            {
+                doc.Cells.Add(GenerateCodeCell(codeBlock));
+            }
+        }
+
+        /// <summary>
+        /// Trims a multiline comment and removes its starting /* and ending */. A delimiter
+        /// is only removed when it is actually present so that unexpected token text
+        /// neither throws nor loses content.
+        /// </summary>
+        private static string StripCommentDelimiters(string commentText)
+        {
+            string text = commentText.Trim();
+            if (text.StartsWith("/*", StringComparison.Ordinal))
+            {
+                text = text.Substring(2);
+            }
+            if (text.EndsWith("*/", StringComparison.Ordinal))
+            {
+                text = text.Substring(0, text.Length - 2);
+            }
+            return text.Trim();
+        }
+
+        /// <summary>
+        /// Creates a "code" cell from the given text, one source entry per line
+        /// </summary>
+        /// <param name="contents">The text of the cell using \n line breaks</param>
+        private static NotebookCell GenerateCodeCell(string contents)
+        {
+            return new NotebookCell("code", SplitIntoSourceLines(contents, "\n"));
+        }
+
+        /// <summary>
+        /// Creates a "markdown" cell from the given text, one source entry per line
+        /// </summary>
+        /// <param name="contents">The text of the cell using \n line breaks</param>
+        private static NotebookCell GenerateMarkdownCell(string contents)
+        {
+            // Markdown newlines have to be prefixed by 2 spaces
+            return new NotebookCell("markdown", SplitIntoSourceLines(contents, "  \n"));
+        }
+
+        /// <summary>
+        /// Splits text into the per-line entries of a cell source array
+        /// </summary>
+        /// <param name="contents">The text to split on \n; null is treated as empty</param>
+        /// <param name="lineEnding">The text appended to every line except the last</param>
+        private static IList<string> SplitIntoSourceLines(string contents, string lineEnding)
+        {
+            // Each line is a separate entry in the contents array so split that now, but
+            // Notebooks still expect each line to end with a newline so keep that
+            var lines = (contents ?? string.Empty)
+                .Split('\n')
+                .Select(line => line + lineEnding)
+                .ToList();
+            // Last line shouldn't have a newline
+            lines[^1] = lines[^1].TrimEnd();
+            return lines;
+        }
+
+        /// <summary>
+        /// Converts a Notebook document into a single string that can be inserted into a SQL
+        /// query.
+        /// </summary>
+        /// <param name="doc">The document to convert; may be null</param>
+        /// <returns>
+        /// The cells separated by a blank line, with markdown cells wrapped in a multiline
+        /// comment. An empty string if there is no document or no cell list.
+        /// </returns>
+        private static string ConvertNotebookDocToSql(NotebookDocument doc)
+        {
+            if (doc?.Cells == null)
+            {
+                return string.Empty;
+            }
+
+            string newLine = Environment.NewLine;
+            // Add an extra blank line between each block for readability
+            return string.Join(newLine + newLine, doc.Cells
+                .Where(cell => cell != null)
+                .Select(cell =>
+                {
+                    IEnumerable<string> source = cell.Source ?? Enumerable.Empty<string>();
+                    return cell.CellType switch
+                    {
+                        // Markdown is text so wrapped in a comment block. Entries may already end
+                        // with a newline, so strip it before joining to avoid doubled line breaks.
+                        // NOTE(review): markdown containing an unbalanced */ would end the comment
+                        // early - confirm whether that needs escaping
+                        "markdown" => "/*" + newLine
+                            + string.Join(newLine, source.Select(line => line?.TrimEnd('\r', '\n')))
+                            + newLine + "*/",
+                        // Everything else (just code blocks for now) is left as is
+                        _ => string.Join("", source),
+                    };
+                }));
+        }
     }
+
 }