azuredatastudio/extensions/notebook/resources/notebooks/JupyterBooksCreate.ipynb

{
    "metadata": {
        "kernelspec": {
            "name": "python3",
            "display_name": "Python 3"
        },
        "language_info": {
            "name": "python",
            "version": "3.7.2",
            "mimetype": "text/x-python",
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "pygments_lexer": "ipython3",
            "nbconvert_exporter": "python",
            "file_extension": ".py"
        }
    },
    "nbformat_minor": 2,
    "nbformat": 4,
    "cells": [
        {
            "cell_type": "markdown",
            "source": [
                "# Create Jupyter Book\n",
                "\n",
                "## 1. Installation\n",
                "\n",
                "To install the Jupyter Book command-line interface (CLI), use `pip`!"
            ],
            "metadata": {
                "azdata_cell_guid": "97541c75-b1c9-4e4c-9f0a-f93df4a550ef"
            }
        },
        {
            "cell_type": "code",
            "source": [
                "import sys\r\n",
                "\r\n",
                "#install jupyter-book\r\n",
                "cmd = f'{sys.executable} -m pip show jupyter-book'\r\n",
                "cmdOutput = !{cmd}\r\n",
                "if len(cmdOutput) > 1 and '0.6.4' in cmdOutput[1]:\r\n",
                "    print('Jupyter-book required version is already installed!')\r\n",
                "else:\r\n",
                "    !pip install jupyter-book"
            ],
            "metadata": {
                "azdata_cell_guid": "8bd77173-2f63-4bf8-95e8-af2a654fc91e",
                "tags": []
            },
            "outputs": [],
            "execution_count": null
        },
        {
            "cell_type": "markdown",
            "source": [
                "## 2. Create a new book\r\n",
                "\r\n",
                "Create a book using your own notebooks and markdown pages:\r\n",
                "\r\n",
                "<span style=\"color:red\">Note:</span> Notebook and markdown filenames cannot contain spaces"
            ],
            "metadata": {
                "azdata_cell_guid": "6a1b6bb8-9cb8-43d5-878f-2029d1eacb0e"
            }
        },
        {
            "cell_type": "code",
            "source": [
                "import os, re, shutil\r\n",
                "\r\n",
                "try:\r\n",
                "    overwrite = False\r\n",
                "    book_name = input('Please provide the path where the book needs to be saved along with the book name ex-> D:\\Book1: ') \r\n",
                "\r\n",
                "    if (os.path.exists(book_name)):\r\n",
                "        new_book_name = input('A folder named ' + book_name + ' already exists. Enter a new name or the same name to overwrite the existing folder.\\n')\r\n",
                "        if book_name == new_book_name:\r\n",
                "            overwrite = True\r\n",
                "        book_name = new_book_name\r\n",
                "\r\n",
                "    content_folder = input('Please provide the path to your folder containing notebooks and markdown files: ')\r\n",
                "\r\n",
                "    while (not os.path.exists(content_folder)):\r\n",
                "        content_folder = input('Cannot find folder ' + content_folder + '. Please provide another path: ')\r\n",
                "    \r\n",
                "    if overwrite:\r\n",
                "        !jupyter-book create \"$book_name\" --content-folder \"$content_folder\" --overwrite\r\n",
                "    else:\r\n",
                "        !jupyter-book create \"$book_name\" --content-folder \"$content_folder\"\r\n",
                "except Exception as e:\r\n",
                "    raise SystemExit(str(e))"
            ],
            "metadata": {
                "azdata_cell_guid": "d1a363f0-d854-4466-be87-d01d4c7e51ef",
                "tags": []
            },
            "outputs": [],
            "execution_count": null
        },
        {
            "cell_type": "markdown",
            "source": [
                "## 3. Format toc.yml file of the book\r\n",
                "\r\n",
                "Create a valid toc.yml file that enables your book to be opened on our viewlet.\r\n",
                "\r\n",
                "<span style=\"color:red\">Note:</span> Skipping this step may result in a malformed book being generated."
            ],
            "metadata": {
                "azdata_cell_guid": "5a29e2dd-d9f5-4dc1-87bc-bce639362f51"
            }
        },
        {
            "cell_type": "code",
            "source": [
                "# Update toc file, book title and clean up the directores\n",
                "\n",
                "from os import path\n",
                "tocFilePath = path.join(book_name, \"_data\", \"toc.yml\")\n",
                "f = open(tocFilePath, \"r\")\n",
                "title = ''\n",
                "replacedString = ''\n",
                "result = f.read()\n",
                "f.close()\n",
                "contentFolders = []\n",
                "\n",
                "firstLevelUrls = re.findall(r'^(?:\\s+$[\\r\\n]+)+(\\- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$[\\r\\n]+)', result, re.MULTILINE)\n",
                "urls = re.findall(r'- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$', result, re.MULTILINE)\n",
                "headers = re.findall(r'- header: [a-zA-Z0-9\\\\.\\s-]+$', result, re.MULTILINE)\n",
                "# all the markdown urls are placed at the end of the list. \n",
                "possibleMarkdowns = re.findall(r'(- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$[\\r\\n]+)[\\r\\n]', result, re.MULTILINE)\n",
                "\n",
                "def getMarkdownFile(url):\n",
                "    # url are usually defined in toc as: \"- url: <Path\\to\\notebookFile>\"\n",
                "    # substring from 7th postion excluding the \"- url: \" from the path\n",
                "    #folders = url[7:].rstrip().split(path.sep)\n",
                "    if (os.name == 'nt'):\n",
                "        # if windows get the drive letter and add it to the url\n",
                "        driveletter = content_folder.split(path.sep)[0]\n",
                "        markdownFilePath = path.join(driveletter, url[7:].rstrip()+'.md')\n",
                "    else:\n",
                "        markdownFilePath = url[7:].rstrip() + '.md'\n",
                "    return markdownFilePath\n",
                "\n",
                "try:\n",
                "    if (firstLevelUrls or headers or urls):\n",
                "        if (firstLevelUrls and len(firstLevelUrls) == 1):\n",
                "            for url in firstLevelUrls:\n",
                "                # check first link is the markdown\n",
                "                title = url[url.rindex(path.sep)+1:].rstrip()\n",
                "                markdownFilePath = getMarkdownFile(url)\n",
                "                rootmarkdownExists = path.exists(markdownFilePath)\n",
                "                markdownUrl = urls[len(urls) -1]\n",
                "                if (rootmarkdownExists):\n",
                "                    markdownUrl = url\n",
                "                    replacedString = \"\\n- title: %s\\n  url: /%s\\n  not_numbered: true\\n\" % (title, title)\n",
                "                # else check if the last link in the url list is markdown    \n",
                "                elif (not headers and path.exists(getMarkdownFile(markdownUrl))):\n",
                "                    rootmarkdownExists = True\n",
                "                    title = markdownUrl[markdownUrl.rindex(path.sep)+1:].rstrip()\n",
                "                    replacedString = \"\\n- title: %s\\n  url: /%s\\n  not_numbered: true\\n  expand_sections: true\\n  sections:  %s\" % (title, title, url)\n",
                "                    result = result.replace(markdownUrl, '')\n",
                "                # if there not markdowns and folders contains markdowns, handle them\n",
                "                elif (possibleMarkdowns):\n",
                "                    markdownUrl = possibleMarkdowns[0]\n",
                "                    title = markdownUrl[markdownUrl.rindex(path.sep)+1:].rstrip()\n",
                "                    markdownFilePath = getMarkdownFile(markdownUrl)\n",
                "                    rootmarkdownExists = path.exists(markdownFilePath)\n",
                "                    if (rootmarkdownExists):\n",
                "                        replacedString = \"\\n- title: %s\\n  url: /%s\\n  not_numbered: true\\n  expand_sections: true\\n  sections:  %s\" % (title, title, url)\n",
                "                        result = result.replace(markdownUrl, '')\n",
                "                # there is no markdown and we're adding the first link as is\n",
                "                else:\n",
                "                    markdownUrl = url\n",
                "                    replacedString = \"\\n- title: %s\\n  url: /%s\\n  not_numbered: true\\n\" % (title, title)\n",
                "                result = result.replace(url, replacedString)\n",
                "        # Folders and handling them -> each header is a folder\n",
                "        if (headers):\n",
                "            for header in headers:\n",
                "                title = header[10:].rstrip()\n",
                "                # filters all the urls with /headerName in them\n",
                "                filtered = list(filter(lambda x: (\"%s%s%s\" % (path.sep, title.lower(), path.sep)) in x.lower(), urls))\n",
                "                index = urls.index(filtered[len(filtered)-1])\n",
                "                markdownFilePath = getMarkdownFile(filtered[len(filtered)-1])\n",
                "                markdownExists = path.exists(markdownFilePath)\n",
                "                if (not markdownExists):\n",
                "                    index = urls.index(filtered[0])\n",
                "                folderEndIndex = urls[index].rindex(path.sep)\n",
                "                caseSensitiveFolderName = urls[index][urls[index].rindex(path.sep, 0, folderEndIndex)+1:folderEndIndex]\n",
                "                contentFolders.append(caseSensitiveFolderName)\n",
                "                urlValue = urls[index][urls[index].rindex(path.sep)+1:].rstrip()\n",
                "                replacedString = \"\\n- title: %s\\n  url: /%s/%s\\n%s  expand_sections: true\\n  sections:  \" % (title, caseSensitiveFolderName, urlValue, '  not_numbered: true\\n' if markdownExists else '')\n",
                "                result = result.replace(header, replacedString)\n",
                "                if (markdownExists):\n",
                "                    result = result.replace(urls[index], '')\n",
                "                    del urls[index]\n",
                "            if (urls):\n",
                "                for url in urls:\n",
                "                    title = url[url.rindex(path.sep)+1:].rstrip()\n",
                "                    urlValue = title\n",
                "                    if (len(contentFolders) > 0):\n",
                "                        folders = url[7:].split(path.sep)\n",
                "                        if (folders[len(folders)-2] in contentFolders):\n",
                "                            parentFolder = contentFolders.index(folders[len(folders)-2])\n",
                "                            urlValue = \"%s/%s\" % (contentFolders[parentFolder], title)\n",
                "                            replacedString = \"\\n  - title: %s\\n    url: /%s\" % (title, urlValue)\n",
                "                        else:\n",
                "                            replacedString = \"\\n  - title: %s\\n    url: /%s\" % (title, urlValue) if rootmarkdownExists else \"\\n- title: %s\\n  url: /%s\" % (title, urlValue)\n",
                "                    result = result.replace(url, replacedString)\n",
                "                fwrite = open(tocFilePath, \"w\")\n",
                "                fwrite.write(result)\n",
                "                fwrite.close()\n",
                "        # formattinf any left over urls in the file\n",
                "        elif (urls):\n",
                "            for url in urls:\n",
                "                title = url[url.rindex(path.sep)+1:].rstrip()\n",
                "                urlValue = title\n",
                "                replacedString = \"\\n  - title: %s\\n    url: /%s\" % (title, urlValue) if rootmarkdownExists else \"\\n- title: %s\\n  url: /%s\" % (title, urlValue)\n",
                "                result = result.replace(url, replacedString)\n",
                "        fwrite = open(tocFilePath, \"w\")\n",
                "        fwrite.write(result)\n",
                "        fwrite.close()\n",
                "    else:\n",
                "        raise SystemExit(f'\\n File Name contains unsupported-characters (ex: underscores) by Jupyter Book.\\n')\n",
                "    # Update the Book title in config file\n",
                "    configFilePath = path.join(book_name, \"_config.yml\")\n",
                "    f = open(configFilePath, \"r\")\n",
                "    result = f.read()\n",
                "    f.close()\n",
                "    titleLine = re.search(r'title: [a-zA-Z0-9\\\\.\\s\\-\\/]+$', result, re.MULTILINE).group()\n",
                "    title = 'title: %s' % (path.splitext(path.basename(book_name))[0])\n",
                "    result = result.replace(titleLine, title)\n",
                "    fwrite = open(configFilePath, \"w\")\n",
                "    fwrite.write(result)\n",
                "    fwrite.close()\n",
                "    # cleanup the directories\n",
                "    with os.scandir(book_name) as root_dir:\n",
                "        for path in root_dir:\n",
                "            if path.is_file() and path.name not in ('_config.yml'):\n",
                "                os.remove(path)\n",
                "            if path.is_dir() and path.name not in ('_data', 'content'):\n",
                "                shutil.rmtree(path)\n",
                "except Exception as e:\n",
                "    raise SystemExit(str(e))"
            ],
            "metadata": {
                "azdata_cell_guid": "6124730b-f52e-4103-8dbb-e3a62325fb55",
                "tags": [
                    "hide_input"
                ]
            },
            "outputs": [],
            "execution_count": null
        },
        {
            "cell_type": "markdown",
            "source": [
                "## 4. Open your Book!\r\n",
                "**Run the below cell and click on the link to view your book in Azure Data Studio.**"
            ],
            "metadata": {
                "azdata_cell_guid": "ab100e5c-13f4-484a-9a4a-49bb13cad027"
            }
        },
        {
            "cell_type": "code",
            "source": [
                "import re, os\r\n",
                "from IPython.display import *\r\n",
                "if os.name == 'nt':\r\n",
                "    bookPath = book_name.replace('\\\\', '\\\\\\\\')\r\n",
                "    display(HTML(\"<h2><b><a href=\\\"command:bookTreeView.openBook?&quot;\"+str(bookPath)+\"&quot;\\\"><font size=\\\"3\\\">Click here to open your Book in ADS</font></a></b></h2>\"))\r\n",
                "else:\r\n",
                "    display(HTML(\"<h2><b><a href=\\\"command:bookTreeView.openBook?&quot;\"+str(book_name)+\"&quot;\\\"><font size=\\\"3\\\">Click here to open your Book in ADS</font></a></b></h2>\"))"
            ],
            "metadata": {
                "azdata_cell_guid": "33d8e1cb-1eec-41ed-a368-1aeef9af62d4",
                "tags": []
            },
            "outputs": [],
            "execution_count": null
        },
        {
            "cell_type": "markdown",
            "source": [
                "<span style=\"color:red\">**Note**: On clicking the above link, we create a temporary toc.yml file for your convenience.</span>\r\n",
                "\r\n",
                " Please update that file inside your book (located at: *YourbookPath*/_data/toc.yml) if you want to further customize your book following \r\n",
                " instructions at https://jupyterbook.org/guide/01-5_tour.html#Table-of-Contents.\r\n",
                ""
            ],
            "metadata": {
                "azdata_cell_guid": "d193d588-847b-4725-9591-098d0fb24343"
            }
        },
        {
            "cell_type": "code",
            "source": [
                "display(HTML(\"<h1><b>That's it!</b></h1><br/><p>You are good to view your book in Azure Data Studio by clicking on the above link.</p>\"))"
            ],
            "metadata": {
                "azdata_cell_guid": "bd2fe173-66ce-48b3-8dc3-c4d7560953c8",
                "tags": []
            },
            "outputs": [],
            "execution_count": null
        }
    ]
}