Add support for new version of Jupyter Book (#12086)

* Add support for new jupyter book version

* Add changes to the jupyter notebook to create books

* Create config file

* Add support of new version of jupyter book on ADS

* Fix paths for opening folder with v1 and v2 books

* Add tests for jupyter book v2

* Update tests

* Fix tests

* Fix get parent issue

* Address PR comments

* Fix bookVersion condition in getSections and fix issue on create book notebook

* Fix search

* update python notebook

* Remove commented lines
This commit is contained in:
Barbara Valdez
2020-09-10 11:32:58 -07:00
committed by GitHub
parent ff61eae164
commit 3ff9df8e0b
6 changed files with 959 additions and 582 deletions

View File

@@ -6,7 +6,7 @@
},
"language_info": {
"name": "python",
"version": "3.7.2",
"version": "3.6.6",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
@@ -24,8 +24,13 @@
"cell_type": "markdown",
"source": [
"# Create Jupyter Book\n",
"This is a notebook to create a [Jupyter Book](https://jupyterbook.org/intro.html), which is an organized collection of Jupyter notebooks.\n",
"\n",
"<strong style=\"color: red; opacity: 0.80;\">Note: Jupyter Books in Azure Data Studio only support jupyter-book versions <= 0.6.4 To create a Jupyter Book, we will be uninstalling versions newer than 0.6.4 and replacing with 0.6.4</strong>\n",
" \n",
"\n",
"The best way to use this notebook is by clicking Run all in the toolbar above. This will run all the cells in a step-by-step order so that you can create your Jupyter Book.\n",
"\n",
" \n",
"## 1. Installation\n",
"\n",
"To install the Jupyter Book command-line interface (CLI), use `pip`!"
@@ -39,22 +44,44 @@
"source": [
"import sys\r\n",
"\r\n",
"#install jupyter-book 0.6.4\r\n",
"cmd = f'{sys.executable} -m pip show jupyter-book'\r\n",
"cmdOutput = !{cmd}\r\n",
"if len(cmdOutput) > 1 and '0.6.4' in cmdOutput[1]:\r\n",
" print('Jupyter-book required version is already installed!')\r\n",
"elif len(cmdOutput) > 1:\r\n",
" print('Unsupported version of Jupyter-book installed, Please wait while we uninstall and install the supported version.')\r\n",
" !pip uninstall jupyter-book --yes\r\n",
" !pip install jupyter-book==0.6.4\r\n",
"else:\r\n",
" print('Installing Jupyter-book...')\r\n",
" !pip install jupyter-book==0.6.4"
"def getJupyterBookVersion():\r\n",
"    \"\"\"Return the installed jupyter-book version, installing the package first if it is missing.\r\n",
"\r\n",
"    Returns a (version, version_str) pair: version is a tuple of ints used for\r\n",
"    comparisons such as version < (0,7,0); version_str is the tuple of the raw\r\n",
"    dot-separated components, so '.'.join(version_str) rebuilds the display string.\r\n",
"    Raises RuntimeError when pip reports no version for jupyter-book.\r\n",
"    \"\"\"\r\n",
"    import re\r\n",
"    cmd = f'{sys.executable} -m pip show jupyter-book'\r\n",
"    cmdOutput = !{cmd}\r\n",
"    if len(cmdOutput) <= 1:\r\n",
"        # Install with the kernel's interpreter so the package lands in the environment queried above.\r\n",
"        !{sys.executable} -m pip install jupyter-book\r\n",
"        cmdOutput = !{cmd}\r\n",
"    for line in cmdOutput:\r\n",
"        if line.lower().startswith('version:'):\r\n",
"            # Split on dots instead of on characters so multi-digit components such as 0.10.0 stay intact.\r\n",
"            version_str = tuple(line.split(':', 1)[1].strip().split('.'))\r\n",
"            numeric_parts = []\r\n",
"            for part in version_str:\r\n",
"                leading_digits = re.match(r'\\d+', part)\r\n",
"                if leading_digits is None:\r\n",
"                    # Stop at a non-numeric suffix component (e.g. 'dev0').\r\n",
"                    break\r\n",
"                numeric_parts.append(int(leading_digits.group()))\r\n",
"            return tuple(numeric_parts), version_str\r\n",
"    raise RuntimeError('Unable to determine the installed jupyter-book version.')\r\n",
"\r\n",
"try:\r\n",
" version, version_str = getJupyterBookVersion()\r\n",
" if version:\r\n",
" display_version = '.'.join(version_str)\r\n",
" print(f'Jupyter-book version {display_version} is already installed!')\r\n",
" use_current = input(f'Would you like to create a book using the current installed version [{display_version}] of jupyter-book? [y/n]').lower()\r\n",
"\r\n",
" while use_current not in ['yes', 'y', 'no', 'n']:\r\n",
" use_current = input(f'Would you like to create a book using the current installed version [{display_version}] of jupyter-book? [y/n]').lower()\r\n",
" if use_current in ['no', 'n']:\r\n",
" install_version = input(f'Please enter the version you would like to use (eg. 0.7.4)')\r\n",
" if install_version:\r\n",
" !pip uninstall jupyter-book --yes\r\n",
" version = ''\r\n",
" print('Installing Jupyter-book...')\r\n",
" !pip install jupyter-book==\"$install_version\"\r\n",
" version, _ = getJupyterBookVersion()\r\n",
"except Exception as e:\r\n",
" raise SystemExit(str(e))"
],
"metadata": {
"azdata_cell_guid": "8bd77173-2f63-4bf8-95e8-af2a654fc91e",
"tags": []
"tags": [
"hide_input"
]
},
"outputs": [],
"execution_count": null
@@ -75,7 +102,11 @@
{
"cell_type": "code",
"source": [
"import os, re, shutil\r\n",
"import os, re, subprocess\r\n",
"from sys import platform\r\n",
"import shutil\r\n",
"\r\n",
"os.environ[\"LC_ALL\"]=\"en_US.UTF-8\"\r\n",
"\r\n",
"try:\r\n",
" overwrite = False\r\n",
@@ -91,17 +122,33 @@
"\r\n",
" while (not os.path.exists(content_folder)):\r\n",
" content_folder = input('Cannot find folder ' + content_folder + '. Please provide another path: ')\r\n",
" \r\n",
" if overwrite:\r\n",
" !jupyter-book create \"$book_name\" --content-folder \"$content_folder\" --overwrite\r\n",
" else:\r\n",
" !jupyter-book create \"$book_name\" --content-folder \"$content_folder\"\r\n",
" if version and version < (0,7,0):\r\n",
" if overwrite:\r\n",
" !jupyter-book create \"$book_name\" --content-folder \"$content_folder\" --overwrite\r\n",
" else:\r\n",
" !jupyter-book create \"$book_name\" --content-folder \"$content_folder\"\r\n",
" elif version:\r\n",
" if overwrite:\r\n",
" shutil.rmtree(book_name)\r\n",
" else:\r\n",
" # waiting for shutil to remove the directory\r\n",
" while os.path.exists(book_name):\r\n",
" pass\r\n",
" subprocess.check_call(['mkdir', book_name])\r\n",
" content_folder = os.path.join(content_folder,'')\r\n",
" if platform in ['linux', 'darwin']:\r\n",
" subprocess.check_call(['cp', '-r',content_folder, book_name])\r\n",
" else:\r\n",
" subprocess.check_call(['xcopy', content_folder, book_name])\r\n",
" !jupyter-book toc \"$book_name\"\r\n",
"except Exception as e:\r\n",
" raise SystemExit(str(e))"
],
"metadata": {
"azdata_cell_guid": "d1a363f0-d854-4466-be87-d01d4c7e51ef",
"tags": []
"tags": [
"hide_input"
]
},
"outputs": [],
"execution_count": null
@@ -123,22 +170,9 @@
"cell_type": "code",
"source": [
"# Update toc file, book title and clean up the directories\n",
"\n",
"import shutil\n",
"from os import path\n",
"tocFilePath = path.join(book_name, \"_data\", \"toc.yml\")\n",
"f = open(tocFilePath, \"r\")\n",
"title = ''\n",
"replacedString = ''\n",
"result = f.read()\n",
"f.close()\n",
"contentFolders = []\n",
"\n",
"firstLevelUrls = re.findall(r'^(?:\\s+$[\\r\\n]+)+(\\- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$[\\r\\n]+)', result, re.MULTILINE)\n",
"urls = re.findall(r'- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$', result, re.MULTILINE)\n",
"headers = re.findall(r'- header: [a-zA-Z0-9\\\\.\\s-]+$', result, re.MULTILINE)\n",
"# all the markdown urls are placed at the end of the list. \n",
"possibleMarkdowns = re.findall(r'(- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$[\\r\\n]+)[\\r\\n]', result, re.MULTILINE)\n",
"\n",
"import yaml\n",
"def getMarkdownFile(url):\n",
" # url are usually defined in toc as: \"- url: <Path\\to\\notebookFile>\"\n",
" # substring from 7th position excluding the \"- url: \" from the path\n",
@@ -151,106 +185,164 @@
" markdownFilePath = url[7:].rstrip() + '.md'\n",
" return markdownFilePath\n",
"\n",
"try:\n",
" if (firstLevelUrls or headers or urls):\n",
" if (firstLevelUrls and len(firstLevelUrls) == 1):\n",
" for url in firstLevelUrls:\n",
" # check first link is the markdown\n",
" title = url[url.rindex(path.sep)+1:].rstrip()\n",
" markdownFilePath = getMarkdownFile(url)\n",
" rootmarkdownExists = path.exists(markdownFilePath)\n",
" markdownUrl = urls[len(urls) -1]\n",
" if (rootmarkdownExists):\n",
" markdownUrl = url\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n\" % (title, title)\n",
" # else check if the last link in the url list is markdown \n",
" elif (not headers and path.exists(getMarkdownFile(markdownUrl))):\n",
" rootmarkdownExists = True\n",
" title = markdownUrl[markdownUrl.rindex(path.sep)+1:].rstrip()\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n expand_sections: true\\n sections: %s\" % (title, title, url)\n",
" result = result.replace(markdownUrl, '')\n",
" # if there not markdowns and folders contains markdowns, handle them\n",
" elif (possibleMarkdowns):\n",
" markdownUrl = possibleMarkdowns[0]\n",
" title = markdownUrl[markdownUrl.rindex(path.sep)+1:].rstrip()\n",
" markdownFilePath = getMarkdownFile(markdownUrl)\n",
"def getAllSections(visited, data, current):\n",
" if current not in visited:\n",
" visited.append(current)\n",
" for x in range(len(current)):\n",
" if 'sections' in current[x].keys():\n",
" tmp = {}\n",
" tmp['title'] = path.dirname(current[x]['file']).title()\n",
" tmp['file'] = current[x]['file']\n",
" tmp['numbered'] = False\n",
" tmp['expand_sections'] = True\n",
" tmp['sections'] = current[x]['sections']\n",
" current[x] = tmp\n",
" getAllSections(visited, data, current[x]['sections'])\n",
" else:\n",
" current[x]['title'] = path.basename(current[x]['file']).title()\n",
"\n",
"if version < (0,7,0):\n",
" tocFilePath = path.join(book_name, \"_data\", \"toc.yml\")\n",
" f = open(tocFilePath, \"r\")\n",
" title = ''\n",
" replacedString = ''\n",
" result = f.read()\n",
" f.close()\n",
" contentFolders = []\n",
"\n",
" firstLevelUrls = re.findall(r'^(?:\\s+$[\\r\\n]+)+(\\- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$[\\r\\n]+)', result, re.MULTILINE)\n",
" urls = re.findall(r'- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$', result, re.MULTILINE)\n",
" headers = re.findall(r'- header: [a-zA-Z0-9\\\\.\\s-]+$', result, re.MULTILINE)\n",
" # all the markdown urls are placed at the end of the list. \n",
" possibleMarkdowns = re.findall(r'(- url: [a-zA-Z0-9\\\\.\\s\\-\\/]+$[\\r\\n]+)[\\r\\n]', result, re.MULTILINE)\n",
"\n",
" try:\n",
" if (firstLevelUrls or headers or urls):\n",
" if (firstLevelUrls and len(firstLevelUrls) == 1):\n",
" for url in firstLevelUrls:\n",
" # check first link is the markdown\n",
" title = url[url.rindex(path.sep)+1:].rstrip()\n",
" markdownFilePath = getMarkdownFile(url)\n",
" rootmarkdownExists = path.exists(markdownFilePath)\n",
" markdownUrl = urls[len(urls) -1]\n",
" if (rootmarkdownExists):\n",
" markdownUrl = url\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n\" % (title, title)\n",
" # else check if the last link in the url list is markdown \n",
" elif (not headers and path.exists(getMarkdownFile(markdownUrl))):\n",
" rootmarkdownExists = True\n",
" title = markdownUrl[markdownUrl.rindex(path.sep)+1:].rstrip()\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n expand_sections: true\\n sections: %s\" % (title, title, url)\n",
" result = result.replace(markdownUrl, '')\n",
" # there is no markdown and we're adding the first link as is\n",
" else:\n",
" markdownUrl = url\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n\" % (title, title)\n",
" result = result.replace(url, replacedString)\n",
" # Folders and handling them -> each header is a folder\n",
" if (headers):\n",
" for header in headers:\n",
" title = header[10:].rstrip()\n",
" # filters all the urls with /headerName in them\n",
" filtered = list(filter(lambda x: (\"%s%s%s\" % (path.sep, title.lower(), path.sep)) in x.lower(), urls))\n",
" index = urls.index(filtered[len(filtered)-1])\n",
" markdownFilePath = getMarkdownFile(filtered[len(filtered)-1])\n",
" markdownExists = path.exists(markdownFilePath)\n",
" if (not markdownExists):\n",
" index = urls.index(filtered[0])\n",
" folderEndIndex = urls[index].rindex(path.sep)\n",
" caseSensitiveFolderName = urls[index][urls[index].rindex(path.sep, 0, folderEndIndex)+1:folderEndIndex]\n",
" contentFolders.append(caseSensitiveFolderName)\n",
" urlValue = urls[index][urls[index].rindex(path.sep)+1:].rstrip()\n",
" replacedString = \"\\n- title: %s\\n url: /%s/%s\\n%s expand_sections: true\\n sections: \" % (title, caseSensitiveFolderName, urlValue, ' not_numbered: true\\n' if markdownExists else '')\n",
" result = result.replace(header, replacedString)\n",
" if (markdownExists):\n",
" result = result.replace(urls[index], '')\n",
" del urls[index]\n",
" if (urls):\n",
" # if there not markdowns and folders contains markdowns, handle them\n",
" elif (possibleMarkdowns):\n",
" markdownUrl = possibleMarkdowns[0]\n",
" title = markdownUrl[markdownUrl.rindex(path.sep)+1:].rstrip()\n",
" markdownFilePath = getMarkdownFile(markdownUrl)\n",
" rootmarkdownExists = path.exists(markdownFilePath)\n",
" if (rootmarkdownExists):\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n expand_sections: true\\n sections: %s\" % (title, title, url)\n",
" result = result.replace(markdownUrl, '')\n",
" # there is no markdown and we're adding the first link as is\n",
" else:\n",
" markdownUrl = url\n",
" replacedString = \"\\n- title: %s\\n url: /%s\\n not_numbered: true\\n\" % (title, title)\n",
" result = result.replace(url, replacedString)\n",
" # Folders and handling them -> each header is a folder\n",
" if (headers):\n",
" for header in headers:\n",
" title = header[10:].rstrip()\n",
" # filters all the urls with /headerName in them\n",
" filtered = list(filter(lambda x: (\"%s%s%s\" % (path.sep, title.lower(), path.sep)) in x.lower(), urls))\n",
" index = urls.index(filtered[len(filtered)-1])\n",
" markdownFilePath = getMarkdownFile(filtered[len(filtered)-1])\n",
" markdownExists = path.exists(markdownFilePath)\n",
" if (not markdownExists):\n",
" index = urls.index(filtered[0])\n",
" folderEndIndex = urls[index].rindex(path.sep)\n",
" caseSensitiveFolderName = urls[index][urls[index].rindex(path.sep, 0, folderEndIndex)+1:folderEndIndex]\n",
" contentFolders.append(caseSensitiveFolderName)\n",
" urlValue = urls[index][urls[index].rindex(path.sep)+1:].rstrip()\n",
" replacedString = \"\\n- title: %s\\n url: /%s/%s\\n%s expand_sections: true\\n sections: \" % (title, caseSensitiveFolderName, urlValue, ' not_numbered: true\\n' if markdownExists else '')\n",
" result = result.replace(header, replacedString)\n",
" if (markdownExists):\n",
" result = result.replace(urls[index], '')\n",
" del urls[index]\n",
" if (urls):\n",
" for url in urls:\n",
" title = url[url.rindex(path.sep)+1:].rstrip()\n",
" urlValue = title\n",
" if (len(contentFolders) > 0):\n",
" folders = url[7:].split(path.sep)\n",
" if (folders[len(folders)-2] in contentFolders):\n",
" parentFolder = contentFolders.index(folders[len(folders)-2])\n",
" urlValue = \"%s/%s\" % (contentFolders[parentFolder], title)\n",
" replacedString = \"\\n - title: %s\\n url: /%s\" % (title, urlValue)\n",
" else:\n",
" replacedString = \"\\n - title: %s\\n url: /%s\" % (title, urlValue) if rootmarkdownExists else \"\\n- title: %s\\n url: /%s\" % (title, urlValue)\n",
" result = result.replace(url, replacedString)\n",
" fwrite = open(tocFilePath, \"w\")\n",
" fwrite.write(result)\n",
" fwrite.close()\n",
" # formatting any left over urls in the file\n",
" elif (urls):\n",
" for url in urls:\n",
" title = url[url.rindex(path.sep)+1:].rstrip()\n",
" urlValue = title\n",
" if (len(contentFolders) > 0):\n",
" folders = url[7:].split(path.sep)\n",
" if (folders[len(folders)-2] in contentFolders):\n",
" parentFolder = contentFolders.index(folders[len(folders)-2])\n",
" urlValue = \"%s/%s\" % (contentFolders[parentFolder], title)\n",
" replacedString = \"\\n - title: %s\\n url: /%s\" % (title, urlValue)\n",
" else:\n",
" replacedString = \"\\n - title: %s\\n url: /%s\" % (title, urlValue) if rootmarkdownExists else \"\\n- title: %s\\n url: /%s\" % (title, urlValue)\n",
" replacedString = \"\\n - title: %s\\n url: /%s\" % (title, urlValue) if rootmarkdownExists else \"\\n- title: %s\\n url: /%s\" % (title, urlValue)\n",
" result = result.replace(url, replacedString)\n",
" fwrite = open(tocFilePath, \"w\")\n",
" fwrite.write(result)\n",
" fwrite.close()\n",
" # formatting any left over urls in the file\n",
" elif (urls):\n",
" for url in urls:\n",
" title = url[url.rindex(path.sep)+1:].rstrip()\n",
" urlValue = title\n",
" replacedString = \"\\n - title: %s\\n url: /%s\" % (title, urlValue) if rootmarkdownExists else \"\\n- title: %s\\n url: /%s\" % (title, urlValue)\n",
" result = result.replace(url, replacedString)\n",
" fwrite = open(tocFilePath, \"w\")\n",
" fwrite = open(tocFilePath, \"w\")\n",
" fwrite.write(result)\n",
" fwrite.close()\n",
" else:\n",
" raise SystemExit(f'\\n File Name contains unsupported-characters (ex: underscores) by Jupyter Book.\\n')\n",
" # Update the Book title in config file\n",
" configFilePath = path.join(book_name, \"_config.yml\")\n",
" f = open(configFilePath, \"r\")\n",
" result = f.read()\n",
" f.close()\n",
" titleLine = re.search(r'title: [a-zA-Z0-9\\\\.\\s\\-\\/]+$', result, re.MULTILINE).group()\n",
" title = 'title: %s' % (path.splitext(path.basename(book_name))[0])\n",
" result = result.replace(titleLine, title)\n",
" fwrite = open(configFilePath, \"w\")\n",
" fwrite.write(result)\n",
" fwrite.close()\n",
" else:\n",
" raise SystemExit(f'\\n File Name contains unsupported-characters (ex: underscores) by Jupyter Book.\\n')\n",
" # Update the Book title in config file\n",
" # cleanup the directories\n",
" with os.scandir(book_name) as root_dir:\n",
" for path in root_dir:\n",
" if path.is_file() and path.name not in ('_config.yml'):\n",
" os.remove(path)\n",
" if path.is_dir() and path.name not in ('_data', 'content'):\n",
" shutil.rmtree(path)\n",
" except Exception as e:\n",
" raise SystemExit(str(e))\n",
"else:\n",
" tocFilePath = path.join(book_name, \"_toc.yml\")\n",
" configFilePath = path.join(book_name, \"_config.yml\")\n",
" f = open(configFilePath, \"r\")\n",
" result = f.read()\n",
" f.close()\n",
" titleLine = re.search(r'title: [a-zA-Z0-9\\\\.\\s\\-\\/]+$', result, re.MULTILINE).group()\n",
" title = 'title: %s' % (path.splitext(path.basename(book_name))[0])\n",
" result = result.replace(titleLine, title)\n",
" fwrite = open(configFilePath, \"w\")\n",
" fwrite.write(result)\n",
" fwrite.close()\n",
" # cleanup the directories\n",
" with os.scandir(book_name) as root_dir:\n",
" for path in root_dir:\n",
" if path.is_file() and path.name not in ('_config.yml'):\n",
" os.remove(path)\n",
" if path.is_dir() and path.name not in ('_data', 'content'):\n",
" shutil.rmtree(path)\n",
"except Exception as e:\n",
" raise SystemExit(str(e))"
" visited = []\n",
" # modify generated toc file\n",
" with open(tocFilePath, 'r') as stream:\n",
" data = yaml.safe_load(stream)\n",
" if not isinstance(data, list):\n",
" new_data = []\n",
" for k in data:\n",
" if k == 'file':\n",
" new_data.append({k : data[k]})\n",
" elif k == 'sections':\n",
" new_data[-1].update({k : data[k]})\n",
" data = new_data\n",
" for k in data:\n",
" if 'sections' in k:\n",
" getAllSections([], data, k['sections'])\n",
"\n",
" with open(tocFilePath, 'w') as outfile:\n",
" yaml.dump(data, outfile, sort_keys=False)\n",
" # create config file\n",
" config = {}\n",
" config['title'] = path.basename(book_name).title()\n",
" with open(configFilePath, 'w') as output:\n",
" yaml.dump(config, output, default_flow_style=False)\n",
""
],
"metadata": {
"azdata_cell_guid": "6124730b-f52e-4103-8dbb-e3a62325fb55",
@@ -284,7 +376,9 @@
],
"metadata": {
"azdata_cell_guid": "33d8e1cb-1eec-41ed-a368-1aeef9af62d4",
"tags": []
"tags": [
"hide_input"
]
},
"outputs": [],
"execution_count": null
@@ -294,9 +388,14 @@
"source": [
"<span style=\"color:red\">**Note**: On clicking the above link, we create a temporary toc.yml file for your convenience.</span>\r\n",
"\r\n",
"**For versions < 0.7.0**\r\n",
"\r\n",
" Please update that file inside your book (located at: *YourbookPath*/_data/toc.yml) if you want to further customize your book following \r\n",
" instructions at https://jupyterbook.org/guide/01-5_tour.html#Table-of-Contents.\r\n",
""
" instructions at https://legacy.jupyterbook.org/guide/01-5_tour.html#Table-of-Contents.\r\n",
"\r\n",
"**For the newer versions**\r\n",
"\r\n",
"Please update the _toc.yml file inside your book (located at: *YourbookPath*/_toc.yml), and refer to the documentation at https://jupyterbook.org/customize/toc.html for further customization."
],
"metadata": {
"azdata_cell_guid": "d193d588-847b-4725-9591-098d0fb24343"