From 5c6f334113db9e1dedd37072b2a7a1a68366b7ef Mon Sep 17 00:00:00 2001 From: Brian Bergeron Date: Mon, 8 Jun 2020 12:03:51 -0700 Subject: [PATCH] Azure Arc Postgres - Support + Troubleshooting (#10686) --- extensions/arc/images/support.svg | 11 + extensions/arc/images/wrench.svg | 10 + .../arc/notebooks/arcDataServices/_config.yml | 2 + .../notebooks/arcDataServices/_data/toc.yml | 12 + .../content/postgres/readme.md | 7 + .../arcDataServices/content/postgres/toc.yml | 7 + .../tsg100-troubleshoot-postgres.ipynb | 593 ++++++++++++++++++ .../arcDataServices/content/readme.md | 5 + .../arc/notebooks/arcDataServices/readme.md | 5 + extensions/arc/src/constants.ts | 10 + extensions/arc/src/extension.ts | 2 +- extensions/arc/src/localizedConstants.ts | 74 ++- .../dashboards/postgres/postgresDashboard.ts | 14 +- .../postgresDiagnoseAndSolveProblemsPage.ts | 64 ++ .../postgres/postgresOverviewPage.ts | 2 +- .../postgres/postgresSupportRequestPage.ts | 69 ++ 16 files changed, 850 insertions(+), 37 deletions(-) create mode 100644 extensions/arc/images/support.svg create mode 100644 extensions/arc/images/wrench.svg create mode 100644 extensions/arc/notebooks/arcDataServices/_config.yml create mode 100644 extensions/arc/notebooks/arcDataServices/_data/toc.yml create mode 100644 extensions/arc/notebooks/arcDataServices/content/postgres/readme.md create mode 100644 extensions/arc/notebooks/arcDataServices/content/postgres/toc.yml create mode 100644 extensions/arc/notebooks/arcDataServices/content/postgres/tsg100-troubleshoot-postgres.ipynb create mode 100644 extensions/arc/notebooks/arcDataServices/content/readme.md create mode 100644 extensions/arc/notebooks/arcDataServices/readme.md create mode 100644 extensions/arc/src/ui/dashboards/postgres/postgresDiagnoseAndSolveProblemsPage.ts create mode 100644 extensions/arc/src/ui/dashboards/postgres/postgresSupportRequestPage.ts diff --git a/extensions/arc/images/support.svg b/extensions/arc/images/support.svg new file mode 100644 
index 0000000000..177beaa878 --- /dev/null +++ b/extensions/arc/images/support.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/extensions/arc/images/wrench.svg b/extensions/arc/images/wrench.svg new file mode 100644 index 0000000000..68df6597c0 --- /dev/null +++ b/extensions/arc/images/wrench.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/extensions/arc/notebooks/arcDataServices/_config.yml b/extensions/arc/notebooks/arcDataServices/_config.yml new file mode 100644 index 0000000000..be950ea687 --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/_config.yml @@ -0,0 +1,2 @@ +title: Azure Arc Data Services +description: A collection of notebooks to support Azure Arc Data Services. \ No newline at end of file diff --git a/extensions/arc/notebooks/arcDataServices/_data/toc.yml b/extensions/arc/notebooks/arcDataServices/_data/toc.yml new file mode 100644 index 0000000000..08aa620181 --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/_data/toc.yml @@ -0,0 +1,12 @@ +- title: Welcome + url: /readme + not_numbered: true +- title: Search + search: true +- title: Postgres + url: /postgres/readme + not_numbered: true + expand_sections: true + sections: + - title: TSG100 - The Azure Arc Postgres troubleshooter + url: postgres/tsg100-troubleshoot-postgres diff --git a/extensions/arc/notebooks/arcDataServices/content/postgres/readme.md b/extensions/arc/notebooks/arcDataServices/content/postgres/readme.md new file mode 100644 index 0000000000..a3fe0eee38 --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/content/postgres/readme.md @@ -0,0 +1,7 @@ +# Azure Arc Postgres notebooks + +- This chapter contains notebooks for troubleshooting Postgres on Azure Arc + +## Notebooks in this Chapter +- [TSG100 - The Azure Arc Postgres troubleshooter](tsg100-troubleshoot-postgres.ipynb) + diff --git a/extensions/arc/notebooks/arcDataServices/content/postgres/toc.yml b/extensions/arc/notebooks/arcDataServices/content/postgres/toc.yml new file mode 
100644 index 0000000000..cf63274703 --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/content/postgres/toc.yml @@ -0,0 +1,7 @@ +- title: Postgres + url: /postgres/readme + not_numbered: true + expand_sections: true + sections: + - title: TSG100 - The Azure Arc Postgres troubleshooter + url: postgres/tsg100-troubleshoot-postgres diff --git a/extensions/arc/notebooks/arcDataServices/content/postgres/tsg100-troubleshoot-postgres.ipynb b/extensions/arc/notebooks/arcDataServices/content/postgres/tsg100-troubleshoot-postgres.ipynb new file mode 100644 index 0000000000..a0ac270ea8 --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/content/postgres/tsg100-troubleshoot-postgres.ipynb @@ -0,0 +1,593 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TSG100 - The Azure Arc Postgres troubleshooter\n", + "==============================================\n", + "\n", + "Description\n", + "-----------\n", + "\n", + "Follow these steps to troubleshoot an Azure Arc Postgres Server.\n", + "\n", + "Steps\n", + "-----\n", + "\n", + "### Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# The server to troubleshoot. If not provided\n", + "# the user will be prompted to select a server.\n", + "namespace = os.environ.get('POSTGRES_SERVER_NAMESPACE')\n", + "name = os.environ.get('POSTGRES_SERVER_NAME')\n", + "\n", + "tail_lines = 50" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Common functions\n", + "\n", + "Define helper functions used in this notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide_input" + ] + }, + "outputs": [], + "source": [ + "# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n", + "import sys\n", + "import os\n", + "import re\n", + "import json\n", + "import platform\n", + "import shlex\n", + "import shutil\n", + "import datetime\n", + "\n", + "from subprocess import Popen, PIPE\n", + "from IPython.display import Markdown\n", + "\n", + "retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n", + "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n", + "install_hint = {} # The SOP to help install the executable if it cannot be found\n", + "\n", + "first_run = True\n", + "rules = None\n", + "debug_logging = False\n", + "\n", + "def run(cmd, return_output=False, no_output=False, retry_count=0):\n", + " \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n", + "\n", + " NOTES:\n", + "\n", + " 1. 
Commands that need this kind of ' quoting on Windows e.g.:\n", + "\n", + " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n", + "\n", + " Need to actually pass in as '\"':\n", + "\n", + " kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n", + "\n", + " The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n", + " \n", + " `iter(p.stdout.readline, b'')`\n", + "\n", + " The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n", + " \"\"\"\n", + " MAX_RETRIES = 5\n", + " output = \"\"\n", + " retry = False\n", + "\n", + " global first_run\n", + " global rules\n", + "\n", + " if first_run:\n", + " first_run = False\n", + " rules = load_rules()\n", + "\n", + " # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n", + " #\n", + " # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n", + " #\n", + " if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n", + " cmd = cmd.replace(\"\\n\", \" \")\n", + "\n", + " # shlex.split is required on bash and for Windows paths with spaces\n", + " #\n", + " cmd_actual = shlex.split(cmd)\n", + "\n", + " # Store this (i.e. kubectl, python etc.) 
to support binary context aware error_hints and retries\n", + " #\n", + " user_provided_exe_name = cmd_actual[0].lower()\n", + "\n", + " # When running python, use the python in the ADS sandbox ({sys.executable})\n", + " #\n", + " if cmd.startswith(\"python \"):\n", + " cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n", + "\n", + " # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n", + " # with:\n", + " #\n", + " # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n", + " #\n", + " # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n", + " #\n", + " if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n", + " os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n", + "\n", + " # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n", + " #\n", + " if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n", + " cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n", + "\n", + " # To aid supportabilty, determine which binary file will actually be executed on the machine\n", + " #\n", + " which_binary = None\n", + "\n", + " # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n", + " # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n", + " # of CURL exists on the machine use that one. 
(Unfortunately the curl.exe in System32 is almost\n", + " # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n", + " # look for the 2nd installation of CURL in the path)\n", + " if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n", + " path = os.getenv('PATH')\n", + " for p in path.split(os.path.pathsep):\n", + " p = os.path.join(p, \"curl.exe\")\n", + " if os.path.exists(p) and os.access(p, os.X_OK):\n", + " if p.lower().find(\"system32\") == -1:\n", + " cmd_actual[0] = p\n", + " which_binary = p\n", + " break\n", + "\n", + " # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n", + " # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n", + " #\n", + " # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n", + " #\n", + " if which_binary == None:\n", + " which_binary = shutil.which(cmd_actual[0])\n", + "\n", + " if which_binary == None:\n", + " if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n", + " display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n", + "\n", + " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n", + " else: \n", + " cmd_actual[0] = which_binary\n", + "\n", + " start_time = datetime.datetime.now().replace(microsecond=0)\n", + "\n", + " print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n", + " print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n", + " print(f\" cwd: {os.getcwd()}\")\n", + "\n", + " # Command-line tools such as CURL and AZDATA HDFS commands output\n", + " # scrolling progress bars, which causes Jupyter to hang forever, to\n", + " # 
workaround this, use no_output=True\n", + " #\n", + "\n", + " # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n", + " #\n", + " wait = True \n", + "\n", + " try:\n", + " if no_output:\n", + " p = Popen(cmd_actual)\n", + " else:\n", + " p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n", + " with p.stdout:\n", + " for line in iter(p.stdout.readline, b''):\n", + " line = line.decode()\n", + " if return_output:\n", + " output = output + line\n", + " else:\n", + " if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n", + " regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n", + " match = regex.match(line)\n", + " if match:\n", + " if match.group(1).find(\"HTML\") != -1:\n", + " display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n", + " else:\n", + " display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n", + "\n", + " wait = False\n", + " break # otherwise infinite hang, have not worked out why yet.\n", + " else:\n", + " print(line, end='')\n", + " if rules is not None:\n", + " apply_expert_rules(line)\n", + "\n", + " if wait:\n", + " p.wait()\n", + " except FileNotFoundError as e:\n", + " if install_hint is not None:\n", + " display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n", + "\n", + " raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n", + "\n", + " exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n", + "\n", + " if not no_output:\n", + " for line in iter(p.stderr.readline, b''):\n", + " try:\n", + " line_decoded = line.decode()\n", + " except UnicodeDecodeError:\n", + " # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n", + " #\n", + " # \\xa0\n", + " #\n", + " # For example see this in the response from `az group create`:\n", + " #\n", + " # ERROR: Get Token request returned http error: 400 and 
server \n", + " # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n", + " # The refresh token has expired due to inactivity.\\xa0The token was \n", + " # issued on 2018-10-25T23:35:11.9832872Z\n", + " #\n", + " # which generates the exception:\n", + " #\n", + " # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n", + " #\n", + " print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n", + " print(line)\n", + " line_decoded = \"\"\n", + " pass\n", + " else:\n", + "\n", + " # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n", + " # print this empty \"ERR:\" as it confuses.\n", + " #\n", + " if line_decoded == \"\":\n", + " continue\n", + " \n", + " print(f\"STDERR: {line_decoded}\", end='')\n", + "\n", + " if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n", + " exit_code_workaround = 1\n", + "\n", + " # inject HINTs to next TSG/SOP based on output in stderr\n", + " #\n", + " if user_provided_exe_name in error_hints:\n", + " for error_hint in error_hints[user_provided_exe_name]:\n", + " if line_decoded.find(error_hint[0]) != -1:\n", + " display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n", + "\n", + " # apply expert rules (to run follow-on notebooks), based on output\n", + " #\n", + " if rules is not None:\n", + " apply_expert_rules(line_decoded)\n", + "\n", + " # Verify if a transient error, if so automatically retry (recursive)\n", + " #\n", + " if user_provided_exe_name in retry_hints:\n", + " for retry_hint in retry_hints[user_provided_exe_name]:\n", + " if line_decoded.find(retry_hint) != -1:\n", + " if retry_count < MAX_RETRIES:\n", + " print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n", + " retry_count = retry_count + 1\n", + " output = run(cmd, return_output=return_output, 
retry_count=retry_count)\n", + "\n", + " if return_output:\n", + " return output\n", + " else:\n", + " return\n", + "\n", + " elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n", + "\n", + " # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n", + " # don't wait here, if success known above\n", + " #\n", + " if wait: \n", + " if p.returncode != 0:\n", + " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n", + " else:\n", + " if exit_code_workaround !=0 :\n", + " raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n", + "\n", + " print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n", + "\n", + " if return_output:\n", + " return output\n", + "\n", + "def load_json(filename):\n", + " \"\"\"Load a json file from disk and return the contents\"\"\"\n", + "\n", + " with open(filename, encoding=\"utf8\") as json_file:\n", + " return json.load(json_file)\n", + "\n", + "def load_rules():\n", + " \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n", + "\n", + " # Load this notebook as json to get access to the expert rules in the notebook metadata.\n", + " #\n", + " try:\n", + " j = load_json(\"tsg100-troubleshoot-postgres.ipynb\")\n", + " except:\n", + " pass # If the user has renamed the book, we can't load ourself. 
NOTE: Is there a way in Jupyter, to know your own filename?\n", + " else:\n", + " if \"metadata\" in j and \\\n", + " \"azdata\" in j[\"metadata\"] and \\\n", + " \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n", + " \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n", + "\n", + " rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n", + "\n", + " rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n", + "\n", + " # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n", + "\n", + " return rules\n", + "\n", + "def apply_expert_rules(line):\n", + " \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n", + " inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n", + "\n", + " global rules\n", + "\n", + " for rule in rules:\n", + " notebook = rule[1]\n", + " cell_type = rule[2]\n", + " output_type = rule[3] # i.e. stream or error\n", + " output_type_name = rule[4] # i.e. ename or name \n", + " output_type_value = rule[5] # i.e. SystemExit or stdout\n", + " details_name = rule[6] # i.e. 
evalue or text \n", + " expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n", + "\n", + " if debug_logging:\n", + " print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n", + "\n", + " if re.match(expression, line, re.DOTALL):\n", + "\n", + " if debug_logging:\n", + " print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n", + "\n", + " match_found = True\n", + "\n", + " display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n", + "\n", + "\n", + "\n", + "print('Common functions defined successfully.')\n", + "\n", + "# Hints for binary (transient fault) retry, (known) error and install guide\n", + "#\n", + "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n", + "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n", + "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get Postgres server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide_input" + ] + }, + "outputs": [], + "source": [ + "# Sets the 'server' variable to the spec of the Postgres server\n", + "\n", + "import math\n", + "\n", + "# If a server was provided, get it\n", + "if namespace and name:\n", + " server = 
json.loads(run(f'kubectl get dbs -n {namespace} {name} -o json', return_output=True))\n", + "else:\n", + " # Otherwise prompt the user to select a server\n", + " servers = json.loads(run(f'kubectl get dbs --all-namespaces -o json', return_output=True))['items']\n", + " if not servers:\n", + " raise Exception('No Postgres servers found')\n", + "\n", + " full_name = lambda s: f'{s[\"metadata\"][\"namespace\"]}.{s[\"metadata\"][\"name\"]}'\n", + " servers.sort(key=lambda s: full_name(s))\n", + "\n", + " pad = math.floor(math.log10(len(servers)) + 1) + 3\n", + " for i, s in enumerate(servers):\n", + " print(f'{f\"[{i+1}]\":<{pad}}{full_name(s)}')\n", + "\n", + " while True:\n", + " try:\n", + " i = int(input('Enter the index of a server to troubleshoot: '))\n", + " except ValueError:\n", + " continue\n", + "\n", + " if i >= 1 and i <= len(servers):\n", + " server = servers[i-1]\n", + " namespace = server['metadata']['namespace']\n", + " name = server['metadata']['name']\n", + " break\n", + "\n", + "display(Markdown(f'#### Got server {namespace}.{name}'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summarize all resources" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uid = server['metadata']['uid']\n", + "\n", + "display(Markdown(f'#### Server summary'))\n", + "run(f'kubectl get dbs -n {namespace} {name}')\n", + "\n", + "display(Markdown(f'#### Resource summary'))\n", + "run(f'kubectl get pods,pvc,svc,ep -n {namespace} -l dusky.microsoft.com/serviceId={uid}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Troubleshoot the server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(Markdown(f'#### Troubleshooting server {namespace}.{name}'))\n", + "run(f'kubectl describe dbs -n {namespace} {name}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Troubleshoot the pods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pods = json.loads(run(f'kubectl get pods -n {namespace} -l dusky.microsoft.com/serviceId={uid} -o json', return_output=True))['items']\n", + "\n", + "# Summarize and describe each pod\n", + "for pod in pods:\n", + " pod_name = pod['metadata']['name']\n", + " pod_phase = pod['status']['phase']\n", + " con_statuses = pod['status'].get('containerStatuses', [])\n", + " num_con_ready = sum(1 for c in con_statuses if c['ready'])\n", + " num_cons = len(pod['spec']['containers'])\n", + " num_restarts = sum(c['restartCount'] for c in con_statuses)\n", + "\n", + " display(Markdown(f'#### Troubleshooting pod {namespace}.{pod_name}\\n'\n", + " f'#### {pod_phase} with {num_con_ready}/{num_cons} '\n", + " f'containers ready and {num_restarts} restarts'))\n", + "\n", + " run(f'kubectl describe pod -n {namespace} {pod_name}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Troubleshoot the containers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Summarize and get logs from each container\n", + "for pod in pods:\n", + " pod_name = pod['metadata']['name']\n", + " cons = pod['spec']['containers']\n", + " con_statuses = pod['status'].get('containerStatuses', [])\n", + " display(Markdown(f'#### Troubleshooting {len(cons)} container{\"\" if len(cons) < 2 else \"s\"} '\n", + " f'containers for pod {namespace}.{pod_name}'))\n", + "\n", + " for i, con in enumerate(cons):\n", + " con_name = con['name']\n", + " con_status = next((c for c in con_statuses if c['name'] == con_name), {})\n", + " con_started = con_status.get('started', False)\n", + " con_ready = con_status.get('ready', False)\n", + " con_restarts = con_status.get('restartCount', 0)\n", + "\n", + " display(Markdown(f'#### Troubleshooting container 
{namespace}.{pod_name}/{con_name} ({i+1}/{len(cons)})\\n'\n", + " f'#### {\"S\" if con_started else \"Not s\"}tarted and '\n", + " f'{\"\" if con_ready else \"not \"}ready with {con_restarts} restarts'))\n", + "\n", + " run(f'kubectl logs -n {namespace} {pod_name} {con_name} --tail {tail_lines}')\n", + "\n", + " # Get logs from the previous terminated container if one exists\n", + " if con_restarts > 0:\n", + " display(Markdown(f'#### Logs from previous terminated container {namespace}.{pod_name}/{con_name}'))\n", + " run(f'kubectl logs -n {namespace} {pod_name} {con_name} --tail {tail_lines} --previous')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Troubleshoot the PersistentVolumeClaims" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(Markdown(f'#### Troubleshooting PersistentVolumeClaims'))\n", + "run(f'kubectl describe pvc -n {namespace} -l dusky.microsoft.com/serviceId={uid}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('Notebook execution complete.')" + ] + } + ], + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "azdata": { + "test": { + "ci": false, + "gci": false + }, + "contract": { + "requires": { + "kubectl": { + "installed": true + } + } + }, + "side_effects": false + } + } +} + diff --git a/extensions/arc/notebooks/arcDataServices/content/readme.md b/extensions/arc/notebooks/arcDataServices/content/readme.md new file mode 100644 index 0000000000..9b119fab9e --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/content/readme.md @@ -0,0 +1,5 @@ +# Azure Arc Data Services Jupyter Book + +## Chapters + +1. [Postgres](postgres/readme.md) - notebooks for troubleshooting Postgres on Azure Arc. 
diff --git a/extensions/arc/notebooks/arcDataServices/readme.md b/extensions/arc/notebooks/arcDataServices/readme.md new file mode 100644 index 0000000000..9b119fab9e --- /dev/null +++ b/extensions/arc/notebooks/arcDataServices/readme.md @@ -0,0 +1,5 @@ +# Azure Arc Data Services Jupyter Book + +## Chapters + +1. [Postgres](postgres/readme.md) - notebooks for troubleshooting Postgres on Azure Arc. diff --git a/extensions/arc/src/constants.ts b/extensions/arc/src/constants.ts index e2b897135a..6969f55af1 100644 --- a/extensions/arc/src/constants.ts +++ b/extensions/arc/src/constants.ts @@ -28,6 +28,8 @@ export class IconPathHelper { public static properties: IconPath; public static networking: IconPath; public static refresh: IconPath; + public static support: IconPath; + public static wrench: IconPath; public static setExtensionContext(context: vscode.ExtensionContext) { IconPathHelper.context = context; @@ -91,6 +93,14 @@ export class IconPathHelper { light: context.asAbsolutePath('images/refresh.svg'), dark: context.asAbsolutePath('images/refresh.svg') }; + IconPathHelper.support = { + light: context.asAbsolutePath('images/support.svg'), + dark: context.asAbsolutePath('images/support.svg') + }; + IconPathHelper.wrench = { + light: context.asAbsolutePath('images/wrench.svg'), + dark: context.asAbsolutePath('images/wrench.svg') + }; } } diff --git a/extensions/arc/src/extension.ts b/extensions/arc/src/extension.ts index e2cfb6040b..f55b13b06a 100644 --- a/extensions/arc/src/extension.ts +++ b/extensions/arc/src/extension.ts @@ -26,7 +26,7 @@ export async function activate(context: vscode.ExtensionContext): Promise try { const controllerModel = new ControllerModel(controllerUrl, auth); const postgresModel = new PostgresModel(controllerUrl, auth, dbNamespace, dbName); - const postgresDashboard = new PostgresDashboard(loc.postgresDashboard, controllerModel, postgresModel); + const postgresDashboard = new PostgresDashboard(loc.postgresDashboard, context, 
controllerModel, postgresModel); await Promise.all([ postgresDashboard.showDashboard(), diff --git a/extensions/arc/src/localizedConstants.ts b/extensions/arc/src/localizedConstants.ts index dce75b36c4..bbf1fe89ed 100644 --- a/extensions/arc/src/localizedConstants.ts +++ b/extensions/arc/src/localizedConstants.ts @@ -15,8 +15,11 @@ export const networking = localize('arc.networking', "Networking"); export const properties = localize('arc.properties', "Properties"); export const settings = localize('arc.settings', "Settings"); export const security = localize('arc.security', "Security"); -export const computeAndStorage = localize('arc.computeAndStorage', 'Compute + Storage'); +export const computeAndStorage = localize('arc.computeAndStorage', "Compute + Storage"); export const backup = localize('arc.backup', "Backup"); +export const newSupportRequest = localize('arc.newSupportRequest', "New support request"); +export const diagnoseAndSolveProblems = localize('arc.diagnoseAndSolveProblems', "Diagnose and solve problems"); +export const supportAndTroubleshooting = localize('arc.supportAndTroubleshooting', "Support + troubleshooting"); export const createNew = localize('arc.createNew', "Create New"); export const deleteText = localize('arc.delete', "Delete"); @@ -32,40 +35,42 @@ export const host = localize('arc.host', "Host"); export const name = localize('arc.name', "Name"); export const type = localize('arc.type', "Type"); export const status = localize('arc.status', "Status"); -export const dataController = localize('arc.dataController', 'Data controller'); -export const kibanaDashboard = localize('arc.kibanaDashboard', 'Kibana Dashboard'); -export const grafanaDashboard = localize('arc.grafanaDashboard', 'Grafana Dashboard'); -export const kibanaDashboardDescription = localize('arc.kibanaDashboardDescription', 'Dashboard for viewing logs'); -export const grafanaDashboardDescription = localize('arc.grafanaDashboardDescription', 'Dashboard for viewing metrics'); 
-export const serviceEndpoints = localize('arc.serviceEndpoints', 'Service endpoints');
-export const endpoint = localize('arc.endpoint', 'Endpoint');
-export const description = localize('arc.description', 'Description');
-export const yes = localize('arc.yes', 'Yes');
-export const no = localize('arc.no', 'No');
-export const feedback = localize('arc.feedback', 'Feedback');
-export const selectConnectionString = localize('arc.selectConnectionString', 'Select from available client connection strings below');
-export const vCores = localize('arc.vCores', 'vCores');
-export const ram = localize('arc.ram', 'RAM');
-export const refresh = localize('arc.refresh', 'Refresh');
+export const dataController = localize('arc.dataController', "Data controller");
+export const kibanaDashboard = localize('arc.kibanaDashboard', "Kibana Dashboard");
+export const grafanaDashboard = localize('arc.grafanaDashboard', "Grafana Dashboard");
+export const kibanaDashboardDescription = localize('arc.kibanaDashboardDescription', "Dashboard for viewing logs");
+export const grafanaDashboardDescription = localize('arc.grafanaDashboardDescription', "Dashboard for viewing metrics");
+export const serviceEndpoints = localize('arc.serviceEndpoints', "Service endpoints");
+export const endpoint = localize('arc.endpoint', "Endpoint");
+export const description = localize('arc.description', "Description");
+export const yes = localize('arc.yes', "Yes");
+export const no = localize('arc.no', "No");
+export const feedback = localize('arc.feedback', "Feedback");
+export const selectConnectionString = localize('arc.selectConnectionString', "Select from available client connection strings below");
+export const vCores = localize('arc.vCores', "vCores");
+export const ram = localize('arc.ram', "RAM");
+export const refresh = localize('arc.refresh', "Refresh");
+export const troubleshoot = localize('arc.troubleshoot', "Troubleshoot");
+export const clickTheNewSupportRequestButton = localize('arc.clickTheNewSupportRequestButton', "Click the new support request button to file a support request in the Azure Portal.");
 
 // Postgres constants
-export const coordinatorEndpoint = localize('arc.coordinatorEndpoint', 'Coordinator endpoint');
-export const postgresAdminUsername = localize('arc.postgresAdminUsername', 'Admin username');
-export const nodeConfiguration = localize('arc.nodeConfiguration', 'Node configuration');
-export const postgresVersion = localize('arc.postgresVersion', 'PostgreSQL version');
-export const serverGroupType = localize('arc.serverGroupType', 'Server group type');
-export const serverGroupNodes = localize('arc.serverGroupNodes', 'Server group nodes');
-export const fullyQualifiedDomain = localize('arc.fullyQualifiedDomain', 'Fully qualified domain');
-export const postgresArcProductName = localize('arc.postgresArcProductName', 'Azure Database for PostgreSQL - Azure Arc');
-export const coordinator = localize('arc.coordinator', 'Coordinator');
-export const worker = localize('arc.worker', 'Worker');
-export const newDatabase = localize('arc.newDatabase', 'New Database');
-export const databaseName = localize('arc.databaseName', 'Database name');
-export const newPassword = localize('arc.newPassword', 'New password');
-export const learnAboutPostgresClients = localize('arc.learnAboutPostgresClients', 'Learn more about Azure PostgreSQL Hyperscale client interfaces');
-export const node = localize('arc.node', 'node');
-export const nodes = localize('arc.nodes', 'nodes');
-export const storagePerNode = localize('arc.storagePerNode', 'storage per node');
+export const coordinatorEndpoint = localize('arc.coordinatorEndpoint', "Coordinator endpoint");
+export const postgresAdminUsername = localize('arc.postgresAdminUsername', "Admin username");
+export const nodeConfiguration = localize('arc.nodeConfiguration', "Node configuration");
+export const postgresVersion = localize('arc.postgresVersion', "PostgreSQL version");
+export const serverGroupType = localize('arc.serverGroupType', "Server group type");
+export const serverGroupNodes = localize('arc.serverGroupNodes', "Server group nodes");
+export const fullyQualifiedDomain = localize('arc.fullyQualifiedDomain', "Fully qualified domain");
+export const postgresArcProductName = localize('arc.postgresArcProductName', "Azure Database for PostgreSQL - Azure Arc");
+export const coordinator = localize('arc.coordinator', "Coordinator");
+export const worker = localize('arc.worker', "Worker");
+export const newDatabase = localize('arc.newDatabase', "New Database");
+export const databaseName = localize('arc.databaseName', "Database name");
+export const newPassword = localize('arc.newPassword', "New password");
+export const learnAboutPostgresClients = localize('arc.learnAboutPostgresClients', "Learn more about Azure PostgreSQL Hyperscale client interfaces");
+export const node = localize('arc.node', "node");
+export const nodes = localize('arc.nodes', "nodes");
+export const storagePerNode = localize('arc.storagePerNode', "storage per node");
 
 export function databaseCreated(name: string): string { return localize('arc.databaseCreated', "Database {0} created", name); }
 export function databaseCreationFailed(name: string, error: any): string { return localize('arc.databaseCreationFailed', "Failed to create database {0}. {1}", name, (error instanceof Error ? error.message : error)); }
@@ -75,8 +80,9 @@ export function deleteServicePrompt(name: string): string { return localize('arc
 export function serviceDeleted(name: string): string { return localize('arc.serviceDeleted', "Service {0} deleted", name); }
 export function serviceDeletionFailed(name: string, error: any): string { return localize('arc.serviceDeletionFailed', "Failed to delete service {0}. {1}", name, (error instanceof Error ? error.message : error)); }
 export function couldNotFindAzureResource(name: string): string { return localize('arc.couldNotFindAzureResource', "Could not find Azure resource for {0}", name); }
-export function copiedToClipboard(name: string): string { return localize('arc.copiedToClipboard', '{0} copied to clipboard', name); }
+export function copiedToClipboard(name: string): string { return localize('arc.copiedToClipboard', "{0} copied to clipboard", name); }
 export function refreshFailed(error: any): string { return localize('arc.refreshFailed', "Refresh failed. {0}", (error instanceof Error ? error.message : error)); }
 export function failedToManagePostgres(name: string, error: any): string { return localize('arc.failedToManagePostgres', "Failed to manage Postgres {0}. {1}", name, (error instanceof Error ? error.message : error)); }
+export function clickTheTroubleshootButton(resourceType: string): string { return localize('arc.clickTheTroubleshootButton', "Click the troubleshoot button to open the Azure Arc {0} troubleshooting notebook.", resourceType); }
 
 export const arcResources = localize('arc.arcResources', "Azure Arc Resources");
diff --git a/extensions/arc/src/ui/dashboards/postgres/postgresDashboard.ts b/extensions/arc/src/ui/dashboards/postgres/postgresDashboard.ts
index 4b69e7d837..6adcda34e2 100644
--- a/extensions/arc/src/ui/dashboards/postgres/postgresDashboard.ts
+++ b/extensions/arc/src/ui/dashboards/postgres/postgresDashboard.ts
@@ -3,6 +3,7 @@
  * Licensed under the Source EULA. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 
+import * as vscode from 'vscode';
 import * as azdata from 'azdata';
 import * as loc from '../../../localizedConstants';
 import { ControllerModel } from '../../../models/controllerModel';
@@ -14,9 +15,11 @@ import { PostgresBackupPage } from './postgresBackupPage';
 import { PostgresPropertiesPage } from './postgresPropertiesPage';
 import { PostgresNetworkingPage } from './postgresNetworkingPage';
 import { Dashboard } from '../../components/dashboard';
+import { PostgresDiagnoseAndSolveProblemsPage } from './postgresDiagnoseAndSolveProblemsPage';
+import { PostgresSupportRequestPage } from './postgresSupportRequestPage';
 
 export class PostgresDashboard extends Dashboard {
-	constructor(title: string, private _controllerModel: ControllerModel, private _postgresModel: PostgresModel) {
+	constructor(title: string, private _context: vscode.ExtensionContext, private _controllerModel: ControllerModel, private _postgresModel: PostgresModel) {
 		super(title);
 	}
 
@@ -27,6 +30,8 @@ export class PostgresDashboard extends Dashboard {
 		const backupPage = new PostgresBackupPage(modelView);
 		const propertiesPage = new PostgresPropertiesPage(modelView, this._controllerModel, this._postgresModel);
 		const networkingPage = new PostgresNetworkingPage(modelView);
+		const diagnoseAndSolveProblemsPage = new PostgresDiagnoseAndSolveProblemsPage(modelView, this._context, this._postgresModel);
+		const supportRequestPage = new PostgresSupportRequestPage(modelView, this._controllerModel, this._postgresModel);
 
 		return [
 			overviewPage.tab,
@@ -43,6 +48,13 @@ export class PostgresDashboard extends Dashboard {
 				tabs: [
 					networkingPage.tab
 				]
+			},
+			{
+				title: loc.supportAndTroubleshooting,
+				tabs: [
+					diagnoseAndSolveProblemsPage.tab,
+					supportRequestPage.tab
+				]
 			}
 		];
 	}
diff --git a/extensions/arc/src/ui/dashboards/postgres/postgresDiagnoseAndSolveProblemsPage.ts
b/extensions/arc/src/ui/dashboards/postgres/postgresDiagnoseAndSolveProblemsPage.ts
new file mode 100644
index 0000000000..e7a90f7f6f
--- /dev/null
+++ b/extensions/arc/src/ui/dashboards/postgres/postgresDiagnoseAndSolveProblemsPage.ts
@@ -0,0 +1,72 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the Source EULA. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import * as vscode from 'vscode';
+import * as azdata from 'azdata';
+import * as loc from '../../../localizedConstants';
+import { IconPathHelper, cssStyles } from '../../../constants';
+import { DashboardPage } from '../../components/dashboardPage';
+import { PostgresModel } from '../../../models/postgresModel';
+
+/**
+ * Dashboard page showing a title, a short instruction, and a Troubleshoot button
+ * that opens the Azure Arc Postgres troubleshooting notebook.
+ */
+export class PostgresDiagnoseAndSolveProblemsPage extends DashboardPage {
+	constructor(protected modelView: azdata.ModelView, private _context: vscode.ExtensionContext, private _postgresModel: PostgresModel) {
+		super(modelView);
+	}
+
+	protected get title(): string {
+		return loc.diagnoseAndSolveProblems;
+	}
+
+	protected get id(): string {
+		return 'postgres-diagnose-and-solve-problems';
+	}
+
+	protected get icon(): { dark: string; light: string; } {
+		return IconPathHelper.wrench;
+	}
+
+	/** Builds the page body: heading, instruction text, and the troubleshoot button. */
+	protected get container(): azdata.Component {
+		const root = this.modelView.modelBuilder.divContainer().component();
+		const content = this.modelView.modelBuilder.divContainer().component();
+		root.addItem(content, { CSSStyles: { 'margin': '20px' } });
+
+		content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
+			value: loc.diagnoseAndSolveProblems,
+			CSSStyles: { ...cssStyles.title, 'margin-bottom': '20px' }
+		}).component());
+
+		content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
+			value: loc.clickTheTroubleshootButton('Postgres'),
+			CSSStyles: { ...cssStyles.text, 'margin-bottom': '20px' }
+		}).component());
+
+		const troubleshootButton = this.modelView.modelBuilder.button().withProperties<azdata.ButtonProperties>({
+			iconPath: IconPathHelper.wrench,
+			label: loc.troubleshoot,
+			width: '160px'
+		}).component();
+
+		troubleshootButton.onDidClick(() => {
+			// Publish the selected server group's namespace and name as environment variables
+			// (presumably read by the notebook - confirm against tsg100-troubleshoot-postgres).
+			process.env['POSTGRES_SERVER_NAMESPACE'] = this._postgresModel.namespace();
+			process.env['POSTGRES_SERVER_NAME'] = this._postgresModel.name();
+			// The book ships with this extension under notebooks/arcDataServices, so that
+			// folder (not notebooks/arc) is the path to resolve against the extension root.
+			vscode.commands.executeCommand('bookTreeView.openBook', this._context.asAbsolutePath('notebooks/arcDataServices'), true, 'postgres/tsg100-troubleshoot-postgres');
+		});
+
+		content.addItem(troubleshootButton);
+		return root;
+	}
+
+	protected get toolbarContainer(): azdata.ToolbarContainer {
+		return this.modelView.modelBuilder.toolbarContainer().component();
+	}
+}
diff --git a/extensions/arc/src/ui/dashboards/postgres/postgresOverviewPage.ts b/extensions/arc/src/ui/dashboards/postgres/postgresOverviewPage.ts
index 5d03e12ac4..27e19ef23c 100644
--- a/extensions/arc/src/ui/dashboards/postgres/postgresOverviewPage.ts
+++ b/extensions/arc/src/ui/dashboards/postgres/postgresOverviewPage.ts
@@ -245,7 +245,7 @@ export class PostgresOverviewPage extends DashboardPage {
 
 		openInAzurePortalButton.onDidClick(async () => {
 			const r = this._controllerModel.registration('postgresInstances', this._postgresModel.namespace(), this._postgresModel.name());
-			if (r === undefined) {
+			if (!r) {
 				vscode.window.showErrorMessage(loc.couldNotFindAzureResource(this._postgresModel.fullName()));
 			} else {
 				vscode.env.openExternal(vscode.Uri.parse(
diff --git a/extensions/arc/src/ui/dashboards/postgres/postgresSupportRequestPage.ts b/extensions/arc/src/ui/dashboards/postgres/postgresSupportRequestPage.ts
new file mode 100644
index 0000000000..44d3ac5dac
--- /dev/null
+++ b/extensions/arc/src/ui/dashboards/postgres/postgresSupportRequestPage.ts
@@ -0,0 +1,76 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the Source EULA. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import * as vscode from 'vscode';
+import * as azdata from 'azdata';
+import * as loc from '../../../localizedConstants';
+import { IconPathHelper, cssStyles } from '../../../constants';
+import { DashboardPage } from '../../components/dashboardPage';
+import { ControllerModel } from '../../../models/controllerModel';
+import { PostgresModel } from '../../../models/postgresModel';
+
+/**
+ * Dashboard page showing a title, a short instruction, and a button that opens
+ * the Azure Portal support-request page for this Postgres instance.
+ */
+export class PostgresSupportRequestPage extends DashboardPage {
+	constructor(protected modelView: azdata.ModelView, private _controllerModel: ControllerModel, private _postgresModel: PostgresModel) {
+		super(modelView);
+	}
+
+	protected get title(): string {
+		return loc.newSupportRequest;
+	}
+
+	protected get id(): string {
+		return 'postgres-support-request';
+	}
+
+	protected get icon(): { dark: string; light: string; } {
+		return IconPathHelper.support;
+	}
+
+	/** Builds the page body: heading, instruction text, and the support request button. */
+	protected get container(): azdata.Component {
+		const root = this.modelView.modelBuilder.divContainer().component();
+		const content = this.modelView.modelBuilder.divContainer().component();
+		root.addItem(content, { CSSStyles: { 'margin': '20px' } });
+
+		content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
+			value: loc.newSupportRequest,
+			CSSStyles: { ...cssStyles.title, 'margin-bottom': '20px' }
+		}).component());
+
+		content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
+			value: loc.clickTheNewSupportRequestButton,
+			CSSStyles: { ...cssStyles.text, 'margin-bottom': '20px' }
+		}).component());
+
+		const supportRequestButton = this.modelView.modelBuilder.button().withProperties<azdata.ButtonProperties>({
+			iconPath: IconPathHelper.support,
+			label: loc.newSupportRequest,
+			width: '205px'
+		}).component();
+
+		supportRequestButton.onDidClick(() => {
+			// The portal URL is built from the registration's subscription, resource group and
+			// instance name, so without a registration the only option is to report an error.
+			const r = this._controllerModel.registration('postgresInstances', this._postgresModel.namespace(), this._postgresModel.name());
+			if (!r) {
+				vscode.window.showErrorMessage(loc.couldNotFindAzureResource(this._postgresModel.fullName()));
+			} else {
+				vscode.env.openExternal(vscode.Uri.parse(
+					`https://portal.azure.com/#resource/subscriptions/${r.subscriptionId}/resourceGroups/${r.resourceGroupName}/providers/Microsoft.AzureData/postgresInstances/${r.instanceName}/supportrequest`));
+			}
+		});
+
+		content.addItem(supportRequestButton);
+		return root;
+	}
+
+	protected get toolbarContainer(): azdata.ToolbarContainer {
+		return this.modelView.modelBuilder.toolbarContainer().component();
+	}
+}