Azure Arc Postgres - Support + Troubleshooting (#10686)

This commit is contained in:
Brian Bergeron
2020-06-08 12:03:51 -07:00
committed by GitHub
parent 9ca17dbe7f
commit 5c6f334113
16 changed files with 850 additions and 37 deletions

View File

@@ -0,0 +1,11 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0)">
<path d="M9.10021 10.4C9.67398 10.0352 10.148 9.53353 10.4797 8.94002C10.8114 8.34651 10.9902 7.67983 11.0002 7C11.0002 5.93913 10.5788 4.92172 9.82863 4.17157C9.07849 3.42143 8.06107 3 7.00021 3C5.93934 3 4.92192 3.42143 4.17178 4.17157C3.42163 4.92172 3.00021 5.93913 3.00021 7C3.01016 7.67983 3.18903 8.34651 3.5207 8.94002C3.85237 9.53353 4.32643 10.0352 4.90021 10.4C3.75008 10.8144 2.75707 11.5758 2.05842 12.5789C1.35978 13.5821 0.990001 14.7776 1.00021 16H2.00021C2.00021 14.6739 2.52699 13.4021 3.46467 12.4645C4.40235 11.5268 5.67412 11 7.00021 11C8.32629 11 9.59806 11.5268 10.5357 12.4645C11.4734 13.4021 12.0002 14.6739 12.0002 16H13.0002C13.0104 14.7776 12.6406 13.5821 11.942 12.5789C11.2433 11.5758 10.2503 10.8144 9.10021 10.4ZM4.00021 7C3.98624 6.60227 4.05428 6.20592 4.2001 5.83563C4.34592 5.46533 4.5664 5.12901 4.84781 4.8476C5.12922 4.56619 5.46554 4.34572 5.83583 4.1999C6.20613 4.05408 6.60248 3.98603 7.00021 4C7.39793 3.98603 7.79428 4.05408 8.16458 4.1999C8.53487 4.34572 8.87119 4.56619 9.1526 4.8476C9.43401 5.12901 9.65449 5.46533 9.80031 5.83563C9.94613 6.20592 10.0142 6.60227 10.0002 7C10.0142 7.39773 9.94613 7.79408 9.80031 8.16437C9.65449 8.53467 9.43401 8.87099 9.1526 9.1524C8.87119 9.43381 8.53487 9.65428 8.16458 9.8001C7.79428 9.94592 7.39793 10.014 7.00021 10C6.60248 10.014 6.20613 9.94592 5.83583 9.8001C5.46554 9.65428 5.12922 9.43381 4.84781 9.1524C4.5664 8.87099 4.34592 8.53467 4.2001 8.16437C4.05428 7.79408 3.98624 7.39773 4.00021 7Z" fill="#0078D4"/>
<path d="M14 7C14.0277 6.07324 13.8656 5.1506 13.5236 4.28881C13.1816 3.42701 12.667 2.64425 12.0114 1.98864C11.3557 1.33303 10.573 0.818415 9.71119 0.47643C8.84939 0.134445 7.92676 -0.0276878 7 -4.59957e-06C5.53483 0.0317979 4.12829 0.58158 3.02993 1.5518C1.93157 2.52202 1.21239 3.84995 1 5.3H1C1 5.4326 1.05268 5.55978 1.14645 5.65355C1.24021 5.74732 1.36739 5.8 1.5 5.8C1.59374 5.81785 1.69063 5.8085 1.77923 5.77306C1.86782 5.73762 1.94443 5.67757 2 5.6C2.18072 4.37665 2.77585 3.25239 3.68592 2.41513C4.59599 1.57786 5.76586 1.07832 7 0.999995C7.79166 0.98646 8.57795 1.13241 9.31202 1.42914C10.0461 1.72588 10.7129 2.16733 11.2728 2.7272C11.8327 3.28707 12.2741 3.95391 12.5709 4.68798C12.8676 5.42205 13.0135 6.20833 13 7V8H8.5C8.36739 8 8.24021 8.05267 8.14645 8.14644C8.05268 8.24021 8 8.36739 8 8.5C8 8.6326 8.05268 8.75978 8.14645 8.85355C8.24021 8.94732 8.36739 9 8.5 9H13.5C13.6326 9 13.7598 8.94732 13.8536 8.85355C13.9473 8.75978 14 8.6326 14 8.5V7Z" fill="#0078D4"/>
</g>
<defs>
<clipPath id="clip0">
<path d="M0 0H16V16H0V0Z" fill="white"/>
</clipPath>
</defs>
</svg>

View File

@@ -0,0 +1,10 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0)">
<path d="M15.1 1.7L15.4 2.4C15.5736 2.55387 15.6811 2.76881 15.7 3C15.8005 3.22232 15.8678 3.45816 15.9 3.7C15.9823 3.95823 16.0162 4.22945 16 4.5C15.9824 4.90665 15.9153 5.30963 15.8 5.7C15.7111 6.08133 15.5768 6.45063 15.4 6.8L14.7 7.7L13.8 8.4L12.7 8.8L11.5 9H10.8L4.5 15.2L3.6 15.8L2.6 16L1.6 15.8C1.27608 15.6906 0.995712 15.4803 0.8 15.2C0.519663 15.0043 0.309389 14.7239 0.2 14.4C0.0862188 14.0778 0.0188766 13.7411 0 13.4C0.00910713 13.0577 0.0767509 12.7195 0.2 12.4C0.32891 12.0583 0.534121 11.7504 0.8 11.5L7.1 5.2C7.06382 5.15942 7.03629 5.11188 7.01909 5.0603C7.0019 5.00872 6.9954 4.95418 7 4.9V4.5C7.01757 4.09335 7.08474 3.69037 7.2 3.3C7.2889 2.91867 7.42319 2.54937 7.6 2.2L8.3 1.3L9.2 0.6L10.3 0.2L11.5 0H12.3L13 0.2C13.2312 0.218943 13.4461 0.326416 13.6 0.5L14.3 0.8L11.2 4L12 4.8L15.1 1.7ZM11.5 8C11.9824 7.99754 12.459 7.8954 12.9 7.7C13.3017 7.52684 13.673 7.29051 14 7C14.2905 6.67302 14.5268 6.30166 14.7 5.9C14.8954 5.45899 14.9975 4.98235 15 4.5C14.9815 4.12619 14.9143 3.7564 14.8 3.4L12 6.2L9.8 4L12.6 1.2L11.5 1C11.0176 1.00246 10.541 1.1046 10.1 1.3C9.69834 1.47316 9.32698 1.70949 9 2C8.70949 2.32698 8.47316 2.69834 8.3 3.1C8.1046 3.54101 8.00246 4.01765 8 4.5V5C8.1 5.2 8.1 5.4 8.2 5.5L1.5 12.2L1.1 12.8C1.01969 12.9893 0.985415 13.1949 1 13.4C0.985415 13.6051 1.01969 13.8107 1.1 14C1.17493 14.206 1.31547 14.3817 1.5 14.5C1.61832 14.6845 1.794 14.8251 2 14.9H3.2L3.8 14.5L10.5 7.8L11 8H11.5Z" fill="#0078D4"/>
</g>
<defs>
<clipPath id="clip0">
<path d="M0 0H16V16H0V0Z" fill="white"/>
</clipPath>
</defs>
</svg>

View File

@@ -0,0 +1,2 @@
title: Azure Arc Data Services
description: A collection of notebooks to support Azure Arc Data Services.

View File

@@ -0,0 +1,12 @@
- title: Welcome
url: /readme
not_numbered: true
- title: Search
search: true
- title: Postgres
url: /postgres/readme
not_numbered: true
expand_sections: true
sections:
- title: TSG100 - The Azure Arc Postgres troubleshooter
url: postgres/tsg100-troubleshoot-postgres

View File

@@ -0,0 +1,7 @@
# Azure Arc Postgres notebooks
- This chapter contains notebooks for troubleshooting Postgres on Azure Arc.
## Notebooks in this Chapter
- [TSG100 - The Azure Arc Postgres troubleshooter](tsg100-troubleshoot-postgres.ipynb)

View File

@@ -0,0 +1,7 @@
- title: Postgres
url: /postgres/readme
not_numbered: true
expand_sections: true
sections:
- title: TSG100 - The Azure Arc Postgres troubleshooter
url: postgres/tsg100-troubleshoot-postgres

View File

@@ -0,0 +1,593 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TSG100 - The Azure Arc Postgres troubleshooter\n",
"==============================================\n",
"\n",
"Description\n",
"-----------\n",
"\n",
"Follow these steps to troubleshoot an Azure Arc Postgres Server.\n",
"\n",
"Steps\n",
"-----\n",
"\n",
"### Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# The server to troubleshoot. If not provided\n",
"# the user will be prompted to select a server.\n",
"namespace = os.environ.get('POSTGRES_SERVER_NAMESPACE')\n",
"name = os.environ.get('POSTGRES_SERVER_NAME')\n",
"\n",
"tail_lines = 50"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Common functions\n",
"\n",
"Define helper functions used in this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"hide_input"
]
},
"outputs": [],
"source": [
"# Define `run` function for transient fault handling, suggestions on error, and scrolling updates on Windows\n",
"import sys\n",
"import os\n",
"import re\n",
"import json\n",
"import platform\n",
"import shlex\n",
"import shutil\n",
"import datetime\n",
"\n",
"from subprocess import Popen, PIPE\n",
"from IPython.display import Markdown\n",
"\n",
"retry_hints = {} # Output in stderr known to be transient, therefore automatically retry\n",
"error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
"install_hint = {} # The SOP to help install the executable if it cannot be found\n",
"\n",
"first_run = True\n",
"rules = None\n",
"debug_logging = False\n",
"\n",
"def run(cmd, return_output=False, no_output=False, retry_count=0):\n",
" \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
"\n",
" NOTES:\n",
"\n",
" 1. Commands that need this kind of ' quoting on Windows e.g.:\n",
"\n",
" kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='data-pool')].metadata.name}\n",
"\n",
" Need to actually pass in as '\"':\n",
"\n",
" kubectl get nodes -o jsonpath={.items[?(@.metadata.annotations.pv-candidate=='\"'data-pool'\"')].metadata.name}\n",
"\n",
" The ' quote approach, although correct when pasting into Windows cmd, will hang at the line:\n",
" \n",
" `iter(p.stdout.readline, b'')`\n",
"\n",
" The shlex.split call does the right thing for each platform, just use the '\"' pattern for a '\n",
" \"\"\"\n",
" MAX_RETRIES = 5\n",
" output = \"\"\n",
" retry = False\n",
"\n",
" global first_run\n",
" global rules\n",
"\n",
" if first_run:\n",
" first_run = False\n",
" rules = load_rules()\n",
"\n",
" # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
" #\n",
" # ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
" #\n",
" if platform.system() == \"Windows\" and cmd.startswith(\"azdata sql query\"):\n",
" cmd = cmd.replace(\"\\n\", \" \")\n",
"\n",
" # shlex.split is required on bash and for Windows paths with spaces\n",
" #\n",
" cmd_actual = shlex.split(cmd)\n",
"\n",
" # Store this (i.e. kubectl, python etc.) to support binary context aware error_hints and retries\n",
" #\n",
" user_provided_exe_name = cmd_actual[0].lower()\n",
"\n",
" # When running python, use the python in the ADS sandbox ({sys.executable})\n",
" #\n",
" if cmd.startswith(\"python \"):\n",
" cmd_actual[0] = cmd_actual[0].replace(\"python\", sys.executable)\n",
"\n",
" # On Mac, when ADS is not launched from terminal, LC_ALL may not be set, which causes pip installs to fail\n",
" # with:\n",
" #\n",
" # UnicodeDecodeError: 'ascii' codec can't decode byte 0xc5 in position 4969: ordinal not in range(128)\n",
" #\n",
" # Setting it to a default value of \"en_US.UTF-8\" enables pip install to complete\n",
" #\n",
" if platform.system() == \"Darwin\" and \"LC_ALL\" not in os.environ:\n",
" os.environ[\"LC_ALL\"] = \"en_US.UTF-8\"\n",
"\n",
" # When running `kubectl`, if AZDATA_OPENSHIFT is set, use `oc`\n",
" #\n",
" if cmd.startswith(\"kubectl \") and \"AZDATA_OPENSHIFT\" in os.environ:\n",
" cmd_actual[0] = cmd_actual[0].replace(\"kubectl\", \"oc\")\n",
"\n",
" # To aid supportabilty, determine which binary file will actually be executed on the machine\n",
" #\n",
" which_binary = None\n",
"\n",
" # Special case for CURL on Windows. The version of CURL in Windows System32 does not work to\n",
" # get JWT tokens, it returns \"(56) Failure when receiving data from the peer\". If another instance\n",
" # of CURL exists on the machine use that one. (Unfortunately the curl.exe in System32 is almost\n",
" # always the first curl.exe in the path, and it can't be uninstalled from System32, so here we\n",
" # look for the 2nd installation of CURL in the path)\n",
" if platform.system() == \"Windows\" and cmd.startswith(\"curl \"):\n",
" path = os.getenv('PATH')\n",
" for p in path.split(os.path.pathsep):\n",
" p = os.path.join(p, \"curl.exe\")\n",
" if os.path.exists(p) and os.access(p, os.X_OK):\n",
" if p.lower().find(\"system32\") == -1:\n",
" cmd_actual[0] = p\n",
" which_binary = p\n",
" break\n",
"\n",
" # Find the path based location (shutil.which) of the executable that will be run (and display it to aid supportability), this\n",
" # seems to be required for .msi installs of azdata.cmd/az.cmd. (otherwise Popen returns FileNotFound) \n",
" #\n",
" # NOTE: Bash needs cmd to be the list of the space separated values hence shlex.split.\n",
" #\n",
" if which_binary == None:\n",
" which_binary = shutil.which(cmd_actual[0])\n",
"\n",
" if which_binary == None:\n",
" if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
" display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
"\n",
" raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\")\n",
" else: \n",
" cmd_actual[0] = which_binary\n",
"\n",
" start_time = datetime.datetime.now().replace(microsecond=0)\n",
"\n",
" print(f\"START: {cmd} @ {start_time} ({datetime.datetime.utcnow().replace(microsecond=0)} UTC)\")\n",
" print(f\" using: {which_binary} ({platform.system()} {platform.release()} on {platform.machine()})\")\n",
" print(f\" cwd: {os.getcwd()}\")\n",
"\n",
" # Command-line tools such as CURL and AZDATA HDFS commands output\n",
" # scrolling progress bars, which causes Jupyter to hang forever, to\n",
" # workaround this, use no_output=True\n",
" #\n",
"\n",
" # Work around a infinite hang when a notebook generates a non-zero return code, break out, and do not wait\n",
" #\n",
" wait = True \n",
"\n",
" try:\n",
" if no_output:\n",
" p = Popen(cmd_actual)\n",
" else:\n",
" p = Popen(cmd_actual, stdout=PIPE, stderr=PIPE, bufsize=1)\n",
" with p.stdout:\n",
" for line in iter(p.stdout.readline, b''):\n",
" line = line.decode()\n",
" if return_output:\n",
" output = output + line\n",
" else:\n",
" if cmd.startswith(\"azdata notebook run\"): # Hyperlink the .ipynb file\n",
" regex = re.compile(' \"(.*)\"\\: \"(.*)\"') \n",
" match = regex.match(line)\n",
" if match:\n",
" if match.group(1).find(\"HTML\") != -1:\n",
" display(Markdown(f' - \"{match.group(1)}\": \"{match.group(2)}\"'))\n",
" else:\n",
" display(Markdown(f' - \"{match.group(1)}\": \"[{match.group(2)}]({match.group(2)})\"'))\n",
"\n",
" wait = False\n",
" break # otherwise infinite hang, have not worked out why yet.\n",
" else:\n",
" print(line, end='')\n",
" if rules is not None:\n",
" apply_expert_rules(line)\n",
"\n",
" if wait:\n",
" p.wait()\n",
" except FileNotFoundError as e:\n",
" if install_hint is not None:\n",
" display(Markdown(f'HINT: Use {install_hint} to resolve this issue.'))\n",
"\n",
" raise FileNotFoundError(f\"Executable '{cmd_actual[0]}' not found in path (where/which)\") from e\n",
"\n",
" exit_code_workaround = 0 # WORKAROUND: azdata hangs on exception from notebook on p.wait()\n",
"\n",
" if not no_output:\n",
" for line in iter(p.stderr.readline, b''):\n",
" try:\n",
" line_decoded = line.decode()\n",
" except UnicodeDecodeError:\n",
" # NOTE: Sometimes we get characters back that cannot be decoded(), e.g.\n",
" #\n",
" # \\xa0\n",
" #\n",
" # For example see this in the response from `az group create`:\n",
" #\n",
" # ERROR: Get Token request returned http error: 400 and server \n",
" # response: {\"error\":\"invalid_grant\",# \"error_description\":\"AADSTS700082: \n",
" # The refresh token has expired due to inactivity.\\xa0The token was \n",
" # issued on 2018-10-25T23:35:11.9832872Z\n",
" #\n",
" # which generates the exception:\n",
" #\n",
" # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa0 in position 179: invalid start byte\n",
" #\n",
" print(\"WARNING: Unable to decode stderr line, printing raw bytes:\")\n",
" print(line)\n",
" line_decoded = \"\"\n",
" pass\n",
" else:\n",
"\n",
" # azdata emits a single empty line to stderr when doing an hdfs cp, don't\n",
" # print this empty \"ERR:\" as it confuses.\n",
" #\n",
" if line_decoded == \"\":\n",
" continue\n",
" \n",
" print(f\"STDERR: {line_decoded}\", end='')\n",
"\n",
" if line_decoded.startswith(\"An exception has occurred\") or line_decoded.startswith(\"ERROR: An error occurred while executing the following cell\"):\n",
" exit_code_workaround = 1\n",
"\n",
" # inject HINTs to next TSG/SOP based on output in stderr\n",
" #\n",
" if user_provided_exe_name in error_hints:\n",
" for error_hint in error_hints[user_provided_exe_name]:\n",
" if line_decoded.find(error_hint[0]) != -1:\n",
" display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
"\n",
" # apply expert rules (to run follow-on notebooks), based on output\n",
" #\n",
" if rules is not None:\n",
" apply_expert_rules(line_decoded)\n",
"\n",
" # Verify if a transient error, if so automatically retry (recursive)\n",
" #\n",
" if user_provided_exe_name in retry_hints:\n",
" for retry_hint in retry_hints[user_provided_exe_name]:\n",
" if line_decoded.find(retry_hint) != -1:\n",
" if retry_count < MAX_RETRIES:\n",
" print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
" retry_count = retry_count + 1\n",
" output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
"\n",
" if return_output:\n",
" return output\n",
" else:\n",
" return\n",
"\n",
" elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
"\n",
" # WORKAROUND: We avoid infinite hang above in the `azdata notebook run` failure case, by inferring success (from stdout output), so\n",
" # don't wait here, if success known above\n",
" #\n",
" if wait: \n",
" if p.returncode != 0:\n",
" raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(p.returncode)}.\\n')\n",
" else:\n",
" if exit_code_workaround !=0 :\n",
" raise SystemExit(f'Shell command:\\n\\n\\t{cmd} ({elapsed}s elapsed)\\n\\nreturned non-zero exit code: {str(exit_code_workaround)}.\\n')\n",
"\n",
" print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
"\n",
" if return_output:\n",
" return output\n",
"\n",
"def load_json(filename):\n",
" \"\"\"Load a json file from disk and return the contents\"\"\"\n",
"\n",
" with open(filename, encoding=\"utf8\") as json_file:\n",
" return json.load(json_file)\n",
"\n",
"def load_rules():\n",
" \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
"\n",
" # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
" #\n",
" try:\n",
" j = load_json(\"tsg100-troubleshoot-postgres.ipynb\")\n",
" except:\n",
" pass # If the user has renamed the book, we can't load ourself. NOTE: Is there a way in Jupyter, to know your own filename?\n",
" else:\n",
" if \"metadata\" in j and \\\n",
" \"azdata\" in j[\"metadata\"] and \\\n",
" \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
" \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
"\n",
" rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
"\n",
" rules.sort() # Sort rules, so they run in priority order (the [0] element). Lowest value first.\n",
"\n",
" # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
"\n",
" return rules\n",
"\n",
"def apply_expert_rules(line):\n",
" \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
" inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
"\n",
" global rules\n",
"\n",
" for rule in rules:\n",
" notebook = rule[1]\n",
" cell_type = rule[2]\n",
" output_type = rule[3] # i.e. stream or error\n",
" output_type_name = rule[4] # i.e. ename or name \n",
" output_type_value = rule[5] # i.e. SystemExit or stdout\n",
" details_name = rule[6] # i.e. evalue or text \n",
" expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
"\n",
" if debug_logging:\n",
" print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
"\n",
" if re.match(expression, line, re.DOTALL):\n",
"\n",
" if debug_logging:\n",
" print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
"\n",
" match_found = True\n",
"\n",
" display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
"\n",
"\n",
"\n",
"print('Common functions defined successfully.')\n",
"\n",
"# Hints for binary (transient fault) retry, (known) error and install guide\n",
"#\n",
"retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
"error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
"install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get Postgres server"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"hide_input"
]
},
"outputs": [],
"source": [
"# Sets the 'server' variable to the spec of the Postgres server\n",
"\n",
"import math\n",
"\n",
"# If a server was provided, get it\n",
"if namespace and name:\n",
" server = json.loads(run(f'kubectl get dbs -n {namespace} {name} -o json', return_output=True))\n",
"else:\n",
" # Otherwise prompt the user to select a server\n",
" servers = json.loads(run(f'kubectl get dbs --all-namespaces -o json', return_output=True))['items']\n",
" if not servers:\n",
" raise Exception('No Postgres servers found')\n",
"\n",
" full_name = lambda s: f'{s[\"metadata\"][\"namespace\"]}.{s[\"metadata\"][\"name\"]}'\n",
" servers.sort(key=lambda s: full_name(s))\n",
"\n",
" pad = math.floor(math.log10(len(servers)) + 1) + 3\n",
" for i, s in enumerate(servers):\n",
" print(f'{f\"[{i+1}]\":<{pad}}{full_name(s)}')\n",
"\n",
" while True:\n",
" try:\n",
" i = int(input('Enter the index of a server to troubleshoot: '))\n",
" except ValueError:\n",
" continue\n",
"\n",
" if i >= 1 and i <= len(servers):\n",
" server = servers[i-1]\n",
" namespace = server['metadata']['namespace']\n",
" name = server['metadata']['name']\n",
" break\n",
"\n",
"display(Markdown(f'#### Got server {namespace}.{name}'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Summarize all resources"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"uid = server['metadata']['uid']\n",
"\n",
"display(Markdown(f'#### Server summary'))\n",
"run(f'kubectl get dbs -n {namespace} {name}')\n",
"\n",
"display(Markdown(f'#### Resource summary'))\n",
"run(f'kubectl get pods,pvc,svc,ep -n {namespace} -l dusky.microsoft.com/serviceId={uid}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Troubleshoot the server"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(Markdown(f'#### Troubleshooting server {namespace}.{name}'))\n",
"run(f'kubectl describe dbs -n {namespace} {name}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Troubleshoot the pods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pods = json.loads(run(f'kubectl get pods -n {namespace} -l dusky.microsoft.com/serviceId={uid} -o json', return_output=True))['items']\n",
"\n",
"# Summarize and describe each pod\n",
"for pod in pods:\n",
" pod_name = pod['metadata']['name']\n",
" pod_phase = pod['status']['phase']\n",
" con_statuses = pod['status'].get('containerStatuses', [])\n",
" num_con_ready = sum(1 for c in con_statuses if c['ready'])\n",
" num_cons = len(pod['spec']['containers'])\n",
" num_restarts = sum(c['restartCount'] for c in con_statuses)\n",
"\n",
" display(Markdown(f'#### Troubleshooting pod {namespace}.{pod_name}\\n'\n",
" f'#### {pod_phase} with {num_con_ready}/{num_cons} '\n",
" f'containers ready and {num_restarts} restarts'))\n",
"\n",
" run(f'kubectl describe pod -n {namespace} {pod_name}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Troubleshoot the containers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Summarize and get logs from each container\n",
"for pod in pods:\n",
" pod_name = pod['metadata']['name']\n",
" cons = pod['spec']['containers']\n",
" con_statuses = pod['status'].get('containerStatuses', [])\n",
" display(Markdown(f'#### Troubleshooting {len(cons)} container{\"\" if len(cons) < 2 else \"s\"} '\n",
" f'containers for pod {namespace}.{pod_name}'))\n",
"\n",
" for i, con in enumerate(cons):\n",
" con_name = con['name']\n",
" con_status = next((c for c in con_statuses if c['name'] == con_name), {})\n",
" con_started = con_status.get('started', False)\n",
" con_ready = con_status.get('ready', False)\n",
" con_restarts = con_status.get('restartCount', 0)\n",
"\n",
" display(Markdown(f'#### Troubleshooting container {namespace}.{pod_name}/{con_name} ({i+1}/{len(cons)})\\n'\n",
" f'#### {\"S\" if con_started else \"Not s\"}tarted and '\n",
" f'{\"\" if con_ready else \"not \"}ready with {con_restarts} restarts'))\n",
"\n",
" run(f'kubectl logs -n {namespace} {pod_name} {con_name} --tail {tail_lines}')\n",
"\n",
" # Get logs from the previous terminated container if one exists\n",
" if con_restarts > 0:\n",
" display(Markdown(f'#### Logs from previous terminated container {namespace}.{pod_name}/{con_name}'))\n",
" run(f'kubectl logs -n {namespace} {pod_name} {con_name} --tail {tail_lines} --previous')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Troubleshoot the PersistentVolumeClaims"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(Markdown(f'#### Troubleshooting PersistentVolumeClaims'))\n",
"run(f'kubectl describe pvc -n {namespace} -l dusky.microsoft.com/serviceId={uid}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Notebook execution complete.')"
]
}
],
"nbformat": 4,
"nbformat_minor": 5,
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"azdata": {
"test": {
"ci": false,
"gci": false
},
"contract": {
"requires": {
"kubectl": {
"installed": true
}
}
},
"side_effects": false
}
}
}
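
The "Parameters" cell in this notebook reads the target server from the POSTGRES_SERVER_NAMESPACE and POSTGRES_SERVER_NAME environment variables (the same variables the dashboard's troubleshoot button sets) and is tagged "parameters", following the papermill convention. The following is a minimal, hypothetical sketch of running the notebook non-interactively under that assumption; papermill is assumed to be installed, and the namespace and server name are placeholders rather than values from this commit.

# Hypothetical sketch: execute TSG100 outside Azure Data Studio by pre-setting
# the environment variables its "Parameters" cell reads. Assumes papermill is
# installed; the namespace and server name below are placeholders.
import os
import papermill as pm

os.environ['POSTGRES_SERVER_NAMESPACE'] = 'arc'        # placeholder namespace
os.environ['POSTGRES_SERVER_NAME'] = 'postgres01'      # placeholder server name

# papermill runs the notebook in a fresh kernel (which inherits these
# environment variables) and writes an executed copy alongside the original.
pm.execute_notebook(
    'tsg100-troubleshoot-postgres.ipynb',
    'tsg100-troubleshoot-postgres.out.ipynb'
)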

View File

@@ -0,0 +1,5 @@
# Azure Arc Data Services Jupyter Book
## Chapters
1. [Postgres](postgres/readme.md) - notebooks for troubleshooting Postgres on Azure Arc.

View File

@@ -0,0 +1,5 @@
# Azure Arc Data Services Jupyter Book
## Chapters
1. [Postgres](postgres/readme.md) - notebooks for troubleshooting Postgres on Azure Arc.

View File

@@ -28,6 +28,8 @@ export class IconPathHelper {
public static properties: IconPath;
public static networking: IconPath;
public static refresh: IconPath;
public static support: IconPath;
public static wrench: IconPath;
public static setExtensionContext(context: vscode.ExtensionContext) {
IconPathHelper.context = context;
@@ -91,6 +93,14 @@ export class IconPathHelper {
light: context.asAbsolutePath('images/refresh.svg'),
dark: context.asAbsolutePath('images/refresh.svg')
};
IconPathHelper.support = {
light: context.asAbsolutePath('images/support.svg'),
dark: context.asAbsolutePath('images/support.svg')
};
IconPathHelper.wrench = {
light: context.asAbsolutePath('images/wrench.svg'),
dark: context.asAbsolutePath('images/wrench.svg')
};
}
}

View File

@@ -26,7 +26,7 @@ export async function activate(context: vscode.ExtensionContext): Promise<void>
try {
const controllerModel = new ControllerModel(controllerUrl, auth);
const postgresModel = new PostgresModel(controllerUrl, auth, dbNamespace, dbName);
const postgresDashboard = new PostgresDashboard(loc.postgresDashboard, controllerModel, postgresModel);
const postgresDashboard = new PostgresDashboard(loc.postgresDashboard, context, controllerModel, postgresModel);
await Promise.all([
postgresDashboard.showDashboard(),

View File

@@ -15,8 +15,11 @@ export const networking = localize('arc.networking', "Networking");
export const properties = localize('arc.properties', "Properties");
export const settings = localize('arc.settings', "Settings");
export const security = localize('arc.security', "Security");
export const computeAndStorage = localize('arc.computeAndStorage', 'Compute + Storage');
export const computeAndStorage = localize('arc.computeAndStorage', "Compute + Storage");
export const backup = localize('arc.backup', "Backup");
export const newSupportRequest = localize('arc.newSupportRequest', "New support request");
export const diagnoseAndSolveProblems = localize('arc.diagnoseAndSolveProblems', "Diagnose and solve problems");
export const supportAndTroubleshooting = localize('arc.supportAndTroubleshooting', "Support + troubleshooting");
export const createNew = localize('arc.createNew', "Create New");
export const deleteText = localize('arc.delete', "Delete");
@@ -32,40 +35,42 @@ export const host = localize('arc.host', "Host");
export const name = localize('arc.name', "Name");
export const type = localize('arc.type', "Type");
export const status = localize('arc.status', "Status");
export const dataController = localize('arc.dataController', 'Data controller');
export const kibanaDashboard = localize('arc.kibanaDashboard', 'Kibana Dashboard');
export const grafanaDashboard = localize('arc.grafanaDashboard', 'Grafana Dashboard');
export const kibanaDashboardDescription = localize('arc.kibanaDashboardDescription', 'Dashboard for viewing logs');
export const grafanaDashboardDescription = localize('arc.grafanaDashboardDescription', 'Dashboard for viewing metrics');
export const serviceEndpoints = localize('arc.serviceEndpoints', 'Service endpoints');
export const endpoint = localize('arc.endpoint', 'Endpoint');
export const description = localize('arc.description', 'Description');
export const yes = localize('arc.yes', 'Yes');
export const no = localize('arc.no', 'No');
export const feedback = localize('arc.feedback', 'Feedback');
export const selectConnectionString = localize('arc.selectConnectionString', 'Select from available client connection strings below');
export const vCores = localize('arc.vCores', 'vCores');
export const ram = localize('arc.ram', 'RAM');
export const refresh = localize('arc.refresh', 'Refresh');
export const dataController = localize('arc.dataController', "Data controller");
export const kibanaDashboard = localize('arc.kibanaDashboard', "Kibana Dashboard");
export const grafanaDashboard = localize('arc.grafanaDashboard', "Grafana Dashboard");
export const kibanaDashboardDescription = localize('arc.kibanaDashboardDescription', "Dashboard for viewing logs");
export const grafanaDashboardDescription = localize('arc.grafanaDashboardDescription', "Dashboard for viewing metrics");
export const serviceEndpoints = localize('arc.serviceEndpoints', "Service endpoints");
export const endpoint = localize('arc.endpoint', "Endpoint");
export const description = localize('arc.description', "Description");
export const yes = localize('arc.yes', "Yes");
export const no = localize('arc.no', "No");
export const feedback = localize('arc.feedback', "Feedback");
export const selectConnectionString = localize('arc.selectConnectionString', "Select from available client connection strings below");
export const vCores = localize('arc.vCores', "vCores");
export const ram = localize('arc.ram', "RAM");
export const refresh = localize('arc.refresh', "Refresh");
export const troubleshoot = localize('arc.troubleshoot', "Troubleshoot");
export const clickTheNewSupportRequestButton = localize('arc.clickTheNewSupportRequestButton', "Click the new support request button to file a support request in the Azure Portal.");
// Postgres constants
export const coordinatorEndpoint = localize('arc.coordinatorEndpoint', 'Coordinator endpoint');
export const postgresAdminUsername = localize('arc.postgresAdminUsername', 'Admin username');
export const nodeConfiguration = localize('arc.nodeConfiguration', 'Node configuration');
export const postgresVersion = localize('arc.postgresVersion', 'PostgreSQL version');
export const serverGroupType = localize('arc.serverGroupType', 'Server group type');
export const serverGroupNodes = localize('arc.serverGroupNodes', 'Server group nodes');
export const fullyQualifiedDomain = localize('arc.fullyQualifiedDomain', 'Fully qualified domain');
export const postgresArcProductName = localize('arc.postgresArcProductName', 'Azure Database for PostgreSQL - Azure Arc');
export const coordinator = localize('arc.coordinator', 'Coordinator');
export const worker = localize('arc.worker', 'Worker');
export const newDatabase = localize('arc.newDatabase', 'New Database');
export const databaseName = localize('arc.databaseName', 'Database name');
export const newPassword = localize('arc.newPassword', 'New password');
export const learnAboutPostgresClients = localize('arc.learnAboutPostgresClients', 'Learn more about Azure PostgreSQL Hyperscale client interfaces');
export const node = localize('arc.node', 'node');
export const nodes = localize('arc.nodes', 'nodes');
export const storagePerNode = localize('arc.storagePerNode', 'storage per node');
export const coordinatorEndpoint = localize('arc.coordinatorEndpoint', "Coordinator endpoint");
export const postgresAdminUsername = localize('arc.postgresAdminUsername', "Admin username");
export const nodeConfiguration = localize('arc.nodeConfiguration', "Node configuration");
export const postgresVersion = localize('arc.postgresVersion', "PostgreSQL version");
export const serverGroupType = localize('arc.serverGroupType', "Server group type");
export const serverGroupNodes = localize('arc.serverGroupNodes', "Server group nodes");
export const fullyQualifiedDomain = localize('arc.fullyQualifiedDomain', "Fully qualified domain");
export const postgresArcProductName = localize('arc.postgresArcProductName', "Azure Database for PostgreSQL - Azure Arc");
export const coordinator = localize('arc.coordinator', "Coordinator");
export const worker = localize('arc.worker', "Worker");
export const newDatabase = localize('arc.newDatabase', "New Database");
export const databaseName = localize('arc.databaseName', "Database name");
export const newPassword = localize('arc.newPassword', "New password");
export const learnAboutPostgresClients = localize('arc.learnAboutPostgresClients', "Learn more about Azure PostgreSQL Hyperscale client interfaces");
export const node = localize('arc.node', "node");
export const nodes = localize('arc.nodes', "nodes");
export const storagePerNode = localize('arc.storagePerNode', "storage per node");
export function databaseCreated(name: string): string { return localize('arc.databaseCreated', "Database {0} created", name); }
export function databaseCreationFailed(name: string, error: any): string { return localize('arc.databaseCreationFailed', "Failed to create database {0}. {1}", name, (error instanceof Error ? error.message : error)); }
@@ -75,8 +80,9 @@ export function deleteServicePrompt(name: string): string { return localize('arc
export function serviceDeleted(name: string): string { return localize('arc.serviceDeleted', "Service {0} deleted", name); }
export function serviceDeletionFailed(name: string, error: any): string { return localize('arc.serviceDeletionFailed', "Failed to delete service {0}. {1}", name, (error instanceof Error ? error.message : error)); }
export function couldNotFindAzureResource(name: string): string { return localize('arc.couldNotFindAzureResource', "Could not find Azure resource for {0}", name); }
export function copiedToClipboard(name: string): string { return localize('arc.copiedToClipboard', '{0} copied to clipboard', name); }
export function copiedToClipboard(name: string): string { return localize('arc.copiedToClipboard', "{0} copied to clipboard", name); }
export function refreshFailed(error: any): string { return localize('arc.refreshFailed', "Refresh failed. {0}", (error instanceof Error ? error.message : error)); }
export function failedToManagePostgres(name: string, error: any): string { return localize('arc.failedToManagePostgres', "Failed to manage Postgres {0}. {1}", name, (error instanceof Error ? error.message : error)); }
export function clickTheTroubleshootButton(resourceType: string): string { return localize('arc.clickTheTroubleshootButton', "Click the troubleshoot button to open the Azure Arc {0} troubleshooting notebook.", resourceType); }
export const arcResources = localize('arc.arcResources', "Azure Arc Resources");

View File

@@ -3,6 +3,7 @@
* Licensed under the Source EULA. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as vscode from 'vscode';
import * as azdata from 'azdata';
import * as loc from '../../../localizedConstants';
import { ControllerModel } from '../../../models/controllerModel';
@@ -14,9 +15,11 @@ import { PostgresBackupPage } from './postgresBackupPage';
import { PostgresPropertiesPage } from './postgresPropertiesPage';
import { PostgresNetworkingPage } from './postgresNetworkingPage';
import { Dashboard } from '../../components/dashboard';
import { PostgresDiagnoseAndSolveProblemsPage } from './postgresDiagnoseAndSolveProblemsPage';
import { PostgresSupportRequestPage } from './postgresSupportRequestPage';
export class PostgresDashboard extends Dashboard {
constructor(title: string, private _controllerModel: ControllerModel, private _postgresModel: PostgresModel) {
constructor(title: string, private _context: vscode.ExtensionContext, private _controllerModel: ControllerModel, private _postgresModel: PostgresModel) {
super(title);
}
@@ -27,6 +30,8 @@ export class PostgresDashboard extends Dashboard {
const backupPage = new PostgresBackupPage(modelView);
const propertiesPage = new PostgresPropertiesPage(modelView, this._controllerModel, this._postgresModel);
const networkingPage = new PostgresNetworkingPage(modelView);
const diagnoseAndSolveProblemsPage = new PostgresDiagnoseAndSolveProblemsPage(modelView, this._context, this._postgresModel);
const supportRequestPage = new PostgresSupportRequestPage(modelView, this._controllerModel, this._postgresModel);
return [
overviewPage.tab,
@@ -43,6 +48,13 @@ export class PostgresDashboard extends Dashboard {
tabs: [
networkingPage.tab
]
},
{
title: loc.supportAndTroubleshooting,
tabs: [
diagnoseAndSolveProblemsPage.tab,
supportRequestPage.tab
]
}
];
}

View File

@@ -0,0 +1,64 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the Source EULA. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as vscode from 'vscode';
import * as azdata from 'azdata';
import * as loc from '../../../localizedConstants';
import { IconPathHelper, cssStyles } from '../../../constants';
import { DashboardPage } from '../../components/dashboardPage';
import { PostgresModel } from '../../../models/postgresModel';
export class PostgresDiagnoseAndSolveProblemsPage extends DashboardPage {
constructor(protected modelView: azdata.ModelView, private _context: vscode.ExtensionContext, private _postgresModel: PostgresModel) {
super(modelView);
}
protected get title(): string {
return loc.diagnoseAndSolveProblems;
}
protected get id(): string {
return 'postgres-diagnose-and-solve-problems';
}
protected get icon(): { dark: string; light: string; } {
return IconPathHelper.wrench;
}
protected get container(): azdata.Component {
const root = this.modelView.modelBuilder.divContainer().component();
const content = this.modelView.modelBuilder.divContainer().component();
root.addItem(content, { CSSStyles: { 'margin': '20px' } });
content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
value: loc.diagnoseAndSolveProblems,
CSSStyles: { ...cssStyles.title, 'margin-bottom': '20px' }
}).component());
content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
value: loc.clickTheTroubleshootButton('Postgres'),
CSSStyles: { ...cssStyles.text, 'margin-bottom': '20px' }
}).component());
const troubleshootButton = this.modelView.modelBuilder.button().withProperties<azdata.ButtonProperties>({
iconPath: IconPathHelper.wrench,
label: loc.troubleshoot,
width: '160px'
}).component();
troubleshootButton.onDidClick(() => {
process.env['POSTGRES_SERVER_NAMESPACE'] = this._postgresModel.namespace();
process.env['POSTGRES_SERVER_NAME'] = this._postgresModel.name();
vscode.commands.executeCommand('bookTreeView.openBook', this._context.asAbsolutePath('notebooks/arc'), true, 'postgres/tsg100-troubleshoot-postgres');
});
content.addItem(troubleshootButton);
return root;
}
protected get toolbarContainer(): azdata.ToolbarContainer {
return this.modelView.modelBuilder.toolbarContainer().component();
}
}

View File

@@ -245,7 +245,7 @@ export class PostgresOverviewPage extends DashboardPage {
openInAzurePortalButton.onDidClick(async () => {
const r = this._controllerModel.registration('postgresInstances', this._postgresModel.namespace(), this._postgresModel.name());
if (r === undefined) {
if (!r) {
vscode.window.showErrorMessage(loc.couldNotFindAzureResource(this._postgresModel.fullName()));
} else {
vscode.env.openExternal(vscode.Uri.parse(

View File

@@ -0,0 +1,69 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the Source EULA. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as vscode from 'vscode';
import * as azdata from 'azdata';
import * as loc from '../../../localizedConstants';
import { IconPathHelper, cssStyles } from '../../../constants';
import { DashboardPage } from '../../components/dashboardPage';
import { ControllerModel } from '../../../models/controllerModel';
import { PostgresModel } from '../../../models/postgresModel';
export class PostgresSupportRequestPage extends DashboardPage {
constructor(protected modelView: azdata.ModelView, private _controllerModel: ControllerModel, private _postgresModel: PostgresModel) {
super(modelView);
}
protected get title(): string {
return loc.newSupportRequest;
}
protected get id(): string {
return 'postgres-support-request';
}
protected get icon(): { dark: string; light: string; } {
return IconPathHelper.support;
}
protected get container(): azdata.Component {
const root = this.modelView.modelBuilder.divContainer().component();
const content = this.modelView.modelBuilder.divContainer().component();
root.addItem(content, { CSSStyles: { 'margin': '20px' } });
content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
value: loc.newSupportRequest,
CSSStyles: { ...cssStyles.title, 'margin-bottom': '20px' }
}).component());
content.addItem(this.modelView.modelBuilder.text().withProperties<azdata.TextComponentProperties>({
value: loc.clickTheNewSupportRequestButton,
CSSStyles: { ...cssStyles.text, 'margin-bottom': '20px' }
}).component());
const supportRequestButton = this.modelView.modelBuilder.button().withProperties<azdata.ButtonProperties>({
iconPath: IconPathHelper.support,
label: loc.newSupportRequest,
width: '205px'
}).component();
supportRequestButton.onDidClick(() => {
const r = this._controllerModel.registration('postgresInstances', this._postgresModel.namespace(), this._postgresModel.name());
if (!r) {
vscode.window.showErrorMessage(loc.couldNotFindAzureResource(this._postgresModel.fullName()));
} else {
vscode.env.openExternal(vscode.Uri.parse(
`https://portal.azure.com/#resource/subscriptions/${r.subscriptionId}/resourceGroups/${r.resourceGroupName}/providers/Microsoft.AzureData/postgresInstances/${r.instanceName}/supportrequest`));
}
});
content.addItem(supportRequestButton);
return root;
}
protected get toolbarContainer(): azdata.ToolbarContainer {
return this.modelView.modelBuilder.toolbarContainer().component();
}
}