Arc updates for March release (#14970)

* Updated Postgres Spec for where to find engine version, removed calling -ev in edit commands (#14735) * Added spec.engine.version, took out calling engine version with edit calls * Added text wrong place * missed updates * PR fix * Update Arc Postgres troubleshooting notebook Co-authored-by: Brian Bergeron <brberger@microsoft.com> * Remove AzdataSession from azdata commands (#14856) * remove session * Add in controller-context support * Revert "Add in controller-context support" This reverts commit 3b39b968efbf6054041cb01cb2d8443532643a82. * Add azdataContext to login * Undo book change * Undo change correctly * Add controller context support (#14862) * remove session * Add in controller-context support * Add params to fake * Fix tests * Add info and placeholder for controller URL/name (#14887) * Add info and placeholder for controller URL * add period + update name * update memento and allow editing of namespace/URL * vBump * vBump * Fix tests Co-authored-by: nasc17 <69922333+nasc17@users.noreply.github.com> Co-authored-by: Brian Bergeron <brian.e.bergeron@gmail.com> Co-authored-by: Brian Bergeron <brberger@microsoft.com>
2026-02-05 01:25:38 -05:00 · 2021-04-05 11:47:36 -07:00
parent 71b91c3890
commit febd8b29c9
44 changed files with 525 additions and 740 deletions
--- a/extensions/arc/notebooks/arcDataServices/content/postgres/readme.md
+++ b/extensions/arc/notebooks/arcDataServices/content/postgres/readme.md
@@ -2,6 +2,10 @@

 - This chapter contains notebooks for troubleshooting Postgres on Azure Arc

-## Notebooks in this Chapter
- [TSG100 - The Azure Arc enabled PostgreSQL Hyperscale troubleshooter](tsg100-troubleshoot-postgres.ipynb)

+
+[Home](../readme.md)
+
+## Notebooks in this Chapter
+
+ - [TSG100 - The Azure Arc enabled PostgreSQL Hyperscale troubleshooter](../postgres/tsg100-troubleshoot-postgres.ipynb)
--- a/extensions/arc/notebooks/arcDataServices/content/postgres/toc.yml
+++ b/extensions/arc/notebooks/arcDataServices/content/postgres/toc.yml
@@ -1,7 +0,0 @@
- title: Postgres
-  url: /postgres/readme
-  not_numbered: true
-  expand_sections: true
-  sections:
-  - title: TSG100 - The Azure Arc enabled PostgreSQL Hyperscale troubleshooter
-    url: postgres/tsg100-troubleshoot-postgres
--- a/extensions/arc/notebooks/arcDataServices/content/postgres/tsg100-troubleshoot-postgres.ipynb
+++ b/extensions/arc/notebooks/arcDataServices/content/postgres/tsg100-troubleshoot-postgres.ipynb
@@ -2,7 +2,11 @@
 "cells": [
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "TSG100 - The Azure Arc enabled PostgreSQL Hyperscale troubleshooter\n",
    "===================================================================\n",
@@ -35,14 +39,17 @@
    "# the user will be prompted to select a server.\n",
    "namespace = os.environ.get('POSTGRES_SERVER_NAMESPACE')\n",
    "name = os.environ.get('POSTGRES_SERVER_NAME')\n",
-    "version = os.environ.get('POSTGRES_SERVER_VERSION')\n",
    "\n",
    "tail_lines = 50"
   ]
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Common functions\n",
    "\n",
@@ -63,7 +70,6 @@
    "import sys\n",
    "import os\n",
    "import re\n",
-    "import json\n",
    "import platform\n",
    "import shlex\n",
    "import shutil\n",
@@ -76,11 +82,7 @@
    "error_hints = {} # Output in stderr where a known SOP/TSG exists which will be HINTed for further help\n",
    "install_hint = {} # The SOP to help install the executable if it cannot be found\n",
    "\n",
-    "first_run = True\n",
-    "rules = None\n",
-    "debug_logging = False\n",
-    "\n",
-    "def run(cmd, return_output=False, no_output=False, retry_count=0):\n",
+    "def run(cmd, return_output=False, no_output=False, retry_count=0, base64_decode=False, return_as_json=False):\n",
    "    \"\"\"Run shell command, stream stdout, print stderr and optionally return output\n",
    "\n",
    "    NOTES:\n",
@@ -103,13 +105,6 @@
    "    output = \"\"\n",
    "    retry = False\n",
    "\n",
-    "    global first_run\n",
-    "    global rules\n",
-    "\n",
-    "    if first_run:\n",
-    "        first_run = False\n",
-    "        rules = load_rules()\n",
-    "\n",
    "    # When running `azdata sql query` on Windows, replace any \\n in \"\"\" strings, with \" \", otherwise we see:\n",
    "    #\n",
    "    #    ('HY090', '[HY090] [Microsoft][ODBC Driver Manager] Invalid string or buffer length (0) (SQLExecDirectW)')\n",
@@ -172,7 +167,12 @@
    "    if which_binary == None:\n",
    "        which_binary = shutil.which(cmd_actual[0])\n",
    "\n",
+    "    # Display an install HINT, so the user can click on a SOP to install the missing binary\n",
+    "    #\n",
    "    if which_binary == None:\n",
+    "        print(f\"The path used to search for '{cmd_actual[0]}' was:\")\n",
+    "        print(sys.path)\n",
+    "\n",
    "        if user_provided_exe_name in install_hint and install_hint[user_provided_exe_name] is not None:\n",
    "            display(Markdown(f'HINT: Use [{install_hint[user_provided_exe_name][0]}]({install_hint[user_provided_exe_name][1]}) to resolve this issue.'))\n",
    "\n",
@@ -219,8 +219,6 @@
    "                                    break # otherwise infinite hang, have not worked out why yet.\n",
    "                        else:\n",
    "                            print(line, end='')\n",
-    "                            if rules is not None:\n",
-    "                                apply_expert_rules(line)\n",
    "\n",
    "        if wait:\n",
    "            p.wait()\n",
@@ -276,25 +274,22 @@
    "                        if line_decoded.find(error_hint[0]) != -1:\n",
    "                            display(Markdown(f'HINT: Use [{error_hint[1]}]({error_hint[2]}) to resolve this issue.'))\n",
    "\n",
-    "                # apply expert rules (to run follow-on notebooks), based on output\n",
-    "                #\n",
-    "                if rules is not None:\n",
-    "                    apply_expert_rules(line_decoded)\n",
-    "\n",
    "                # Verify if a transient error, if so automatically retry (recursive)\n",
    "                #\n",
    "                if user_provided_exe_name in retry_hints:\n",
    "                    for retry_hint in retry_hints[user_provided_exe_name]:\n",
    "                        if line_decoded.find(retry_hint) != -1:\n",
-    "                            if retry_count < MAX_RETRIES:\n",
+    "                            if retry_count \u003c MAX_RETRIES:\n",
    "                                print(f\"RETRY: {retry_count} (due to: {retry_hint})\")\n",
    "                                retry_count = retry_count + 1\n",
    "                                output = run(cmd, return_output=return_output, retry_count=retry_count)\n",
    "\n",
    "                                if return_output:\n",
-    "                                    return output\n",
-    "                                else:\n",
-    "                                    return\n",
+    "                                    if base64_decode:\n",
+    "                                        import base64\n",
+    "                                        return base64.b64decode(output).decode('utf-8')\n",
+    "                                    else:\n",
+    "                                        return output\n",
    "\n",
    "    elapsed = datetime.datetime.now().replace(microsecond=0) - start_time\n",
    "\n",
@@ -311,78 +306,31 @@
    "    print(f'\\nSUCCESS: {elapsed}s elapsed.\\n')\n",
    "\n",
    "    if return_output:\n",
-    "        return output\n",
-    "\n",
-    "def load_json(filename):\n",
-    "    \"\"\"Load a json file from disk and return the contents\"\"\"\n",
-    "\n",
-    "    with open(filename, encoding=\"utf8\") as json_file:\n",
-    "        return json.load(json_file)\n",
-    "\n",
-    "def load_rules():\n",
-    "    \"\"\"Load any 'expert rules' from the metadata of this notebook (.ipynb) that should be applied to the stderr of the running executable\"\"\"\n",
-    "\n",
-    "    # Load this notebook as json to get access to the expert rules in the notebook metadata.\n",
-    "    #\n",
-    "    try:\n",
-    "        j = load_json(\"tsg100-troubleshoot-postgres.ipynb\")\n",
-    "    except:\n",
-    "        pass # If the user has renamed the book, we can't load ourself.  NOTE: Is there a way in Jupyter, to know your own filename?\n",
-    "    else:\n",
-    "        if \"metadata\" in j and \\\n",
-    "            \"azdata\" in j[\"metadata\"] and \\\n",
-    "            \"expert\" in j[\"metadata\"][\"azdata\"] and \\\n",
-    "            \"expanded_rules\" in j[\"metadata\"][\"azdata\"][\"expert\"]:\n",
-    "\n",
-    "            rules = j[\"metadata\"][\"azdata\"][\"expert\"][\"expanded_rules\"]\n",
-    "\n",
-    "            rules.sort() # Sort rules, so they run in priority order (the [0] element).  Lowest value first.\n",
-    "\n",
-    "            # print (f\"EXPERT: There are {len(rules)} rules to evaluate.\")\n",
-    "\n",
-    "            return rules\n",
-    "\n",
-    "def apply_expert_rules(line):\n",
-    "    \"\"\"Determine if the stderr line passed in, matches the regular expressions for any of the 'expert rules', if so\n",
-    "    inject a 'HINT' to the follow-on SOP/TSG to run\"\"\"\n",
-    "\n",
-    "    global rules\n",
-    "\n",
-    "    for rule in rules:\n",
-    "        notebook = rule[1]\n",
-    "        cell_type = rule[2]\n",
-    "        output_type = rule[3] # i.e. stream or error\n",
-    "        output_type_name = rule[4] # i.e. ename or name \n",
-    "        output_type_value = rule[5] # i.e. SystemExit or stdout\n",
-    "        details_name = rule[6]  # i.e. evalue or text \n",
-    "        expression = rule[7].replace(\"\\\\*\", \"*\") # Something escaped *, and put a \\ in front of it!\n",
-    "\n",
-    "        if debug_logging:\n",
-    "            print(f\"EXPERT: If rule '{expression}' satisfied', run '{notebook}'.\")\n",
-    "\n",
-    "        if re.match(expression, line, re.DOTALL):\n",
-    "\n",
-    "            if debug_logging:\n",
-    "                print(\"EXPERT: MATCH: name = value: '{0}' = '{1}' matched expression '{2}', therefore HINT '{4}'\".format(output_type_name, output_type_value, expression, notebook))\n",
-    "\n",
-    "            match_found = True\n",
-    "\n",
-    "            display(Markdown(f'HINT: Use [{notebook}]({notebook}) to resolve this issue.'))\n",
+    "        if base64_decode:\n",
+    "            import base64\n",
+    "            return base64.b64decode(output).decode('utf-8')\n",
+    "        else:\n",
+    "            return output\n",
    "\n",
    "\n",
    "\n",
-    "print('Common functions defined successfully.')\n",
-    "\n",
-    "# Hints for binary (transient fault) retry, (known) error and install guide\n",
+    "# Hints for tool retry (on transient fault), known errors and install guide\n",
    "#\n",
-    "retry_hints = {'kubectl': ['A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond']}\n",
-    "error_hints = {'kubectl': [['no such host', 'TSG010 - Get configuration contexts', '../monitor-k8s/tsg010-get-kubernetes-contexts.ipynb'], ['No connection could be made because the target machine actively refused it', 'TSG056 - Kubectl fails with No connection could be made because the target machine actively refused it', '../repair/tsg056-kubectl-no-connection-could-be-made.ipynb']]}\n",
-    "install_hint = {'kubectl': ['SOP036 - Install kubectl command line interface', '../install/sop036-install-kubectl.ipynb']}"
+    "retry_hints = {}\n",
+    "error_hints = {}\n",
+    "install_hint = {}\n",
+    "\n",
+    "\n",
+    "print('Common functions defined successfully.')"
   ]
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Get Postgres server"
   ]
@@ -400,10 +348,11 @@
    "# Sets the 'server' variable to the spec of the Postgres server\n",
    "\n",
    "import math\n",
+    "import json\n",
    "\n",
    "# If a server was provided, get it\n",
-    "if namespace and name and version:\n",
-    "    server = json.loads(run(f'kubectl get postgresql-{version} -n {namespace} {name} -o json', return_output=True))\n",
+    "if namespace and name:\n",
+    "    server = json.loads(run(f'kubectl get postgresqls -n {namespace} {name} -o json', return_output=True))\n",
    "else:\n",
    "    # Otherwise prompt the user to select a server\n",
    "    servers = json.loads(run(f'kubectl get postgresqls --all-namespaces -o json', return_output=True))['items']\n",
@@ -415,19 +364,18 @@
    "\n",
    "    pad = math.floor(math.log10(len(servers)) + 1) + 3\n",
    "    for i, s in enumerate(servers):\n",
-    "        print(f'{f\"[{i+1}]\":<{pad}}{full_name(s)}')\n",
+    "        print(f'{f\"[{i+1}]\":\u003c{pad}}{full_name(s)}')\n",
    "\n",
    "    while True:\n",
    "        try:\n",
-    "            i = int(input('Enter the index of a server to troubleshoot: '))\n",
+    "            i = int(input('Enter the index of a server'))\n",
    "        except ValueError:\n",
    "            continue\n",
    "\n",
-    "        if i >= 1 and i <= len(servers):\n",
+    "        if i \u003e= 1 and i \u003c= len(servers):\n",
    "            server = servers[i-1]\n",
    "            namespace = server['metadata']['namespace']\n",
    "            name = server['metadata']['name']\n",
-    "            version = server['kind'][len('postgresql-'):]\n",
    "            break\n",
    "\n",
    "display(Markdown(f'#### Got server {namespace}.{name}'))"
@@ -435,7 +383,11 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Summarize all resources"
   ]
@@ -443,13 +395,15 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
    "uid = server['metadata']['uid']\n",
    "\n",
    "display(Markdown(f'#### Server summary'))\n",
-    "run(f'kubectl get postgresql-{version} -n {namespace} {name}')\n",
+    "run(f'kubectl get postgresqls -n {namespace} {name}')\n",
    "\n",
    "display(Markdown(f'#### Resource summary'))\n",
    "run(f'kubectl get sts,pods,pvc,svc,ep -n {namespace} -l postgresqls.arcdata.microsoft.com/cluster-id={uid}')"
@@ -457,7 +411,11 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Troubleshoot the server"
   ]
@@ -465,16 +423,22 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
    "display(Markdown(f'#### Troubleshooting server {namespace}.{name}'))\n",
-    "run(f'kubectl describe postgresql-{version} -n {namespace} {name}')"
+    "run(f'kubectl describe postgresqls -n {namespace} {name}')"
   ]
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Troubleshoot the pods"
   ]
@@ -482,7 +446,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
    "pods = json.loads(run(f'kubectl get pods -n {namespace} -l postgresqls.arcdata.microsoft.com/cluster-id={uid} -o json', return_output=True))['items']\n",
@@ -505,7 +471,11 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Troubleshoot the containers"
   ]
@@ -513,7 +483,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
    "# Summarize and get logs from each container\n",
@@ -521,7 +493,7 @@
    "    pod_name = pod['metadata']['name']\n",
    "    cons = pod['spec']['containers']\n",
    "    con_statuses = pod['status'].get('containerStatuses', [])\n",
-    "    display(Markdown(f'#### Troubleshooting {len(cons)} container{\"\" if len(cons) < 2 else \"s\"} '\n",
+    "    display(Markdown(f'#### Troubleshooting {len(cons)} container{\"\" if len(cons) \u003c 2 else \"s\"} '\n",
    "                     f'containers for pod {namespace}.{pod_name}'))\n",
    "\n",
    "    for i, con in enumerate(cons):\n",
@@ -537,14 +509,18 @@
    "        run(f'kubectl logs -n {namespace} {pod_name} {con_name} --tail {tail_lines}')\n",
    "\n",
    "        # Get logs from the previous terminated container if one exists\n",
-    "        if con_restarts > 0:\n",
+    "        if con_restarts \u003e 0:\n",
    "            display(Markdown(f'#### Logs from previous terminated container {namespace}.{pod_name}/{con_name}'))\n",
    "            run(f'kubectl logs -n {namespace} {pod_name} {con_name} --tail {tail_lines} --previous')"
   ]
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
   "source": [
    "### Troubleshoot the PersistentVolumeClaims"
   ]
@@ -552,7 +528,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
    "display(Markdown(f'#### Troubleshooting PersistentVolumeClaims'))\n",
@@ -562,10 +540,12 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
   "outputs": [],
   "source": [
-    "print('Notebook execution complete.')"
+    "print(\"Notebook execution is complete.\")"
   ]
  }
 ],
@@ -576,20 +556,36 @@
   "name": "python3",
   "display_name": "Python 3"
  },
-  "azdata": {
+  "pansop": {
+   "related": "",
   "test": {
-    "ci": false,
-    "gci": false
-   },
-   "contract": {
-    "requires": {
-     "kubectl": {
-      "installed": true
-     }
+    "strategy": "",
+    "types": null,
+    "disable": {
+     "reason": "",
+     "workitems": null,
+     "types": null
    }
   },
-   "side_effects": false
-  }
+   "target": {
+    "current": "public",
+    "final": "public"
+   },
+   "internal": {
+    "parameters": null,
+    "symlink": false
+   },
+   "timeout": "0"
+  },
+  "language_info": {
+   "codemirror_mode": "{ Name: \"\", Version: \"\"}",
+   "file_extension": "",
+   "mimetype": "",
+   "name": "",
+   "nbconvert_exporter": "",
+   "pygments_lexer": "",
+   "version": ""
+  },
+  "widgets": []
 }
 }
-