azuredatastudio/extensions/notebook/src/intellisense/text.ts

// Copyright (c) Jupyter Development Team.
// Distributed under the terms of the Modified BSD License.

// This code is originally from @jupyterlab/packages/coreutils/src/text.ts
// Note: this code doesn't seem to do anything in the sqlops environment since the
// surr

// javascript stores text as utf16 and string indices use "code units",
// which stores high-codepoint characters as "surrogate pairs",
// which occupy two indices in the javascript string.
// We need to translate cursor_pos in the Jupyter protocol (in characters)
// to js offset (with surrogate pairs taking two spots).

// allow-any-unicode-next-line
const HAS_SURROGATES: boolean = '𝐚'.length > 1;

/**
 * Convert a javascript string index into a unicode character offset
 *
 * @param jsIdx - The javascript string index (counting surrogate pairs)
 *
 * @param text - The text in which the offset is calculated
 *
 * @returns The unicode character offset
 */
export function jsIndexToCharIndex(jsIdx: number, text: string): number {
	if (!HAS_SURROGATES) {
		// not using surrogates, nothing to do
		return jsIdx;
	}
	let charIdx = jsIdx;
	for (let i = 0; i + 1 < text.length && i < jsIdx; i++) {
		let charCode = text.charCodeAt(i);
		// check for surrogate pair
		if (charCode >= 0xD800 && charCode <= 0xDBFF) {
			let nextCharCode = text.charCodeAt(i + 1);
			if (nextCharCode >= 0xDC00 && nextCharCode <= 0xDFFF) {
				charIdx--;
				i++;
			}
		}
	}
	return charIdx;
}

/**
 * Get the diff between pure character count and JS-based count with 2 chars per surrogate pair.
 *
 * @param text - The text in which the offset is calculated
 *
 * @returns The js-native index
 */
export function charCountToJsCountDiff(text: string): number {
	let diff = 0;
	if (!HAS_SURROGATES) {
		// not using surrogates, nothing to do
		return diff;
	}
	for (let i = 0; i + 1 < text.length; i++) {
		let charCode = text.charCodeAt(i);
		// check for surrogate pair
		if (charCode >= 0xD800 && charCode <= 0xDBFF) {
			let nextCharCode = text.charCodeAt(i + 1);
			if (nextCharCode >= 0xDC00 && nextCharCode <= 0xDFFF) {
				diff++;
				i++;
			}
		}
	}
	return diff;
}