stringWidth.ts
ink/stringWidth.ts
223
Lines
7156
Bytes
1
Exports
4
Imports
10
Keywords
What this is
This page documents one file from the repository and includes its full source so you can read it without leaving the docs site.
Beginner explanation
This file is one piece of the larger system. Its name, directory, imports, and exports show where it fits. Start by reading the exports and related files first.
How it is used
Start from the exports list and related files. Those are the easiest clues for where this file fits into the system.
Expert explanation
Architecturally, this file intersects with ui-flow. It contains 223 lines, 4 detected imports, and 1 detected export.
Important relationships
Detected exports
stringWidth
Keywords
codepoint, width, grapheme, first, signs, offset, stringwidth, marks, characters, char
Detected imports
emoji-regex, get-east-asian-width, strip-ansi, ../utils/intl.js
Source notes
This page embeds the full file contents. Small or leaf files are still indexed honestly instead of being over-explained.
Full source
import emojiRegex from 'emoji-regex'
import { eastAsianWidth } from 'get-east-asian-width'
import stripAnsi from 'strip-ansi'
import { getGraphemeSegmenter } from '../utils/intl.js'
// Emoji matcher built once at module load and shared by every call.
// stringWidthJavaScript sets `lastIndex = 0` before each `test()`, presumably
// because the regex returned by emoji-regex is stateful (global flag) — confirm
// against the emoji-regex documentation before removing that reset.
const EMOJI_REGEX = emojiRegex()
/**
 * Pure-JavaScript terminal display width, used when Bun.stringWidth is not available.
 *
 * Returns the number of terminal cells `str` is expected to occupy. Per the
 * original authors, this is intended as a more accurate alternative to the
 * string-width package, which reports characters such as ⚠ (U+26A0) as width 2.
 * Ambiguous-width characters are measured as narrow by calling eastAsianWidth
 * with `ambiguousAsWide: false`.
 *
 * Strategy, cheapest first:
 *  1. Plain ASCII (no ESC, nothing >= 0x7f): count the code units above 0x1f.
 *  2. Strip ANSI sequences if an ESC is present.
 *  3. No emoji / variation selectors / ZWJ: sum per-code-point widths.
 *  4. Otherwise: walk grapheme clusters; emoji clusters are sized by
 *     getEmojiWidth, other clusters by their first non-zero-width code point.
 *
 * @param str Text to measure; non-strings and the empty string yield 0.
 * @returns The computed width in terminal cells.
 */
function stringWidthJavaScript(str: string): number {
  if (typeof str !== 'string' || str.length === 0) {
    return 0
  }

  // ASCII fast path, done in a single scan: accumulate the printable count and
  // bail out as soon as an ESC or a code unit >= 0x7f shows up.
  let asciiWidth = 0
  let leftAsciiFastPath = false
  for (let idx = 0; idx < str.length; idx++) {
    const unit = str.charCodeAt(idx)
    if (unit === 0x1b || unit >= 127) {
      leftAsciiFastPath = true
      break
    }
    // Control characters (<= 0x1f) take no cell
    if (unit > 0x1f) {
      asciiWidth += 1
    }
  }
  if (!leftAsciiFastPath) {
    return asciiWidth
  }

  // ANSI sequences are only stripped when an escape character is actually present
  const text = str.includes('\x1b') ? stripAnsi(str) : str
  if (text.length === 0) {
    return 0
  }

  const narrowAmbiguous = { ambiguousAsWide: false }

  // Simple Unicode: no emoji, variation selectors, or joiners, so every
  // visible code point contributes its own width.
  if (!needsSegmentation(text)) {
    let total = 0
    for (const ch of text) {
      const cp = ch.codePointAt(0)!
      if (isZeroWidth(cp)) continue
      total += eastAsianWidth(cp, narrowAmbiguous)
    }
    return total
  }

  // Full path: measure one grapheme cluster at a time.
  let total = 0
  for (const { segment } of getGraphemeSegmenter().segment(text)) {
    // Emoji are checked first; the shared regex is rewound before each test
    EMOJI_REGEX.lastIndex = 0
    if (EMOJI_REGEX.test(segment)) {
      total += getEmojiWidth(segment)
      continue
    }

    // Non-emoji cluster (e.g. a Devanagari conjunct with virama+ZWJ): only the
    // first non-zero-width code point is counted, since the cluster is treated
    // as rendering as one glyph.
    for (const ch of segment) {
      const cp = ch.codePointAt(0)!
      if (isZeroWidth(cp)) continue
      total += eastAsianWidth(cp, narrowAmbiguous)
      break
    }
  }
  return total
}
/**
 * Decides whether a string must be measured grapheme-by-grapheme.
 *
 * Returns true as soon as any code point falls in one of these ranges:
 *  - U+1F300–U+1FAFF, U+2600–U+27BF, U+1F1E6–U+1F1FF (treated as emoji ranges)
 *  - U+FE00–U+FE0F (variation selectors)
 *  - U+200D (zero-width joiner)
 *
 * @param str Text to scan.
 * @returns true if any such code point is present, otherwise false.
 */
function needsSegmentation(str: string): boolean {
  let pos = 0
  while (pos < str.length) {
    const codePoint = str.codePointAt(pos)!

    const inEmojiRange =
      (codePoint >= 0x1f300 && codePoint <= 0x1faff) ||
      (codePoint >= 0x2600 && codePoint <= 0x27bf) ||
      (codePoint >= 0x1f1e6 && codePoint <= 0x1f1ff)
    const isSelectorOrJoiner =
      (codePoint >= 0xfe00 && codePoint <= 0xfe0f) || codePoint === 0x200d

    if (inEmojiRange || isSelectorOrJoiner) {
      return true
    }

    // Astral code points span two UTF-16 code units (a surrogate pair)
    pos += codePoint > 0xffff ? 2 : 1
  }
  return false
}
/**
 * Width, in terminal cells, of a grapheme cluster already identified as emoji.
 *
 * - A cluster starting with a regional indicator (U+1F1E6–U+1F1FF) is 1 cell
 *   when it is a single code point and 2 cells otherwise.
 * - An incomplete keycap — a digit, `#` or `*` followed only by VS16 (U+FE0F),
 *   without the enclosing U+20E3 — is 1 cell.
 * - Everything else is 2 cells.
 *
 * @param grapheme One grapheme cluster.
 * @returns 1 or 2.
 */
function getEmojiWidth(grapheme: string): number {
  const lead = grapheme.codePointAt(0)!

  const startsWithRegionalIndicator = lead >= 0x1f1e6 && lead <= 0x1f1ff
  if (startsWithRegionalIndicator) {
    // Spreading a string iterates by code point, not by UTF-16 code unit
    return [...grapheme].length === 1 ? 1 : 2
  }

  const isKeycapBase =
    lead === 0x23 || lead === 0x2a || (lead >= 0x30 && lead <= 0x39)
  const isBarePlusVs16 =
    grapheme.length === 2 && grapheme.codePointAt(1) === 0xfe0f
  if (isKeycapBase && isBarePlusVs16) {
    return 1
  }

  return 2
}
/**
 * Reports whether a code point takes up no terminal cell.
 *
 * Covers control characters, the soft hyphen, zero-width and invisible format
 * characters (including the bidirectional controls), variation selectors,
 * combining diacritical marks, a heuristic set of Indic/Thai/Lao combining
 * marks, Arabic format characters, surrogates, and tag characters.
 *
 * @param codePoint Unicode code point to classify.
 * @returns true if the code point should contribute width 0, false otherwise.
 */
function isZeroWidth(codePoint: number): boolean {
  // Fast path for common printable range
  if (codePoint >= 0x20 && codePoint < 0x7f) return false
  // U+00A0..U+02FF: the only invisible code point here is the soft hyphen
  if (codePoint >= 0xa0 && codePoint < 0x0300) return codePoint === 0x00ad
  // Control characters (C0, DEL, C1)
  if (codePoint <= 0x1f || (codePoint >= 0x7f && codePoint <= 0x9f)) return true
  // Zero-width and invisible format characters. The directional controls
  // (LRM/RLM, embeddings/overrides, isolates) are invisible just like ZWSP/ZWJ
  // and must not be counted as a cell.
  if (
    (codePoint >= 0x200b && codePoint <= 0x200f) || // ZW space/joiners, LRM, RLM
    (codePoint >= 0x202a && codePoint <= 0x202e) || // Bidi embeddings and overrides
    (codePoint >= 0x2060 && codePoint <= 0x206f) || // Word joiner, invisible operators, bidi isolates, deprecated format chars
    codePoint === 0xfeff // BOM
  ) {
    return true
  }
  // Variation selectors
  if (
    (codePoint >= 0xfe00 && codePoint <= 0xfe0f) ||
    (codePoint >= 0xe0100 && codePoint <= 0xe01ef)
  ) {
    return true
  }
  // Combining diacritical marks
  if (
    (codePoint >= 0x0300 && codePoint <= 0x036f) ||
    (codePoint >= 0x1ab0 && codePoint <= 0x1aff) ||
    (codePoint >= 0x1dc0 && codePoint <= 0x1dff) ||
    (codePoint >= 0x20d0 && codePoint <= 0x20ff) ||
    (codePoint >= 0xfe20 && codePoint <= 0xfe2f)
  ) {
    return true
  }
  // Indic script combining marks (covers Devanagari through Malayalam)
  if (codePoint >= 0x0900 && codePoint <= 0x0d4f) {
    // Each script occupies a 128-code-point block; classify by the position
    // inside that block rather than listing every script separately.
    const offset = codePoint & 0x7f
    if (offset <= 0x03) return true // Signs at block start
    if (offset >= 0x3a && offset <= 0x4f) return true // Vowel signs, virama
    if (offset >= 0x51 && offset <= 0x57) return true // Stress signs
    if (offset >= 0x62 && offset <= 0x63) return true // Vowel signs
  }
  // Thai/Lao combining marks
  // Note: U+0E32 (SARA AA), U+0E33 (SARA AM), U+0EB2, U+0EB3 are spacing vowels (width 1), not combining marks
  if (
    codePoint === 0x0e31 || // Thai MAI HAN-AKAT
    (codePoint >= 0x0e34 && codePoint <= 0x0e3a) || // Thai vowel signs (skip U+0E32, U+0E33)
    (codePoint >= 0x0e47 && codePoint <= 0x0e4e) || // Thai vowel signs and marks
    codePoint === 0x0eb1 || // Lao MAI KAN
    (codePoint >= 0x0eb4 && codePoint <= 0x0ebc) || // Lao vowel signs (skip U+0EB2, U+0EB3)
    (codePoint >= 0x0ec8 && codePoint <= 0x0ecd) // Lao tone marks
  ) {
    return true
  }
  // Arabic formatting
  if (
    (codePoint >= 0x0600 && codePoint <= 0x0605) ||
    codePoint === 0x061c || // Arabic letter mark (bidi format control)
    codePoint === 0x06dd ||
    codePoint === 0x070f ||
    codePoint === 0x08e2
  ) {
    return true
  }
  // Surrogates, tag characters
  if (codePoint >= 0xd800 && codePoint <= 0xdfff) return true
  if (codePoint >= 0xe0000 && codePoint <= 0xe007f) return true
  return false
}
// Why the native implementation is preferred when present:
// complex-script graphemes such as Devanagari क्ष (ka+virama+ZWJ+ssa) render as
// a single ligature glyph yet occupy 2 terminal cells (wcwidth sums the base
// consonants). Bun.stringWidth reports 2, matching terminal cell allocation,
// which is what cursor positioning needs — the JS fallback's grapheme-cluster
// width of 1 would desync Ink's layout from the terminal.
//
// The lookup happens exactly once, at module scope, instead of on every call:
// typeof guards deopt property access and this is a hot path (~100k calls/frame).
const BUN_STRING_WIDTH_OPTS = { ambiguousIsNarrow: true } as const
const bunStringWidth =
  typeof Bun === 'undefined' || typeof Bun.stringWidth !== 'function'
    ? null
    : Bun.stringWidth

/**
 * Terminal display width of a string.
 *
 * Delegates to Bun.stringWidth (with ambiguous-width characters treated as
 * narrow) when it was found at module load, and to the JavaScript fallback
 * otherwise.
 */
export const stringWidth: (str: string) => number =
  bunStringWidth === null
    ? stringWidthJavaScript
    : str => bunStringWidth(str, BUN_STRING_WIDTH_OPTS)