/**
 * Simple text tokeniser that splits text on whitespace.
 *
 * Double-quotes can be used to make a token containing whitespace. The quote
 * characters themselves are never part of a token.
 *
 * Does NOT strip punctuation or other special characters.
 *
 * Edge cases:
 *  - An empty quoted string (`""`) produces no token, so the result never
 *    contains empty strings.
 *  - A quote that is never closed runs to the end of the text.
 *  - A quote opened directly after unquoted text continues that token, while
 *    a closing quote always ends the current token.
 *
 * (If we need to significantly enhance this, it would probably be better to use
 * a third-party library rather than extend this.)
 *
 * Examples:
 *   'foo' => ['foo']
 *   'foo bar' => ['foo', 'bar']
 *   '"foo bar" qux' => ['foo bar', 'qux']
 *   'foo "" bar' => ['foo', 'bar']
 *   'foo"bar baz"' => ['foobar baz']
 *
 * @param text - The text to split; `undefined`, empty and whitespace-only
 *   input all yield an empty array.
 * @returns The tokens in the order they appear in `text`.
 */
export function tokenise(text: string | undefined): string[] {
    const trimmed = text?.trim();
    if (!trimmed) {
        return [];
    }

    const tokens: string[] = [];
    let currentToken = '';
    let insideQuotes = false;

    // Emits the token built so far, skipping empty ones so that neither
    // repeated whitespace nor `""` can add an empty string to the result.
    const flushToken = (): void => {
        if (currentToken) {
            tokens.push(currentToken);
            currentToken = '';
        }
    };

    for (const char of trimmed) {
        if (char === '"') {
            // Only a closing quote ends a token; an opening quote keeps any
            // text already collected so that `foo"bar baz"` stays one token.
            if (insideQuotes) {
                flushToken();
            }
            insideQuotes = !insideQuotes;
        } else if (!insideQuotes && char.trim() === '') {
            // Unquoted whitespace separates tokens.
            flushToken();
        } else {
            currentToken += char;
        }
    }

    // Emit whatever is left, including the contents of an unterminated quote.
    flushToken();

    return tokens;
}
