tokenize

Tokenize a string.

Usage

var tokenize = require( '@stdlib/nlp/tokenize' );

tokenize( str[, keepWhitespace] )

Tokenizes a string, returning an array of tokens (words and punctuation marks).

var str = 'Hello Mrs. Maple, could you call me back?';
var out = tokenize( str );
// returns [ 'Hello', 'Mrs.', 'Maple', ',', 'could', 'you', 'call', 'me', 'back', '?' ]

By default, whitespace is omitted from the output array. To include whitespace characters (spaces, tabs, line breaks) as separate tokens, set keepWhitespace to true.

var str = 'Hello World!\n';
var out = tokenize( str, true );
// returns [ 'Hello', ' ', 'World', '!', '\n' ]

Examples

var tokenize = require( '@stdlib/nlp/tokenize' );


console.log( tokenize( 'Hello World!' ) );
// => [ 'Hello', 'World', '!' ]

console.log( tokenize( '' ) );
// => []

var str = 'Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod.';
console.log( tokenize( str ) );
/* =>
    [
        'Lorem',
        'ipsum',
        'dolor',
        'sit',
        'amet',
        ',',
        'consetetur',
        'sadipscing',
        'elitr',
        ',',
        'sed',
        'diam',
        'nonumy',
        'eirmod',
        '.'
    ]
*/