tokenize
Tokenize a string.
Usage
var tokenize = require( '@stdlib/nlp/tokenize' );
tokenize( str[, keepWhitespace] )
Tokenizes a string, returning an array of tokens (words and punctuation marks).
var str = 'Hello Mrs. Maple, could you call me back?';
var out = tokenize( str );
// returns [ 'Hello', 'Mrs.', 'Maple', ',', 'could', 'you', 'call', 'me', 'back', '?' ]
To include whitespace characters (spaces, tabs, line breaks) in the output array, set `keepWhitespace` to `true`.
var str = 'Hello World!\n';
var out = tokenize( str, true );
// returns [ 'Hello', ' ', 'World', '!', '\n' ]
Examples
var tokenize = require( '@stdlib/nlp/tokenize' );
console.log( tokenize( 'Hello World!' ) );
// => [ 'Hello', 'World', '!' ]
console.log( tokenize( '' ) );
// => []
var str = 'Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod.';
console.log( tokenize( str ) );
/* =>
[
'Lorem',
'ipsum',
'dolor',
'sit',
'amet',
',',
'consetetur',
'sadipscing',
'elitr',
',',
'sed',
'diam',
'nonumy',
'eirmod',
'.'
]
*/