Tokenisation

import pandas as pd

# Sample SMS messages used to demonstrate tokenisation.
content = ['hey boss!', 'great..', 'nice@']
# Pass the column name as a list, not a set: {'Sms content'} is an unordered
# set and only happens to work because there is a single column.
df = pd.DataFrame(content, columns=['Sms content'])
df
Sms content
0 hey boss!
1 great..
2 nice@
import re
def tokenize(text):
    """Split *text* into word tokens on runs of non-word characters.

    Uses a raw string (r'\W+') so the ``\W`` escape does not raise a
    SyntaxWarning on modern Python. Empty strings that ``re.split``
    produces at the string edges (e.g. for trailing punctuation, as in
    'hey boss!' -> ['hey', 'boss', '']) are filtered out.
    """
    return [tok for tok in re.split(r'\W+', text) if tok]
# Lower-case each message before tokenising so the tokens are case-normalised.
df['tokenized_text'] = df['Sms content'].apply(lambda msg: tokenize(msg.lower()))
df.head()
Sms content tokenized_text
0 hey boss! [hey, boss, ]
1 great.. [great, ]
2 nice@ [nice, ]