content = ['hey i am your boss!','he is great..','nice@']
df=pd.DataFrame(content,columns={'Sms content'})
df
|
Sms content |
0 |
hey i am your boss! |
1 |
he is great.. |
2 |
nice@ |
import nltk
nltk.download('stopwords')
stopwords=nltk.corpus.stopwords.words('english')
stopwords[:5]
[nltk_data] Downloading package stopwords to /home/vaish/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
['i', 'me', 'my', 'myself', 'we']
def remove_stopwords(text):
clean_text=[word for word in text if word not in stopwords]
return clean_text
df['clean_text'] = df['Sms content'].apply(lambda row : remove_stopwords(row))
df.head()
|
Sms content |
clean_text |
0 |
hey i am your boss! |
[h, e, , , , , u, r, , b, !] |
1 |
he is great.. |
[h, e, , , g, r, e, ., .] |
2 |
nice@ |
[n, c, e, @] |