Remove Stopwords

09 Aug 2020

import pandas as pd

# Small sample of SMS messages used to demonstrate stop-word removal.
content = ['hey  i am your boss!','he is great..','nice@']

# Column labels are passed as a list: a set has no defined order and
# recent pandas releases reject it ("columns cannot be a set").
df = pd.DataFrame(content, columns=['Sms content'])
df

	Sms content
0	hey i am your boss!
1	he is great..
2	nice@

import nltk
# Fetch the NLTK 'stopwords' package; this must run before the corpus is read.
nltk.download('stopwords')
# Module-level list of English stop words; remove_stopwords() reads this name.
stopwords=nltk.corpus.stopwords.words('english')
# Peek at the first five entries as a sanity check.
stopwords[:5]

[nltk_data] Downloading package stopwords to /home/vaish/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!





['i', 'me', 'my', 'myself', 'we']

def remove_stopwords(text, stop_words=None):
    """Return the words of *text* that are not stop words.

    The text is split on whitespace, so each element of the result is a
    whole token (punctuation attached to a word is kept as-is). Tokens
    are compared case-insensitively against the stop-word collection,
    which is expected to hold lowercase entries.

    Args:
        text: The string to filter.
        stop_words: Optional iterable of lowercase stop words. When
            omitted, the module-level ``stopwords`` list is used.

    Returns:
        A list of the remaining tokens, in their original order and
        original casing. An empty or whitespace-only string gives ``[]``.
    """
    if stop_words is None:
        stop_words = stopwords
    # A set gives O(1) membership tests instead of scanning a list per token.
    blocked = set(stop_words)
    # Iterate over whitespace-separated words; iterating the string itself
    # would yield single characters.
    return [word for word in text.split() if word.lower() not in blocked]

# Add a 'clean_text' column holding the result of remove_stopwords for
# each message, then display the first rows.
df['clean_text'] = df['Sms content'].apply(remove_stopwords)
df.head()

	Sms content	clean_text
0	hey i am your boss!	[h, e, , , , , u, r, , b, !]
1	he is great..	[h, e, , , g, r, e, ., .]
2	nice@	[n, c, e, @]