Articles → NLP → Topic Modelling In NLP
Topic Modelling In NLP
What Is A Topic?
What Is Latent Dirichlet Allocation?
Example
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
# Toy corpus: three sentences about sports followed by three about phones/tech.
docs = [
    "I love playing cricket and football",
    "Football and cricket are popular sports",
    "Virat Kohli scored a century in cricket",
    "Apple released a new iPhone",
    "Samsung launches new Android phone",
    "Smartphones have powerful processors",
]

# Build the document-term count matrix, dropping English stop words.
cv = CountVectorizer(stop_words='english')
dtm = cv.fit_transform(docs)

# Fit LDA with three topics; random_state makes the run reproducible.
lda = LatentDirichletAllocation(n_components=3, random_state=42)
lda.fit(dtm)

# Fetch the vocabulary once instead of rebuilding it for every printed word.
feature_names = cv.get_feature_names_out()

for i, topic in enumerate(lda.components_):
    print(f"Topic {i}:")
    # argsort() is ascending, so the last five indices are the five
    # highest-weighted words, printed from lowest to highest weight.
    for word in topic.argsort()[-5:]:
        print(" ", feature_names[word])
Output
Document Topic Matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pandas as pd
# Toy corpus: three sentences about sports followed by three about phones/tech.
docs = [
    "I love playing cricket and football",
    "Football and cricket are popular sports",
    "Virat Kohli scored a century in cricket",
    "Apple released a new iPhone",
    "Samsung launches new Android phone",
    "Smartphones have powerful processors",
]

# Bag-of-words counts with English stop words removed.
cv = CountVectorizer(stop_words="english")
dtm = cv.fit_transform(docs)

# Three-topic LDA model, seeded so repeated runs give the same result.
lda = LatentDirichletAllocation(n_components=3, random_state=42)
lda.fit(dtm)

# Topic distribution for each document in the corpus.
doc_topic_matrix = lda.transform(dtm)

# Label one column per topic, then wrap the matrix in a DataFrame for display.
topic_labels = [f"Topic {i}" for i in range(lda.n_components)]
doc_topic_df = pd.DataFrame(doc_topic_matrix, columns=topic_labels)

print(doc_topic_df)
| Posted By - | Karan Gupta |
| |
| Posted On - | Friday, January 23, 2026 |