Articles → NLP → Word Embeddings In NLP
Word Embeddings In NLP
What Are N-Dimensional Vectors?
What Is a Dense Vector?
What Are Word Embeddings?
Example
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import defaultdict
import numpy as np
# Download required resources
nltk.download('punkt')
nltk.download('stopwords')

# Sample text
text = "The cat sat on the mat. The dog barked at the cat. The cat ran away."

# Tokenize, lowercase, and drop punctuation/stopwords.
# Build the stopword set ONCE: calling stopwords.words('english') inside the
# comprehension re-reads the word list for every token, and membership tests
# against a list are O(m) each -- a set gives O(1) lookups.
stop_words = set(stopwords.words('english'))
tokens = word_tokenize(text.lower())
tokens = [word for word in tokens if word.isalpha() and word not in stop_words]

# Build vocabulary. sorted() makes the word -> index mapping deterministic
# across runs (set iteration order is not guaranteed to be stable).
vocab = sorted(set(tokens))
vocab_index = {word: i for i, word in enumerate(vocab)}

# Create co-occurrence matrix: count how often each pair of words appears
# within `window_size` positions of each other in the token stream.
window_size = 2
co_matrix = np.zeros((len(vocab), len(vocab)))
for i, word in enumerate(tokens):
    word_idx = vocab_index[word]
    # Symmetric window [i - window_size, i + window_size], clipped to the
    # bounds of the token list; skip the center token itself (i == j).
    for j in range(max(0, i - window_size), min(len(tokens), i + window_size + 1)):
        if i != j:
            neighbor_idx = vocab_index[tokens[j]]
            co_matrix[word_idx, neighbor_idx] += 1

# Display matrix
print("Vocabulary:", vocab)
print("Co-occurrence Matrix:\n", co_matrix)
Output
| Posted By - | Karan Gupta |
| |
| Posted On - | Friday, November 21, 2025 |