Articles → NLP → Add A Custom Component To The NLP Pipeline Using spaCy
Add A Custom Component To The NLP Pipeline Using spaCy
Code
import spacy
from spacy.tokens import Doc
from spacy.language import Language
# Step 1: Register a custom Doc-level attribute
# Guard the registration: calling set_extension twice for the same name
# (e.g. when re-running this script or notebook cell in one session) raises
# a ValueError because the extension already exists.
if not Doc.has_extension("has_email"):
    Doc.set_extension("has_email", default=False)
# Step 2: Define and register the custom pipeline component
@Language.component("email_detector")
def email_detector(doc):
    """Flag whether a Doc contains an email address.

    Stores the result in the custom extension attribute ``doc._.has_email``
    (True if at least one token looks like an email address, otherwise
    False) and returns the Doc so the pipeline can continue.

    Args:
        doc: The Doc handed to this component by the pipeline.

    Returns:
        The same Doc, with ``doc._.has_email`` set.
    """
    # Rely on spaCy's built-in email heuristic instead of a bare "@" check,
    # which would also flag handles ("@user") or a lone "@" as an email.
    doc._.has_email = any(token.like_email for token in doc)
    return doc
# Step 3: Load a spaCy model
nlp = spacy.load("en_core_web_sm")

# Step 4: Append the custom component (by its registered name) to the end
# of the pipeline
nlp.add_pipe("email_detector", last=True)

# Step 5: Run the pipeline on one text with an email and one without
doc1 = nlp("Contact us at support@example.com for more info.")
doc2 = nlp("This text has no email address.")

# Step 6: Report the text and the detector's verdict for each document
for processed in (doc1, doc2):
    print(f"Text: {processed.text}")
    print(f"Contains email? {processed._.has_email}")

# Step 7: Show the component order; "email_detector" should be listed last
print("\nPipeline components:", nlp.pipe_names)
Output
| Posted By - | Karan Gupta |
| |
| Posted On - | Friday, January 9, 2026 |