Articles → LANGCHAIN → Read A PDF File And Store It In DataStax Astra DB
Read A PDF File And Store It In DataStax Astra DB
Create DataStax Astra DB
Create And Activate The Virtual Environment
https://www.python.org/downloads/release/python-3110/
py -3.11 -m venv rag_env
rag_env\Scripts\activate
Install The Required Libraries
pip install -U langchain langchain-community langchain-openai langchain-text-splitters cassio cassandra-driver openai pypdf
Code
import os

import cassio
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores.cassandra import Cassandra
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# =========================================
# CONFIG
# =========================================
# Credentials are taken from the environment when the matching variable is
# set, so real secrets do not have to be written into this file. The original
# placeholder strings remain the fallbacks, so the script behaves as before
# when no variable is defined.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your_open_api_key")
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "db_token")
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID", "db_id")
# NOTE(review): ASTRA_DB_REGION is not referenced by the code visible in this
# script -- confirm whether it is still needed.
ASTRA_DB_REGION = "us-east-2"
# Keyspace handed to both cassio.init and the Cassandra vector store.
KEYSPACE = "default_keyspace"
# Path of the PDF that gets loaded, split and stored.
PDF_FILE = "Access Resources In Android Studio.pdf"
# -----------------------------------------
# Astra DB connection
# -----------------------------------------
# Pass the application token, database id and keyspace to cassio before the
# vector store is created further down.
cassio.init(
    keyspace=KEYSPACE,
    database_id=ASTRA_DB_ID,
    token=ASTRA_DB_APPLICATION_TOKEN,
)
# =========================================
# LOAD AND SPLIT THE PDF
# =========================================
# Read PDF_FILE into LangChain documents.
loader = PyPDFLoader(PDF_FILE)
docs = loader.load()

# Cut the documents into overlapping pieces (size 500, overlap 50) so each
# piece can be embedded and stored on its own.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

# Stop here when the split produced nothing (for example a PDF without any
# extractable text): continuing would store no rows yet still report success.
if not chunks:
    raise SystemExit(
        f"No text chunks were produced from {PDF_FILE!r}; nothing to store."
    )
# Embedding client, authenticated with the configured OpenAI key.
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

# Build the vector store from the chunks, targeting the "pdf_vectors" table
# in the configured keyspace.
vectorstore = Cassandra.from_documents(
    table_name="pdf_vectors",
    keyspace=KEYSPACE,
    embedding=embeddings,
    documents=chunks,
)

# Reached only if the calls above did not raise.
print("PDF stored successfully in Astra DB!")
Output
| Posted By - | Karan Gupta |
| |
| Posted On - | Saturday, May 23, 2026 |