- I want to set up semantic search with LangChain and MongoDB Atlas.
- I created a free cluster, a database, a collection, and a search index.
- The params (DB_NAME, COLLECTION_NAME, ATLAS_VECTOR_SEARCH_INDEX_NAME) are properly set up.
I get this error:

raise OperationFailure(errmsg, code, response, max_wire_version)
pymongo.errors.OperationFailure: Error in specification { name: "embedding_SON([('$', '2dsphere')])", key: { embedding: { $: "2dsphere" } } } :: caused by :: Values in v:2 index key pattern cannot be of type object. Only numbers > 0, numbers < 0, and strings are allowed., full error: {'ok': 0.0, 'errmsg': 'Error in specification { name: "embedding_SON([('$', '2dsphere')])", key: { embedding: { $: "2dsphere" } } } :: caused by :: Values in v:2 index key pattern cannot be of type object. Only numbers > 0, numbers < 0, and strings are allowed.', 'code': 67, 'codeName': 'CannotCreateIndex', '$clusterTime': {'clusterTime': Timestamp(1711621244, 1), 'signature': {'hash': b'pV~@\xb12@\x03\x17d\xaf\x1d\xb1aq\xb7IS\xd3\xbe', 'keyId': 7302093767895416833}}, 'operationTime': Timestamp(1711621244, 1)}
I can't figure out what this error means. Any idea?
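For reference, the search index definition I created in the Atlas UI looks roughly like this (only a sketch, written as a Python dict here for readability; the 1536 dimensions are an assumption based on using OpenAIEmbeddings, and the actual definition may differ slightly):

# Sketch of the Atlas Search index definition (created in the Atlas UI, not in this script).
# Assumes 1536-dimensional OpenAI embeddings; the real definition may differ.
index_definition = {
    "mappings": {
        "dynamic": True,
        "fields": {
            "embedding": {
                "type": "knnVector",
                "dimensions": 1536,
                "similarity": "cosine",
            }
        }
    }
}

Here is the full script: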
from pymongo import MongoClient, IndexModel, ASCENDING
from bson import SON
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from pymongo import MongoClient
from dotenv import load_dotenv
import os
import argparse
import warnings
load_dotenv()
# Filter out the UserWarning from langchain
warnings.filterwarnings("ignore",
                        category=UserWarning,
                        module="langchain.chains.llm")
# https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas
# (getting started with ATLAS)https://www.mongodb.com/docs/atlas/getting-started/
# (Atlas - LangChain Integration) https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/#create-the-atlas-vector-search-index
# Process arguments
parser = argparse.ArgumentParser(description='Atlas Vector Search Demo')
parser.add_argument('-q', '--question', help="The question to ask")
args = parser.parse_args()
if args.question is None:
    # Some questions to try...
    query = "How big is the telecom company?"
    query = "Who started AT&T?"
    #query = "Where is AT&T based?"
    #query = "What venues are AT&T branded?"
    #query = "How big is BofA?"
    #query = "When was the financial institution started?"
    #query = "Does the bank have an investment arm?"
    #query = "Where does the bank's revenue come from?"
    #query = "Tell me about charity."
    #query = "What buildings are BofA branded?"
else:
    query = args.question
DB_NAME = "db"
COLLECTION_NAME = "courses"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_search_index"
DB_PASSWORD = os.getenv("DB_PASSWORD")
ATLAS_CONNECTION_STRING = "connection string"
# initialize MongoDB python client
client = MongoClient(ATLAS_CONNECTION_STRING)
collection = client[DB_NAME][COLLECTION_NAME]
# Create the IndexModel for the knnVector
knn_vector_index = IndexModel([("embedding", SON([("$**", "2dsphere")]))])
# Add the index to the collection
collection.create_indexes([knn_vector_index])
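# NOTE: the create_indexes call above is what raises the OperationFailure quoted at the top of this question.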
# Load the FAQ text file
loader = TextLoader("./docs/faq.txt")
data = loader.load()
# Split the text into documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
docs = text_splitter.split_documents(data)
print("\nYour question:")
print("-------------")
print(query)
llm = OpenAI()
compressor = LLMChainExtractor.from_llm(llm)
# Create the vector store for db.courses (alternative, commented out)
# vectorStore = MongoDBAtlasVectorSearch.from_documents(
#     documents=docs,
#     embedding=OpenAIEmbeddings(disallowed_special=()),
#     collection=collection,
#     index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME
# )
vectorStore = MongoDBAtlasVectorSearch(
    collection,
    OpenAIEmbeddings(),
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME
)
docs = vectorStore.max_marginal_relevance_search(query, k=1)
print(docs[0])
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorStore.as_retriever()
)
print("\nAI Response:")
print("-----------")
compressed_docs = compression_retriever.get_relevant_documents(query)
for doc in compressed_docs:
    print(doc.page_content)
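For completeness, this is how I run the script (the filename is just what I call it locally):

python atlas_vector_search_demo.py -q "Who started AT&T?"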