Answers 2.7
Theory
Practice
1.
def embed_document(document_text):
    """
    Placeholder function for document embedding.
    In a real scenario, this function would convert document text into a numerical vector.
    """
    return [hash(document_text) % 100]  # Simulated embedding for demonstration purposes

def create_vector_store(documents):
    """
    Converts a list of documents into embeddings and stores them in a simple in-memory structure.

    Args:
        documents (list of str): List of document texts to be converted into embeddings.

    Returns:
        list: A list of embeddings representing the input documents.
    """
    vector_store = [embed_document(doc) for doc in documents]
    return vector_store

# Example usage
documents = [
    "Document 1 text content here.",
    "Document 2 text content, possibly different.",
    "Another document, the third one."
]
vector_store = create_vector_store(documents)
print("Vector Store:", vector_store)
2.
def calculate_similarity(query_embedding, document_embedding):
    """
    Placeholder function for calculating similarity between two embeddings.
    In practice, this could use cosine similarity, Euclidean distance, etc.

    Args:
        query_embedding (list): The embedding of the query.
        document_embedding (list): The embedding of a document.

    Returns:
        float: A simulated similarity score between the query and the document.
    """
    # Simplified similarity calculation for demonstration:
    # closer values yield a higher (less negative) score
    return -abs(query_embedding[0] - document_embedding[0])

def perform_semantic_search(query, vector_store):
    """
    Performs a semantic search to find the document most similar to the query in the vector store.

    Args:
        query (str): The search query.
        vector_store (list): The in-memory structure storing document embeddings.

    Returns:
        int: The index of the most similar document in the vector store.
    """
    query_embedding = embed_document(query)
    similarity_scores = [calculate_similarity(query_embedding, doc_embedding) for doc_embedding in vector_store]
    # Find the index of the highest similarity score
    most_similar_index = similarity_scores.index(max(similarity_scores))
    return most_similar_index

# Example usage
query = "Document content that resembles document 1 more than others."
most_similar_doc_index = perform_semantic_search(query, vector_store)
print("The most similar document index:", most_similar_doc_index)
3.
class Chatbot:
    def __init__(self):
        # Chat history: a list of (query, response) tuples
        self.history = []

    def generate_response(self, query, context):
        """
        Placeholder function to simulate response generation based on the current query and context.

        Args:
            query (str): The user's current query.
            context (list of tuples): The chat history, where each tuple contains a (query, response) pair.

        Returns:
            str: A simulated response.
        """
        # For simplicity, the response just echoes the query with a note about
        # the number of past interactions
        return f"Response to '{query}' (with {len(context)} past interactions)."

    def respond_to_query(self, query):
        """
        Takes a user's query as input, generates a response considering the chat history, and updates the history.

        Args:
            query (str): The user's query.

        Returns:
            str: The generated response.
        """
        # Use the current state of the history as context for generating a response
        response = self.generate_response(query, self.history)
        # Update the chat history with the current query and response
        self.history.append((query, response))
        return response

# Example usage
chatbot = Chatbot()
print(chatbot.respond_to_query("Hello, how are you?"))
print(chatbot.respond_to_query("What is the weather like today?"))
print(chatbot.respond_to_query("Thank you!"))
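In a real chatbot, generate_response would typically serialize the history into the model's prompt. A hypothetical helper sketching one common convention (the turn format is an assumption, not a fixed API):

def build_prompt(query, context):
    # Render each past (query, response) pair as a dialogue turn,
    # then append the current query for the model to answer.
    turns = [f"User: {q}\nAssistant: {r}" for q, r in context]
    return "\n".join(turns + [f"User: {query}\nAssistant:"])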
4.
class LanguageModel:
    def predict(self, input_text):
        # Placeholder prediction method
        return f"Mock response for: {input_text}"

class DocumentRetriever:
    def retrieve(self, query):
        # Placeholder document retrieval method
        return f"Mock document related to: {query}"

class ConversationMemory:
    def __init__(self):
        self.memory = []

    def add_to_memory(self, query, response):
        self.memory.append((query, response))

    def reset_memory(self):
        self.memory = []

    def get_memory(self):
        return self.memory

def setup_conversational_retrieval_chain():
    # Initialize the components of the retrieval chain
    language_model = LanguageModel()
    document_retriever = DocumentRetriever()
    conversation_memory = ConversationMemory()
    # For demonstration purposes, this function returns a dictionary of the
    # initialized components. In a real implementation, these components
    # would be integrated into a more complex retrieval system.
    retrieval_chain = {
        "language_model": language_model,
        "document_retriever": document_retriever,
        "conversation_memory": conversation_memory
    }
    return retrieval_chain

# Example usage
retrieval_chain = setup_conversational_retrieval_chain()
print(retrieval_chain)
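To get a feel for how the mock components behave before they are wired together (as they will be in Task 7), each can be exercised on its own:

# Exercising the mock components individually
chain = setup_conversational_retrieval_chain()
print(chain["language_model"].predict("Hello"))
print(chain["document_retriever"].retrieve("embeddings"))
chain["conversation_memory"].add_to_memory("Hello", "Hi there")
print(chain["conversation_memory"].get_memory())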
5.
class EnhancedChatbot(Chatbot):
    def __init__(self):
        super().__init__()
        # Use ConversationMemory from the previous task to manage chat history
        self.conversation_memory = ConversationMemory()

    def add_to_history(self, query, response):
        """
        Adds a new entry to the conversation history.

        Args:
            query (str): The user's query.
            response (str): The chatbot's response.
        """
        self.conversation_memory.add_to_memory(query, response)

    def reset_history(self):
        """
        Resets the conversation history, clearing all past interactions.
        """
        self.conversation_memory.reset_memory()

    def respond_to_query(self, query):
        """
        Overrides the base method to incorporate conversation memory management.
        """
        # Generate a response considering the current history
        response = super().generate_response(query, self.conversation_memory.get_memory())
        # Update the conversation history with the new interaction
        self.add_to_history(query, response)
        return response

# Example usage
enhanced_chatbot = EnhancedChatbot()
print(enhanced_chatbot.respond_to_query("Hello, how are you?"))
enhanced_chatbot.reset_history()
print(enhanced_chatbot.respond_to_query("Starting a new conversation."))
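One design note: passing the entire history to the model makes prompts grow with every turn. A common mitigation, sketched here as an optional variant (the class name and window size are illustrative), is to expose only the most recent interactions:

class WindowedConversationMemory(ConversationMemory):
    def __init__(self, window_size=5):
        super().__init__()
        self.window_size = window_size

    def get_memory(self):
        # Only the most recent interactions are used as context
        return self.memory[-self.window_size:]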
6.
def embed_document(document_text):
    # Placeholder function to simulate document text embedding.
    # Note: this redefines the list-based version from Task 1 with a
    # single-number embedding for simplicity.
    return sum(ord(char) for char in document_text) % 100  # Simple hash for demonstration

def split_document_into_chunks(document, chunk_size=100):
    # Splits document text into manageable chunks
    return [document[i:i + chunk_size] for i in range(0, len(document), chunk_size)]

def perform_semantic_search(query_embedding, vector_store):
    # Finds the most relevant document chunk based on embedding similarity (placeholder logic)
    similarities = [abs(query_embedding - chunk_embedding) for chunk_embedding in vector_store]
    return similarities.index(min(similarities))

def generate_answer_from_chunk(chunk):
    # Placeholder function to simulate answer generation from a selected document chunk
    return f"Based on your question, a relevant piece of information is: \"{chunk[:50]}...\""

# Main Q&A system logic
document = "This is a long document. " * 100  # Simulated document
chunks = split_document_into_chunks(document, 100)
vector_store = [embed_document(chunk) for chunk in chunks]

# Simulate a user question and its embedding
user_question = "What is this document about?"
question_embedding = embed_document(user_question)

# Find the most relevant chunk
relevant_chunk_index = perform_semantic_search(question_embedding, vector_store)
relevant_chunk = chunks[relevant_chunk_index]

# Generate an answer based on the most relevant chunk
answer = generate_answer_from_chunk(relevant_chunk)
print(answer)
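A caveat with fixed-offset splitting is that a sentence can be cut in half at a chunk boundary. A common remedy, shown here as a hypothetical variant of the splitter, is to let adjacent chunks overlap:

def split_document_with_overlap(document, chunk_size=100, overlap=20):
    # Adjacent chunks share `overlap` characters, so text that falls on a
    # boundary still appears intact in at least one chunk.
    step = chunk_size - overlap
    return [document[i:i + chunk_size] for i in range(0, len(document), step)]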
7.
def integrate_memory_with_retrieval_chain(retrieval_chain, user_query):
    """
    Integrates the conversational retrieval chain with a memory system to maintain context.

    Args:
        retrieval_chain (dict): A mock retrieval chain containing a language model,
            document retriever, and conversation memory.
        user_query (str): The user's query to process.

    Returns:
        str: The generated response.
    """
    # Retrieve components from the retrieval chain
    conversation_memory = retrieval_chain["conversation_memory"]
    language_model = retrieval_chain["language_model"]
    document_retriever = retrieval_chain["document_retriever"]
    # Simulate using the document retriever to find relevant information
    relevant_info = document_retriever.retrieve(user_query)
    # Retrieve conversation history as context
    context = conversation_memory.get_memory()
    # Simulate generating a response with the language model using the query and context
    response = language_model.predict(f"Query: {user_query}, Context: {context}, Relevant Info: {relevant_info}")
    # Update conversation memory with the new interaction
    conversation_memory.add_to_memory(user_query, response)
    return response

# Using the retrieval chain from Task 4 with a dummy query for demonstration
dummy_query = "Tell me more about this document."
response = integrate_memory_with_retrieval_chain(retrieval_chain, dummy_query)
print("Generated Response:", response)
8.
def chatbot_cli():
    # EnhancedChatbot is the extended version from the previous tasks
    enhanced_chatbot = EnhancedChatbot()
    while True:
        print("\nOptions: ask [question], view history, reset history, exit")
        user_input = input("What would you like to do? ").strip()
        command = user_input.lower()
        if command.startswith("ask "):
            # Slice from the original input to preserve the question's casing
            question = user_input[4:]
            response = enhanced_chatbot.respond_to_query(question)
            print("Chatbot:", response)
        elif command == "view history":
            for i, (q, a) in enumerate(enhanced_chatbot.conversation_memory.get_memory(), 1):
                print(f"{i}. Q: {q} A: {a}")
        elif command == "reset history":
            enhanced_chatbot.reset_history()
            print("Conversation history has been reset.")
        elif command == "exit":
            break
        else:
            print("Invalid option. Please try again.")

# To run the CLI, simply call the function (commented out to prevent execution
# in non-interactive environments)
# chatbot_cli()
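As an optional hardening step (not part of the reference answer), the loop can be wrapped so that Ctrl+C or a closed input stream exits cleanly instead of raising a traceback:

def chatbot_cli_safe():
    # Treat an interrupt or end-of-input as a normal way to quit the CLI
    try:
        chatbot_cli()
    except (KeyboardInterrupt, EOFError):
        print("\nGoodbye!")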