Commit b0250e71 by zaid

added data sources

parent bd8f91ae
...@@ -4,44 +4,44 @@ from dotenv import load_dotenv ...@@ -4,44 +4,44 @@ from dotenv import load_dotenv
from PIL import Image from PIL import Image
from PyPDF2 import PdfReader from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
import os import os
# Load environment variables from .env file
load_dotenv()
def main(): def main():
st.set_page_config(page_title="Chat with multiple PDFs", st.set_page_config(page_title="Chat with multiple PDFs", page_icon=logo)
page_icon=logo)
st.write(css, unsafe_allow_html=True)
if "conversation" not in st.session_state: if "conversation" not in st.session_state:
st.session_state.conversation = None st.session_state.conversation = None
if "question"not in st.session_state: if "question" not in st.session_state:
st.session_state.question = [] st.session_state.question = []
if "answer" not in st.session_state: if "answer" not in st.session_state:
st.session_state.answer = [] st.session_state.answer = []
st.header("Chat with your Indexed PDFs")
st.header("Chat with multiple PDFs :books:") response_container, container = st.container(), st.container()
resonse_container, container = st.container(), st.container()
with container: with container:
user_question = st.text_input("Ask a question about your documents:") user_question = st.text_input("Ask a question about your documents:")
with resonse_container: with response_container:
if user_question: if user_question:
handle_userinput(user_question) handle_user_input(user_question)
st.sidebar.image(logo, width=50) st.sidebar.image(logo, width=50)
with st.sidebar: with st.sidebar:
OPENAI_API_KEY = st.text_input('Enter your OpenAI API key',type='password') OPENAI_API_KEY = st.text_input('Enter your OpenAI API key', type='password')
if OPENAI_API_KEY: if OPENAI_API_KEY:
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
st.subheader("Your documents") st.subheader("Your documents")
pdf_docs = st.file_uploader( pdf_docs = st.file_uploader(
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True) "Upload your PDFs here and click on 'Process'", accept_multiple_files=True, type=['pdf'])
if st.button("Process"): if st.button("Process"):
with st.spinner("Processing"): with st.spinner("Processing"):
# get pdf text # get pdf text
...@@ -56,16 +56,17 @@ def main(): ...@@ -56,16 +56,17 @@ def main():
# create conversation chain # create conversation chain
st.session_state.conversation = get_conversation_chain( st.session_state.conversation = get_conversation_chain(
vectorstore) vectorstore)
hide_streamlit_style = """ hide_streamlit_style = """
<style> <style>
footer {visibility: hidden;} footer {visibility: hidden;}
</style> </style>
""" """
st.markdown(hide_streamlit_style, unsafe_allow_html=True) st.markdown(hide_streamlit_style, unsafe_allow_html=True)
logo = Image.open(r'assets/dsd_icon.png') logo = Image.open(r'assets/dsd_icon.png')
logo_path = './assets/dsd_icon.png' logo_path = './assets/dsd_icon.png'
def get_pdf_text(pdf_docs): def get_pdf_text(pdf_docs):
text = "" text = ""
for pdf in pdf_docs: for pdf in pdf_docs:
...@@ -74,7 +75,6 @@ def get_pdf_text(pdf_docs): ...@@ -74,7 +75,6 @@ def get_pdf_text(pdf_docs):
text += page.extract_text() text += page.extract_text()
return text return text
def get_text_chunks(text): def get_text_chunks(text):
text_splitter = CharacterTextSplitter( text_splitter = CharacterTextSplitter(
separator="\n", separator="\n",
...@@ -85,32 +85,47 @@ def get_text_chunks(text): ...@@ -85,32 +85,47 @@ def get_text_chunks(text):
chunks = text_splitter.split_text(text) chunks = text_splitter.split_text(text)
return chunks return chunks
def get_vectorstore(text_chunks):
    """Embed the given text chunks and index them in an in-memory FAISS store.

    Parameters
    ----------
    text_chunks : list[str]
        Pre-split document fragments produced by the text splitter.

    Returns
    -------
    FAISS
        A vector store over the chunks, ready to be used as a retriever.
        Uses the OpenAI embeddings API, so OPENAI_API_KEY must be set.
    """
    embedder = OpenAIEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
# def similarity_search(self, query):
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    Parameters
    ----------
    vectorstore : FAISS
        Vector store whose retriever supplies document context.

    Returns
    -------
    ConversationalRetrievalChain
        A memory-backed chain; call it with ``{'question': ...}``.
    """
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    # System instructions for the ANSWER step.  {context} receives the
    # retrieved document chunks; {question} (below) the user's query.
    prompt_template = """
    You are a helpful assistant who provide information and answer questions based on the documents uploaded.
    If the user have any questions related to the content within those documents,
    please provide the answer to the best of your ability.
    However, please note that you are only allowed to provide answers based on the information contained in the uploaded documents.
    If user ask anything that is not covered within those documents, say 'I don't know,'
    as I do not have access to information beyond what you've provided.
    Do not ask any question to the user.

    {context}
    """
    messages = [
        SystemMessagePromptTemplate.from_template(prompt_template),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
    prompt = ChatPromptTemplate.from_messages(messages)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        # BUG FIX: the custom system prompt was passed as
        # condense_question_prompt.  The condense step rewrites the user's
        # follow-up into a standalone question and its prompt MUST expose
        # {chat_history} and {question} variables; a system-only prompt
        # there breaks condensation and never reaches the answering LLM.
        # The prompt belongs to the combine-documents (answer) step:
        combine_docs_chain_kwargs={'prompt': prompt},
    )
    return conversation_chain
def handle_user_input(user_question):
    """Run *user_question* through the conversation chain and render the chat.

    Appends the new question/answer pair to the session-state transcript
    (``st.session_state['question']`` / ``['answer']``), then replays every
    exchange so the full conversation stays visible after each rerun.
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state['question'].append(user_question)
    st.session_state['answer'].append(response['answer'])
    # Re-render the whole transcript; each widget needs a unique key.
    for idx, answer in enumerate(st.session_state['answer']):
        message(st.session_state['question'][idx], is_user=True,
                key=f'{idx}_user')
        message(answer, key=f'{idx}_answer')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment