Commit b0250e71 by zaid

added data sources

parent bd8f91ae
@@ -4,44 +4,44 @@ from dotenv import load_dotenv
from PIL import Image
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
import os
# Load environment variables from .env file
load_dotenv()
def main():
-st.set_page_config(page_title="Chat with multiple PDFs",
-                   page_icon=logo)
-st.write(css, unsafe_allow_html=True)
+st.set_page_config(page_title="Chat with multiple PDFs", page_icon=logo)
if "conversation" not in st.session_state:
st.session_state.conversation = None
if "question"not in st.session_state:
if "question" not in st.session_state:
st.session_state.question = []
if "answer" not in st.session_state:
st.session_state.answer = []
st.header("Chat with multiple PDFs :books:")
resonse_container, container = st.container(), st.container()
st.header("Chat with your Indexed PDFs")
response_container, container = st.container(), st.container()
with container:
user_question = st.text_input("Ask a question about your documents:")
-with resonse_container:
+with response_container:
if user_question:
-handle_userinput(user_question)
+handle_user_input(user_question)
st.sidebar.image(logo, width=50)
with st.sidebar:
-OPENAI_API_KEY = st.text_input('Enter your OpenAI API key',type='password')
+OPENAI_API_KEY = st.text_input('Enter your OpenAI API key', type='password')
if OPENAI_API_KEY:
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
st.subheader("Your documents")
pdf_docs = st.file_uploader(
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True, type=['pdf'])
if st.button("Process"):
with st.spinner("Processing"):
# get pdf text
@@ -56,16 +56,17 @@ def main():
# create conversation chain
st.session_state.conversation = get_conversation_chain(
vectorstore)
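The hunk elides the middle of the Process handler. Based on the helpers defined further down in this file, the spinner block presumably chains them as in this sketch (not the commit's exact code):

```python
if st.button("Process"):
    with st.spinner("Processing"):
        raw_text = get_pdf_text(pdf_docs)           # extract text from the uploads
        text_chunks = get_text_chunks(raw_text)     # split into overlapping chunks
        vectorstore = get_vectorstore(text_chunks)  # embed and index with FAISS
        st.session_state.conversation = get_conversation_chain(vectorstore)
```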
hide_streamlit_style = """
<style>
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
<style>
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
logo = Image.open(r'assets/dsd_icon.png')
+logo_path = './assets/dsd_icon.png'
def get_pdf_text(pdf_docs):
text = ""
for pdf in pdf_docs:
@@ -74,7 +75,6 @@ def get_pdf_text(pdf_docs):
text += page.extract_text()
return text
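The hunk header cuts out the loop setup of this helper. Given the PdfReader import at the top of the file, the full function presumably reads as follows (a sketch reconstructing the elided middle):

```python
from PyPDF2 import PdfReader

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text
```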
def get_text_chunks(text):
text_splitter = CharacterTextSplitter(
separator="\n",
@@ -85,32 +85,47 @@ def get_text_chunks(text):
chunks = text_splitter.split_text(text)
return chunks
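The splitter's keyword arguments fall inside the elided span. A common configuration looks like this sketch; the specific chunk sizes are assumptions, not the commit's actual values:

```python
from langchain.text_splitter import CharacterTextSplitter

def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,    # assumed value; the commit's setting is elided
        chunk_overlap=200,  # assumed value
        length_function=len,
    )
    return text_splitter.split_text(text)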
def get_vectorstore(text_chunks):
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
return vectorstore
+# def similarity_search(self, query):
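The stub above stays commented out, likely because LangChain's FAISS wrapper already exposes similarity search directly on the store built by get_vectorstore. A minimal query sketch (the query text and k are illustrative):

```python
# Return the k chunks most similar to the query.
docs = vectorstore.similarity_search("What do the documents say about pricing?", k=4)
for doc in docs:
    print(doc.page_content[:200])  # preview the top matches
```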
def get_conversation_chain(vectorstore):
llm = ChatOpenAI()
memory = ConversationBufferMemory(
memory_key='chat_history', return_messages=True)
+# Create a prompt template similar to Project2
+prompt_template = """
+You are a helpful assistant who provides information and answers questions based on the uploaded documents.
+If the user has questions about the content of those documents,
+answer them to the best of your ability.
+However, you may only answer from information contained in the uploaded documents.
+If the user asks anything not covered by those documents, say 'I don't know,'
+as you do not have access to information beyond what has been provided.
+Do not ask the user any questions.
+"""
+messages = [
+SystemMessagePromptTemplate.from_template(prompt_template)
+]
+prompt = ChatPromptTemplate.from_messages(messages)
conversation_chain = ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=vectorstore.as_retriever(),
-memory=memory
+memory=memory,
+condense_question_prompt=prompt
)
return conversation_chain
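Mirroring how handle_user_input invokes the chain below, the returned object is called with a question dict and yields an answer (a usage sketch; the question text is illustrative):

```python
chain = get_conversation_chain(vectorstore)
response = chain({'question': 'Summarize the uploaded documents.'})
print(response['answer'])  # ConversationBufferMemory carries chat_history across turns
```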
-def handle_userinput(user_question):
+def handle_user_input(user_question):
response = st.session_state.conversation({'question': user_question})
st.session_state['question'].append(user_question)
st.session_state['answer'].append(response['answer'])
for i in range(len(st.session_state['answer'])):
user_message_key = str(i) + '_user'
answer_message_key = str(i) + '_answer'
message(st.session_state['question'][i], is_user=True, key=user_message_key)
message(st.session_state["answer"][i], key=answer_message_key)