Commit 131cd7b7 by zaid

updated datasets

parent b0250e71
# Run with "./install.ps1" from the project folder.
# Pinned Python package requirements for the app.
$dependencies = @(
    "langchain==0.0.184",
    "PyPDF2==3.0.1",
    "python-dotenv==1.0.0",
    "streamlit==1.18.1",
    "openai==0.27.6",
    "faiss-cpu==1.7.4",
    "altair==4",
    "tiktoken==0.4.0",
    "Pillow==9.5.0",
    "streamlit-chat==0.1.1"
)

# Install the packages one at a time so a failure is easy to attribute.
$dependencies | ForEach-Object {
    Write-Host "Installing $_"
    pip install $_
}
Write-Host "All Python dependencies installed successfully."
import os

import streamlit as st
from dotenv import load_dotenv
from PIL import Image
from PyPDF2 import PdfReader
from streamlit_chat import message

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

from htmlTemplates import css, bot_template, user_template
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file, if present.
load_dotenv()
def main():
    """Render the Streamlit app.

    Sidebar: OpenAI API key entry and PDF upload/processing.
    Main pane: a question box plus the rendered chat history.
    Side effects: mutates st.session_state and os.environ['OPENAI_API_KEY'].
    """
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=logo)
    st.write(css, unsafe_allow_html=True)

    # Initialise per-session state on first run so later code can append/read safely.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "question" not in st.session_state:
        st.session_state.question = []
    if "answer" not in st.session_state:
        st.session_state.answer = []
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with your Indexed PDFs")
    # Two containers so the response history renders above the input box.
    response_container, container = st.container(), st.container()
    with container:
        user_question = st.text_input("Ask a question about your documents:")
    with response_container:
        if user_question:
            handle_userinput(user_question)

    st.sidebar.image(logo, width=50)
    with st.sidebar:
        OPENAI_API_KEY = st.text_input('Enter your OpenAI API key', type='password')
        if OPENAI_API_KEY:
            os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
        st.subheader("Your documents")
        # Restrict uploads to PDFs; get_pdf_text assumes PyPDF2-readable input.
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True, type=['pdf'])
        if st.button("Process"):
            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)
                # split into chunks
                text_chunks = get_text_chunks(raw_text)
                # embed + index the chunks
                embeddings = create_embeddings(text_chunks)
                # create conversation chain (kept across reruns in session state)
                st.session_state.conversation = execute_conversation_chain(
                    embeddings)

    # Hide Streamlit's default chrome. NOTE(review): the CSS selectors were
    # elided in the diff view; these are assumed — confirm against the repo.
    hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
        """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# App icon. The diff left two conflicting definitions (`logo` vs `logo_path`);
# main() references `logo`, so define both coherently from one path.
logo_path = './assets/dsd_icon.png'
logo = Image.open(logo_path)
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    pdf_docs: iterable of file-like objects (as returned by st.file_uploader).
    Returns a single string; empty when no documents are given.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages; skip those
            # instead of raising TypeError on `str += None`.
            page_text = page.extract_text()
            if page_text:
                text += page_text
    return text
def get_text_chunks(text):
    """Split raw document text into overlapping chunks suitable for embedding.

    Returns a list of strings produced by CharacterTextSplitter.
    """
    # NOTE(review): chunk_size/chunk_overlap were elided in the diff view;
    # 1000/200 are the conventional values for this splitter — confirm in repo.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks
def create_embeddings(text_chunks):
    """Embed *text_chunks* with OpenAI and index them in an in-memory FAISS store.

    Requires OPENAI_API_KEY to be set in the environment.
    Returns the FAISS vector store.
    """
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore
def execute_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over *vectorstore*.

    The diff fused two versions of this function (duplicate `def` lines and a
    removed `condense_question_prompt` kwarg); this is the surviving version,
    which relies on the chain's default prompts.
    Returns a ConversationalRetrievalChain wired to a ChatOpenAI LLM.
    """
    llm = ChatOpenAI()
    # return_messages=True so memory yields message objects, as the chain expects.
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )
    return conversation_chain
def handle_userinput(user_question):
    """Send *user_question* to the conversation chain and replay the chat.

    The diff fused two implementations; this keeps the streamlit-chat one,
    which matches the `question`/`answer` session lists main() initialises.
    (The discarded variant also shadowed the imported `message` function with
    its loop variable.)
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state['question'].append(user_question)
    st.session_state['answer'].append(response['answer'])
    # Replay the whole history; widget keys must be unique per message.
    for i in range(len(st.session_state['answer'])):
        user_message_key = str(i) + '_user'
        answer_message_key = str(i) + '_answer'
        message(st.session_state['question'][i], is_user=True, key=user_message_key)
        message(st.session_state["answer"][i], key=answer_message_key)
# Script entry point: build and serve the Streamlit UI.
if __name__ == '__main__':
    main()
......@@ -2,7 +2,6 @@ langchain==0.0.184
PyPDF2==3.0.1
python-dotenv==1.0.0
streamlit==1.18.1
streamlit-chat==0.1.1
openai==0.27.6
faiss-cpu==1.7.4
altair==4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment