Commit 131cd7b7 by zaid

updated datasets

parent b0250e71
# Run with "./install.ps1" from the project folder.
#
# Installs each pinned Python dependency with pip, one at a time, and reports
# which (if any) failed instead of unconditionally claiming success.

# List of Python dependencies (exact version pins)
$dependencies = @(
    "langchain==0.0.184",
    "PyPDF2==3.0.1",
    "python-dotenv==1.0.0",
    "streamlit==1.18.1",
    "openai==0.27.6",
    "faiss-cpu==1.7.4",
    "altair==4",
    "tiktoken==0.4.0",
    "Pillow==9.5.0",
    "streamlit-chat==0.1.1"
)

# Packages whose "pip install" returned a non-zero exit code
$failed = @()

# Loop through each dependency and install it
foreach ($dependency in $dependencies) {
    Write-Host "Installing $dependency"
    pip install $dependency

    # pip is a native command: failure is signalled through $LASTEXITCODE,
    # not through a PowerShell exception, so it must be checked explicitly.
    if ($LASTEXITCODE -ne 0) {
        Write-Warning "Failed to install $dependency (pip exit code $LASTEXITCODE)"
        $failed += $dependency
    }
}

if ($failed.Count -eq 0) {
    Write-Host "All Python dependencies installed successfully."
}
else {
    # Keep going past individual failures above so one bad package does not
    # hide the others, then surface the overall failure to the caller.
    Write-Host "The following dependencies failed to install: $($failed -join ', ')"
    exit 1
}
import streamlit as st import streamlit as st
from streamlit_chat import message
from dotenv import load_dotenv
from PIL import Image from PIL import Image
from PyPDF2 import PdfReader from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter from langchain.text_splitter import CharacterTextSplitter
...@@ -9,64 +7,56 @@ from langchain.vectorstores import FAISS ...@@ -9,64 +7,56 @@ from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate from htmlTemplates import css, bot_template, user_template
import os import os
# Load environment variables from .env file
load_dotenv()
def main(): def main():
st.set_page_config(page_title="Chat with multiple PDFs", page_icon=logo) st.set_page_config(page_title="Chat with multiple PDFs",
page_icon=logo)
st.write(css, unsafe_allow_html=True)
if "conversation" not in st.session_state: if "conversation" not in st.session_state:
st.session_state.conversation = None st.session_state.conversation = None
if "question" not in st.session_state: if "chat_history" not in st.session_state:
st.session_state.question = [] st.session_state.chat_history = None
if "answer" not in st.session_state:
st.session_state.answer = []
st.header("Chat with your Indexed PDFs")
response_container, container = st.container(), st.container()
with container:
user_question = st.text_input("Ask a question about your documents:") st.header("Chat with multiple PDFs :books:")
with response_container: user_question = st.text_input("Ask a question about your documents:")
if user_question: if user_question:
handle_user_input(user_question) handle_userinput(user_question)
st.sidebar.image(logo, width=50) st.sidebar.image(logo, width=50)
with st.sidebar: with st.sidebar:
OPENAI_API_KEY = st.text_input('Enter your OpenAI API key', type='password') OPENAI_API_KEY=st.text_input('Enter your OpenAI API key',type='password')
if OPENAI_API_KEY: os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY st.subheader("Your documents")
st.subheader("Your documents") pdf_docs = st.file_uploader(
pdf_docs = st.file_uploader( "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True, type=['pdf']) if st.button("Process"):
if st.button("Process"): with st.spinner("Processing"):
with st.spinner("Processing"): # get pdf text
# get pdf text raw_text = get_pdf_text(pdf_docs)
raw_text = get_pdf_text(pdf_docs)
# get the text chunks
# get the text chunks text_chunks = get_text_chunks(raw_text)
text_chunks = get_text_chunks(raw_text)
# create vector store
# create vector store embeddings = create_embeddings(text_chunks)
vectorstore = get_vectorstore(text_chunks)
# create conversation chain
# create conversation chain st.session_state.conversation = execute_conversation_chain(
st.session_state.conversation = get_conversation_chain( embeddings)
vectorstore)
hide_streamlit_style = """ hide_streamlit_style = """
<style> <style>
footer {visibility: hidden;} footer {visibility: hidden;}
</style> </style>
""" """
st.markdown(hide_streamlit_style, unsafe_allow_html=True) st.markdown(hide_streamlit_style, unsafe_allow_html=True)
logo = Image.open(r'assets/dsd_icon.png') logo = Image.open(r'assets/dsd_icon.png')
logo_path = './assets/dsd_icon.png' logo_path = './assets/dsd_icon.png'
def get_pdf_text(pdf_docs): def get_pdf_text(pdf_docs):
text = "" text = ""
for pdf in pdf_docs: for pdf in pdf_docs:
...@@ -75,6 +65,7 @@ def get_pdf_text(pdf_docs): ...@@ -75,6 +65,7 @@ def get_pdf_text(pdf_docs):
text += page.extract_text() text += page.extract_text()
return text return text
def get_text_chunks(text): def get_text_chunks(text):
text_splitter = CharacterTextSplitter( text_splitter = CharacterTextSplitter(
separator="\n", separator="\n",
...@@ -85,49 +76,37 @@ def get_text_chunks(text): ...@@ -85,49 +76,37 @@ def get_text_chunks(text):
chunks = text_splitter.split_text(text) chunks = text_splitter.split_text(text)
return chunks return chunks
def get_vectorstore(text_chunks):
def create_embeddings(text_chunks):
embeddings = OpenAIEmbeddings() embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings) vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
return vectorstore return vectorstore
# def similarity_search(self, query):
def get_conversation_chain(vectorstore):
def execute_conversation_chain(vectorstore):
llm = ChatOpenAI() llm = ChatOpenAI()
memory = ConversationBufferMemory( memory = ConversationBufferMemory(
memory_key='chat_history', return_messages=True) memory_key='chat_history', return_messages=True)
# Create a prompt template similar to Project2
prompt_template = """
You are a helpful assistant who provide information and answer questions based on the documents uploaded.
If the user have any questions related to the content within those documents,
please provide the answer to the best of your ability.
However, please note that you are only allowed to provide answers based on the information contained in the uploaded documents.
If user ask anything that is not covered within those documents, say 'I don't know,'
as I do not have access to information beyond what you've provided.
Do not ask any question to the user.
"""
messages = [
SystemMessagePromptTemplate.from_template(prompt_template)
]
prompt = ChatPromptTemplate.from_messages( messages )
conversation_chain = ConversationalRetrievalChain.from_llm( conversation_chain = ConversationalRetrievalChain.from_llm(
llm=llm, llm=llm,
retriever=vectorstore.as_retriever(), retriever=vectorstore.as_retriever(),
memory=memory, memory=memory
condense_question_prompt = prompt
) )
return conversation_chain return conversation_chain
def handle_user_input(user_question):
def handle_userinput(user_question):
response = st.session_state.conversation({'question': user_question}) response = st.session_state.conversation({'question': user_question})
st.session_state['question'].append(user_question) st.session_state.chat_history = response['chat_history']
st.session_state['answer'].append(response['answer'])
for i in range(len(st.session_state['answer'])): for i, message in enumerate(st.session_state.chat_history):
user_message_key = str(i) + '_user' if i % 2 == 0:
answer_message_key = str(i) + '_answer' st.write(user_template.replace(
"{{MSG}}", message.content), unsafe_allow_html=True)
message(st.session_state['question'][i], is_user=True, key=user_message_key) else:
message(st.session_state["answer"][i], key=answer_message_key) st.write(bot_template.replace(
"{{MSG}}", message.content), unsafe_allow_html=True)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
...@@ -2,7 +2,6 @@ langchain==0.0.184 ...@@ -2,7 +2,6 @@ langchain==0.0.184
PyPDF2==3.0.1 PyPDF2==3.0.1
python-dotenv==1.0.0 python-dotenv==1.0.0
streamlit==1.18.1 streamlit==1.18.1
streamlit-chat==0.1.1
openai==0.27.6 openai==0.27.6
faiss-cpu==1.7.4 faiss-cpu==1.7.4
altair==4 altair==4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment