updated datasets

131cd7b7 · zaid · b0250e71 · b0250e71 · 131cd7b7 · 131cd7b7
Commit 131cd7b7 authored Sep 21, 2023 by zaid
16 changed files
--- a/Chat with your PDF/install.ps1
+++ b/Chat with your PDF/install.ps1
-#run using ~"./install.ps1" in project folder
-# List of Python dependencies
-$dependencies = @(
-    "langchain==0.0.184",
-    "PyPDF2==3.0.1",
-    "python-dotenv==1.0.0",
-    "streamlit==1.18.1",
-    "openai==0.27.6",
-    "faiss-cpu==1.7.4",
-    "altair==4",
-    "tiktoken==0.4.0",
-    "Pillow==9.5.0",
-    "streamlit-chat==0.1.1"
-)
-# Loop through each dependency and install it
-foreach ($dependency in $dependencies) {
-    Write-Host "Installing $dependency"
-    pip install $dependency
-}
-Write-Host "All Python dependencies installed successfully."
--- a/Chat with your PDF/.gitignore
+++ b/Chat with your PDF/.gitignore
--- a/Chat with your PDF/.python-version
+++ b/Chat with your PDF/.python-version
--- a/Chat with your PDF/app.py
+++ b/Chat with your PDF/app.py
 import streamlit as st
-from streamlit_chat import message
-from dotenv import load_dotenv
 from PIL import Image
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
@@ -9,64 +7,56 @@ from langchain.vectorstores import FAISS
 from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
-from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
+from htmlTemplates import css, bot_template, user_template
 import os
-# Load environment variables from .env file
-load_dotenv()
 def main():
-    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=logo)
+    st.set_page_config(page_title="Chat with multiple PDFs",
+                       page_icon=logo)
+    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
-    if "question" not in st.session_state:
+    if "chat_history" not in st.session_state:
-        st.session_state.question = []
+        st.session_state.chat_history = None
-    if "answer" not in st.session_state:
-        st.session_state.answer = []
-    st.header("Chat with your Indexed PDFs")
-    response_container, container = st.container(), st.container()
-    with container:
-        user_question = st.text_input("Ask a question about your documents:")
+    st.header("Chat with multiple PDFs :books:")
-    with response_container:
+    user_question = st.text_input("Ask a question about your documents:")
-        if user_question:
+    if user_question:
-            handle_user_input(user_question)
+        handle_userinput(user_question)
    st.sidebar.image(logo, width=50)
    with st.sidebar:
-        OPENAI_API_KEY = st.text_input('Enter your OpenAI API key', type='password')
+        OPENAI_API_KEY=st.text_input('Enter your OpenAI API key',type='password')
-        if OPENAI_API_KEY:
+        os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
-            os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
+        st.subheader("Your documents")
-            st.subheader("Your documents")
+        pdf_docs = st.file_uploader(
-            pdf_docs = st.file_uploader(
+            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
-                "Upload your PDFs here and click on 'Process'", accept_multiple_files=True, type=['pdf'])
+        if st.button("Process"):
-            if st.button("Process"):
+            with st.spinner("Processing"):
-                with st.spinner("Processing"):
+                # get pdf text
-                    # get pdf text
+                raw_text = get_pdf_text(pdf_docs)
-                    raw_text = get_pdf_text(pdf_docs)
+                # get the text chunks
-                    # get the text chunks
+                text_chunks = get_text_chunks(raw_text)
-                    text_chunks = get_text_chunks(raw_text)
+                # create vector store
-                    # create vector store
+                embeddings = create_embeddings(text_chunks)
-                    vectorstore = get_vectorstore(text_chunks)
+                # create conversation chain
-                    # create conversation chain
+                st.session_state.conversation = execute_conversation_chain(
-                    st.session_state.conversation = get_conversation_chain(
+                    embeddings)
-                        vectorstore)
    hide_streamlit_style = """
-        <style>
+            <style>
-        footer {visibility: hidden;}
+            footer {visibility: hidden;}
-        </style>
+            </style>
-    """
+            """
-    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+    st.markdown(hide_streamlit_style, unsafe_allow_html=True) 
 logo = Image.open(r'assets/dsd_icon.png')
 logo_path = './assets/dsd_icon.png'
 def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
@@ -75,6 +65,7 @@ def get_pdf_text(pdf_docs):
            text += page.extract_text()
    return text
 def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
@@ -85,49 +76,37 @@ def get_text_chunks(text):
    chunks = text_splitter.split_text(text)
    return chunks
-def get_vectorstore(text_chunks):
+def create_embeddings(text_chunks):
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore
-# def similarity_search(self, query):
-def get_conversation_chain(vectorstore):
+def execute_conversation_chain(vectorstore):
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
-    # Create a prompt template similar to Project2
-    prompt_template = """
-    You are a helpful assistant who provide information and answer questions based on the documents uploaded. 
-    If the user have any questions related to the content within those documents, 
-    please provide the answer to the best of your ability.
-    However, please note that you are only allowed to provide answers based on the information contained in the uploaded documents. 
-    If user ask anything that is not covered within those documents, say 'I don't know,' 
-    as I do not have access to information beyond what you've provided.
-    Do not ask any question to the user.
-    """
-    messages = [
-                SystemMessagePromptTemplate.from_template(prompt_template)
-    ]
-    prompt = ChatPromptTemplate.from_messages( messages )
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
-        memory=memory,
+        memory=memory
-        condense_question_prompt = prompt
    )
    return conversation_chain
-def handle_user_input(user_question):
+def handle_userinput(user_question):
    response = st.session_state.conversation({'question': user_question})
-    st.session_state['question'].append(user_question)
+    st.session_state.chat_history = response['chat_history']
-    st.session_state['answer'].append(response['answer'])
-    for i in range(len(st.session_state['answer'])):
+    for i, message in enumerate(st.session_state.chat_history):
-        user_message_key = str(i) + '_user'
+        if i % 2 == 0:
-        answer_message_key = str(i) + '_answer'
+            st.write(user_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
-        message(st.session_state['question'][i], is_user=True, key=user_message_key)
+        else:
-        message(st.session_state["answer"][i], key=answer_message_key)
+            st.write(bot_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
 if __name__ == '__main__':
    main()
--- a/Chat with your PDF/assets/dsd_icon.png
+++ b/Chat with your PDF/assets/dsd_icon.png
--- a/Chat with your PDF/docs/PDF-LangChain.jpg
+++ b/Chat with your PDF/docs/PDF-LangChain.jpg
--- a/Chat with your PDF/htmlTemplates.py
+++ b/Chat with your PDF/htmlTemplates.py
--- a/Chat with your PDF/readme.md
+++ b/Chat with your PDF/readme.md
--- a/Chat with your PDF/requirements.txt
+++ b/Chat with your PDF/requirements.txt
@@ -2,7 +2,6 @@ langchain==0.0.184
 PyPDF2==3.0.1
 python-dotenv==1.0.0
 streamlit==1.18.1
-streamlit-chat==0.1.1
 openai==0.27.6
 faiss-cpu==1.7.4
 altair==4

--- a/data/10050-Medicare-and-You.pdf
+++ b/data/10050-Medicare-and-You.pdf
--- a/data/17 (2022), Your Federal Income Tax.pdf
+++ b/data/17 (2022), Your Federal Income Tax.pdf
--- a/data/2023_GPT4All_Technical_Report.pdf
+++ b/data/2023_GPT4All_Technical_Report.pdf
--- a/data/334 (2022), Tax Guide for Small Business.pdf
+++ b/data/334 (2022), Tax Guide for Small Business.pdf
--- a/data/554 (2022), Tax Guide for Seniors.pdf
+++ b/data/554 (2022), Tax Guide for Seniors.pdf
--- a/data/NIPS-2017-attention-is-all-you-need-Paper.pdf
+++ b/data/NIPS-2017-attention-is-all-you-need-Paper.pdf
--- a/data/nutrition_health (1).pdf
+++ b/data/nutrition_health (1).pdf