Commit b0250e71 by zaid

added data sources

parent bd8f91ae
@@ -4,44 +4,44 @@ from dotenv import load_dotenv
from PIL import Image
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
import os
# Load environment variables from .env file
load_dotenv()
def main():
-st.set_page_config(page_title="Chat with multiple PDFs",
-                   page_icon=logo)
-st.write(css, unsafe_allow_html=True)
+st.set_page_config(page_title="Chat with multiple PDFs", page_icon=logo)
if "conversation" not in st.session_state:
st.session_state.conversation = None
if "question"not in st.session_state:
if "question" not in st.session_state:
st.session_state.question = []
if "answer" not in st.session_state:
st.session_state.answer = []
st.header("Chat with multiple PDFs :books:")
resonse_container, container = st.container(), st.container()
st.header("Chat with your Indexed PDFs")
response_container, container = st.container(), st.container()
with container:
user_question = st.text_input("Ask a question about your documents:")
-with resonse_container:
+with response_container:
if user_question:
-handle_userinput(user_question)
+handle_user_input(user_question)
st.sidebar.image(logo, width=50)
with st.sidebar:
-OPENAI_API_KEY = st.text_input('Enter your OpenAI API key',type='password')
+OPENAI_API_KEY = st.text_input('Enter your OpenAI API key', type='password')
if OPENAI_API_KEY:
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
st.subheader("Your documents")
pdf_docs = st.file_uploader(
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
"Upload your PDFs here and click on 'Process'", accept_multiple_files=True, type=['pdf'])
if st.button("Process"):
with st.spinner("Processing"):
# get pdf text
@@ -56,16 +56,17 @@ def main():
# create conversation chain
st.session_state.conversation = get_conversation_chain(
vectorstore)
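The hunk elides the middle of the Process handler. Based on the helpers defined further down in this file, the spinner block presumably chains them as in this sketch (not the commit's exact code):

```python
if st.button("Process"):
    with st.spinner("Processing"):
        raw_text = get_pdf_text(pdf_docs)           # extract text from the uploads
        text_chunks = get_text_chunks(raw_text)     # split into overlapping chunks
        vectorstore = get_vectorstore(text_chunks)  # embed and index with FAISS
        st.session_state.conversation = get_conversation_chain(vectorstore)
```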
hide_streamlit_style = """
<style>
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
<style>
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
logo = Image.open(r'assets/dsd_icon.png')
+logo_path = './assets/dsd_icon.png'
def get_pdf_text(pdf_docs):
text = ""
for pdf in pdf_docs:
@@ -74,7 +75,6 @@ def get_pdf_text(pdf_docs):
text += page.extract_text()
return text
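The hunk header cuts out the loop setup of this helper. Given the PdfReader import at the top of the file, the full function presumably reads as follows (a sketch reconstructing the elided middle):

```python
from PyPDF2 import PdfReader

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text
```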
def get_text_chunks(text):
text_splitter = CharacterTextSplitter(
separator="\n",
@@ -85,32 +85,47 @@ def get_text_chunks(text):
chunks = text_splitter.split_text(text)
return chunks
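The splitter's keyword arguments fall inside the elided span. A common configuration looks like this sketch; the specific chunk sizes are assumptions, not the commit's actual values:

```python
from langchain.text_splitter import CharacterTextSplitter

def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,    # assumed value; the commit's setting is elided
        chunk_overlap=200,  # assumed value
        length_function=len,
    )
    return text_splitter.split_text(text)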
def get_vectorstore(text_chunks):
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
return vectorstore
+# def similarity_search(self, query):
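The stub above stays commented out, likely because LangChain's FAISS wrapper already exposes similarity search directly on the store built by get_vectorstore. A minimal query sketch (the query text and k are illustrative):

```python
# Return the k chunks most similar to the query.
docs = vectorstore.similarity_search("What do the documents say about pricing?", k=4)
for doc in docs:
    print(doc.page_content[:200])  # preview the top matches
```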
def get_conversation_chain(vectorstore):
llm = ChatOpenAI()
memory = ConversationBufferMemory(
memory_key='chat_history', return_messages=True)
+# Create a prompt template similar to Project2
+prompt_template = """
+You are a helpful assistant who provides information and answers questions based on the uploaded documents.
+If the user has questions about the content of those documents,
+answer them to the best of your ability.
+However, you may only answer from information contained in the uploaded documents.
+If the user asks anything not covered by those documents, say 'I don't know,'
+as you do not have access to information beyond what has been provided.
+Do not ask the user any questions.
+"""
+messages = [
+SystemMessagePromptTemplate.from_template(prompt_template)
+]
+prompt = ChatPromptTemplate.from_messages(messages)
conversation_chain = ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=vectorstore.as_retriever(),
-memory=memory
+memory=memory,
+condense_question_prompt=prompt
)
return conversation_chain
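Mirroring how handle_user_input invokes the chain below, the returned object is called with a question dict and yields an answer (a usage sketch; the question text is illustrative):

```python
chain = get_conversation_chain(vectorstore)
response = chain({'question': 'Summarize the uploaded documents.'})
print(response['answer'])  # ConversationBufferMemory carries chat_history across turns
```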
-def handle_userinput(user_question):
+def handle_user_input(user_question):
response = st.session_state.conversation({'question': user_question})
st.session_state['question'].append(user_question)
st.session_state['answer'].append(response['answer'])
for i in range(len(st.session_state['answer'])):
user_message_key = str(i) + '_user'
answer_message_key = str(i) + '_answer'
message(st.session_state['question'][i], is_user=True, key=user_message_key)
message(st.session_state["answer"][i], key=answer_message_key)