Commit 2492a715 by zaid

updated resources

parent 131cd7b7
.env
\ No newline at end of file
# Social-Content-Generator
\ No newline at end of file
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain import PromptTemplate
import streamlit as st
from PyPDF2 import PdfReader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.redis import Redis
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from docx import Document
from docx.shared import Inches
import io
from PIL import Image
import requests
from dotenv import load_dotenv
# Load OPENAI_API_KEY (and any other secrets) from a local .env file.
load_dotenv()

# Streamlit page chrome: wide layout, custom tab title and favicon.
st.set_page_config(layout="wide",
page_title="Social Content Generator",
page_icon = Image.open('assets/dsd_icon.png'))

# CSS injected below hides Streamlit's default hamburger menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# NOTE(review): this call discards its return value and sets nothing — it is
# likely a leftover. OpenAIEmbeddings/ChatOpenAI read OPENAI_API_KEY from the
# environment themselves; confirm and consider removing.
os.getenv('OPENAI_API_KEY')

# Shared embedding model used when indexing PDF chunks into Redis.
embeddings = OpenAIEmbeddings()
def load_pdf_text(pdfs):
    """Extract and concatenate the text of every page of every uploaded PDF.

    Args:
        pdfs: iterable of file-like objects (e.g. Streamlit UploadedFile).

    Returns:
        str: all extracted page text joined together ("" if nothing extractable).
    """
    pages = []
    for pdf in pdfs:
        reader = PdfReader(pdf)
        for page in reader.pages:
            # extract_text() returns None for image-only/scanned pages; the
            # original `text += ...` raised TypeError in that case. Guard it.
            pages.append(page.extract_text() or "")
    # join() avoids the quadratic cost of repeated string concatenation.
    return "".join(pages)
def pdf_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding.

    Args:
        text: the full extracted PDF text.

    Returns:
        list[str]: 1000-character chunks with 100 characters of overlap.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    ).split_text(text)
def get_db_from_chunks(text_chunks, redis_url="redis://localhost:6379"):
    """Index *text_chunks* into a Redis vector store using the shared embeddings.

    Args:
        text_chunks: list[str] of text pieces to embed and index.
        redis_url: Redis connection string. Defaults to the local instance,
            preserving the previously hard-coded behaviour while letting
            callers point at a different server.

    Returns:
        The langchain Redis vector store wrapping the indexed chunks.
    """
    return Redis.from_texts(text_chunks, embeddings, redis_url=redis_url)
def query_response(db, query):
    """Generate a blog post answering *query* from the closest chunks in *db*.

    Args:
        db: Redis vector store built from the uploaded documents.
        query: the user's blog idea/topic.

    Returns:
        str: the model's generated blog text.
    """
    # Pull the four chunks most similar to the query and merge their text
    # into a single context string for the prompt.
    matches = db.similarity_search(query, k=4)
    context = " ".join(doc.page_content for doc in matches)
    # Chat model used for generation.
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.9)
    # System instructions describing the blog-writing task.
    system_template = """
Imagine you are a skilled blog writer tasked with creating an blog based on the content and data provided in the {documents}.
Start from the background information provided in the documents and use it to create a well-researched article around it and go in depth on the topic.
Your goal is to use only factual information from the documents to craft a comprehensive and informative blog post.
To enhance readability and length, create headings and subheadings based on the factual content within the documents.
If you encounter a situation where you don't have enough information to answer a question, simply state "I don't know."
Avoid using terms like "transcript" or "embeddings" etc; instead, refer to the source material as 'documents'
Ensure that the resulting blog is both lengthy and factually accurate.
"""
    # Two-part chat prompt: system instructions followed by the user question.
    chat_prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("Answer the following question: {question}"),
    ])
    # Run the chain and hand back the model's answer.
    chain = LLMChain(llm=chat_model, prompt=chat_prompt)
    return chain.run(question=query, documents=context)
def load_llm(max_tokens, prompt_template):
    """Build an LLMChain around gpt-3.5-turbo for a single-variable prompt.

    Args:
        max_tokens: cap on the number of tokens the model may generate.
            BUG FIX: this parameter was previously accepted but silently
            ignored; it is now forwarded to the model.
        prompt_template: template string (e.g. containing {user_input}).

    Returns:
        LLMChain ready to be invoked with the template's variables.
    """
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.9,
                     max_tokens=max_tokens)
    return LLMChain(
        llm=llm,
        prompt=PromptTemplate.from_template(prompt_template),
    )
def get_src_original_url(query):
    """Fetch the original-size URL of the top Pexels photo matching *query*.

    Args:
        query: search term for the Pexels image search API.

    Returns:
        str URL of the photo, or None when nothing was found or the request
        failed (a message is written to the Streamlit UI in both cases).
    """
    url = 'https://api.pexels.com/v1/search'
    # SECURITY: the API key was committed in source. Prefer the PEXELS_API_KEY
    # environment variable; the embedded key remains only as a backward-
    # compatible fallback and should be revoked once the env var is deployed.
    headers = {
        'Authorization': os.getenv(
            'PEXELS_API_KEY',
            "2I5wwGXzrCpMdfNlwoGLOUvXkgKW2pJxx9Zy4EaadsvtbWLDdFE14ZcR"),
    }
    params = {
        'query': query,
        'per_page': 1,
    }
    # Timeout prevents the Streamlit app from hanging on a dead connection.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        photos = response.json().get('photos', [])
        if photos:
            return photos[0]['src']['original']
        st.write("No photos found for the given query.")
    else:
        st.write(f"Error: {response.status_code}, {response.text}")
    return None
def _append_image(doc, image, width_inches=4):
    """Serialize a PIL *image* to an in-memory PNG and append it to *doc*."""
    stream = io.BytesIO()
    image.save(stream, format='PNG')
    stream.seek(0)
    doc.add_picture(stream, width=Inches(width_inches))

def create_word_docx(user_input, paragraph, image_input, dojo_img):
    """Assemble a Word document: brand logo, heading, blog text, topic image.

    Args:
        user_input: blog topic, used as the level-1 heading.
        paragraph: the generated blog text.
        image_input: PIL image fetched for the topic (placed after the text).
        dojo_img: PIL image of the brand logo (placed at the top).

    Returns:
        docx.Document ready to be saved to a stream or file.
    """
    doc = Document()
    _append_image(doc, dojo_img)      # brand logo first
    doc.add_heading(user_input, level=1)
    doc.add_paragraph(paragraph)
    _append_image(doc, image_input)   # topic illustration after the text
    return doc
def _offer_download(title, body_text, image_url):
    """Build the .docx from the generated content and offer it for download."""
    st.markdown("**Download the content**")
    doc = create_word_docx(
        title,
        body_text,
        Image.open(requests.get(image_url, stream=True).raw),
        Image.open('assets/dsdojo.png'),
    )
    # Save the Word document to a BytesIO buffer and serve it.
    doc_buffer = io.BytesIO()
    doc.save(doc_buffer)
    doc_buffer.seek(0)
    st.download_button(
        label='Download Blog',
        data=doc_buffer,
        file_name=title + '.docx',
        mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )

def _generate_from_private_data():
    """Sidebar flow: index uploaded PDFs into Redis, then generate from them."""
    uploaded_files = st.sidebar.file_uploader("**Upload your files and press process**", type=["pdf"], accept_multiple_files=True)
    if uploaded_files is not None:
        if st.sidebar.button("Process"):
            with st.spinner("Processing..."):
                pdf_text = load_pdf_text(uploaded_files)
                text_chunks = pdf_text_chunks(pdf_text)
                # Keep the vector store across reruns via session state.
                st.session_state.db = get_db_from_chunks(text_chunks)
    query = st.sidebar.text_input("Please enter the idea/topic for the blog you want to generate using the indexed data!")
    img_input = st.sidebar.text_input("Please enter the topic for the image you want to fetch!")
    if len(query) > 0 and len(img_input) > 0:
        with st.spinner("Generating Content..."):
            response = query_response(st.session_state.db, query)
            if len(response) > 0:
                # (typo fix: "has been been" -> "has been")
                st.info("Content has been generated successfully!")
                st.write(response, unsafe_allow_html=True)
            else:
                st.error("Content couldn't be generated!")
            image_url = get_src_original_url(img_input)
            # get_src_original_url returns None on failure; the original code
            # crashed on st.image(None) / requests.get(None).
            if image_url:
                st.image(image_url)
                _offer_download(query, response, image_url)

def _generate_with_chatgpt():
    """Sidebar flow: generate a blog directly from a user-supplied topic."""
    user_input = st.sidebar.text_input("Please enter the idea/topic for the article you want to generate!")
    image_input = st.sidebar.text_input("Please enter the topic for the image you want to fetch!")
    if len(user_input) > 0 and len(image_input) > 0:
        with st.spinner("Generating Content..."):
            prompt_template = """
Imagine you are a skilled blog writer tasked with creating an article/blog based on the {user_input}.
your task is to create a well-researched article around it.
Use factual information and reliable sources to support your content.
Feel free to structure your blog with headings and subheadings to enhance its readability and organization.
Ensure that the resulting blog is informative, engaging, and factually accurate, providing valuable insights on the chosen idea or topic.
"""
            llm_call = load_llm(max_tokens=800, prompt_template=prompt_template)
            result = llm_call(user_input)
            st.subheader(user_input)
            if len(result) > 0:
                # (typo fix: "has been been" -> "has been")
                st.info("Content has been generated successfully!")
                st.write(result['text'], unsafe_allow_html=True)
            else:
                st.error("Content couldn't be generated!")
            image_url = get_src_original_url(image_input)
            # Guard against a failed image fetch (None) before rendering.
            if image_url:
                st.image(image_url)
                _offer_download(user_input, result['text'], image_url)

def main():
    """Streamlit entry point: pick a generation mode and run its sidebar flow."""
    if 'db' not in st.session_state:
        st.session_state.db = None
    # Forward slash works on every OS; the original 'assets\\dsdojo.png'
    # backslash path only resolved on Windows (other asset paths here use '/').
    st.sidebar.image('assets/dsdojo.png')
    st.sidebar.title("Blog Generator")
    selection = st.sidebar.radio("**Select your Approach to generate the Blog**", ("Utilize Your Private Data", "Blog Powered by ChatGPT"))
    if selection == "Utilize Your Private Data":
        _generate_from_private_data()
    else:
        _generate_with_chatgpt()
# Run the Streamlit app only when this file is executed directly
# (not when imported as a module).
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment