Commit fb6672b3 by Albar

Upload New File

parent 004e91de
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# Installing required packages.
# %pip (run_line_magic) installs into the environment of the running kernel,
# which a plain shell `pip` is not guaranteed to do.
# openai is pinned to the last 0.x release because this notebook uses the
# legacy module-level API (openai.api_type / openai.ChatCompletion), which
# was removed in openai 1.0.
get_ipython().run_line_magic('pip', 'install openai==0.28.1')
get_ipython().run_line_magic('pip', 'install nltk')
get_ipython().run_line_magic('pip', 'install pandas')
# DataFrame.to_markdown(), used when building the query, needs tabulate.
get_ipython().run_line_magic('pip', 'install tabulate')
# In[1]:
# Importing libraries
import os
import openai
import io
import pandas as pd
# Download NLTK resources
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
# Using Azure OpenAI. Credentials are read from the environment so that real
# keys never have to be written into (or committed with) this file:
#   AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_VERSION, AZURE_OPENAI_API_KEY
# When a variable is not set, the original placeholder string is used, so
# replacing the placeholders below by hand still works.
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT", "INSERT URI HERE")
openai.api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "INSERT API VERSION HERE")
openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY", "INSERT KEY HERE")
# In[3]:
# Small hand-written example dataset: one entry per person, column by column.
full_names = ['Alice Johnson', 'Bob Smith', 'Charlie Brown', 'David Miller', 'Eva White']
addresses = ['123 Main St', '456 Oak Ave', '789 Pine Ln', '101 Elm Blvd', '202 Cedar Dr']
ages = [25, 30, 22, 35, 28]
review_texts = [
    "The food at this restaurant was absolutely amazing! Each dish was a delightful experience, and the service was top-notch.",
    "Unfortunately, my experience at this restaurant was less than satisfactory. The service was slow, and the food arrived cold. I wouldn't recommend it.",
    "I enjoyed the atmosphere of the restaurant, but the portions were disappointingly small. I left feeling hungry despite the tasty dishes.",
    "The service was excellent, and the dishes were delicious. I will definitely come back to this restaurant in the future.",
    "My visit to this restaurant was terrible. The staff was rude, and the food had no flavor. I won't be returning.",
]
# Column name -> values; the key order fixes the column order of the frame.
data = {
    'Full Name': full_names,
    'Address': addresses,
    'Age': ages,
    'reviews': review_texts,
}
original_data = pd.DataFrame(data)
# In[4]:
original_data
# In[5]:
# System instruction: asks the model to answer with a markdown table only.
prompt = "You are an AI Assistant that generates 5 synthetic data points upon user request. Only return the markdown table. Do not return additional text. Create new reviews"
# User request: the example rows are embedded as a markdown table (index omitted).
source_table = original_data.to_markdown(index=False)
query = f"Generate 5 new synthetic data with similar distribution, pattern and sentiment to the following data. Write new reviews:\n\n{source_table}"
chat_messages = [
    {"role": "system", "content": prompt},
    {"role": "user", "content": query},
]
# Make a request to OpenAI GPT-3.5 (the model is selected through `engine`).
response = openai.ChatCompletion.create(
    engine="INSERT MODEL NAME HERE",
    messages=chat_messages,
    temperature=0.7,
    stop=None,
)
# In[6]:
# Function to get sentiment score
# The analyzer is created lazily on first use and then reused, instead of
# being rebuilt for every review that is scored.
_sentiment_analyzer = None


def get_sentiment_score(text):
    """Return the 'compound' sentiment score of ``text``.

    Parameters
    ----------
    text : str
        The text to score (here: one restaurant review).

    Returns
    -------
    The value stored under the 'compound' key of the dictionary returned by
    ``SentimentIntensityAnalyzer.polarity_scores``.
    """
    global _sentiment_analyzer
    if _sentiment_analyzer is None:
        _sentiment_analyzer = SentimentIntensityAnalyzer()
    sentiment_scores = _sentiment_analyzer.polarity_scores(text)
    return sentiment_scores['compound']
# Score each original review, then attach the scores as the 'sentiment_score' column
original_review_scores = original_data['reviews'].map(get_sentiment_score)
original_data['sentiment_score'] = original_review_scores
# Show the frame including the new column
original_data
# In[7]:
# The reply text is the message content of the first returned choice.
first_choice = response['choices'][0]
generated_text = first_choice['message']['content']
# In[8]:
generated_text
# In[9]:
def _markdown_table_rows(markdown_text):
    """Split a markdown table into lists of stripped cell strings.

    Lines that do not start with '|' (blank lines, prose around the table)
    are ignored, and the header separator row (cells made only of '-' and
    ':') is skipped, so the result holds the header row followed by the
    data rows.

    Raises
    ------
    ValueError
        If the text contains no table rows at all.
    """
    rows = []
    for raw_line in markdown_text.strip().split('\n'):
        line = raw_line.strip()
        if not line.startswith('|'):
            continue
        # Tolerate a missing closing pipe so the last cell is not lost below.
        if not line.endswith('|'):
            line += '|'
        cells = [cell.strip() for cell in line.split('|')[1:-1]]
        # Header separator such as |:---|---:| carries no data.
        if cells and all(cell and set(cell) <= set('-:') for cell in cells):
            continue
        rows.append(cells)
    if not rows:
        raise ValueError("No markdown table rows found in the generated text")
    return rows


# Normalised table text: one '|cell|cell|...|' line per header/data row.
cleaned_string = '\n'.join('|' + '|'.join(cells) + '|' for cells in _markdown_table_rows(generated_text))
# Use pandas to read the formatted string into a DataFrame
df = pd.read_csv(io.StringIO(cleaned_string), sep="|", index_col=False)
# In[10]:
# The leading and trailing pipes produce two empty columns; remove them.
df = df.dropna(axis=1, how='all')
# The separator row was already skipped during parsing, so no row is dropped here.
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
# In[11]:
df
# In[14]:
# Score each generated review and store the scores next to the reviews
generated_review_scores = df['reviews'].map(get_sentiment_score)
df['sentiment_score'] = generated_review_scores
df
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment