Commit 0a9fa80e by Muhammad Sabih Ur

Add new file

parents
Pipeline #6 failed
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
# HTTP headers attached to every request made by getQuestions(); the
# User-Agent is a desktop Chrome browser string.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
}

# Shared accumulator: getQuestions() appends one dict per scraped question.
data = []
def getQuestions(tag, pgno):
    """Scrape one listing page of Stack Overflow questions into ``data``.

    Fetches page ``pgno`` (50 questions per page, newest first) of the
    questions tagged ``tag`` and appends one dict per question to the
    module-level ``data`` list. Each dict has the keys ``title``,
    ``description``, ``date`` and ``link``.

    Args:
        tag: Stack Overflow tag name, inserted into the URL as-is.
        pgno: Page number of the listing to fetch.

    Returns:
        None. Results are accumulated in ``data``. If the request fails or
        the server answers with an HTTP error status, the error is printed
        and nothing is appended.
    """
    url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=newest&page={pgno}&pagesize=50'
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        r = requests.get(url, headers=header, timeout=30)
        # Treat 4xx/5xx answers (e.g. rate limiting) as failures instead of
        # silently parsing an error page that contains no questions.
        r.raise_for_status()
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        # Nothing to parse for this page; move on to the next call.
        return

    soup = BeautifulSoup(r.text, 'html.parser')
    for item in soup.find_all('div', {'class': 's-post-summary'}):
        title_link = item.find('a', {'class': 's-link'})
        excerpt = item.find('div', {'class': 's-post-summary--content-excerpt'})
        timestamp = item.find('span', {'class': 'relativetime'})

        # Skip summaries that lack any expected element rather than letting
        # a single odd entry abort the entire run.
        if title_link is None or excerpt is None or timestamp is None:
            continue
        href = title_link.get('href')
        date = timestamp.get('title')
        if href is None or date is None:
            continue

        data.append({
            'title': title_link.text.strip(),
            'description': excerpt.text.strip(),
            'date': date,
            # urljoin avoids the double slash that plain concatenation
            # produces when the href already starts with '/'.
            'link': urljoin('https://stackoverflow.com/', href),
        })
# Crawl listing pages 102-200 of the 'python' tag and save the results.
# (The tag had 42473 pages in total when this was written.)
for x in range(102, 201):
    getQuestions('python', x)

df = pd.DataFrame(data)
print(df.head())
# Raw string: '\S' and '\s' are not valid escape sequences, so the plain
# literal triggers an invalid-escape warning on current Python versions.
# The resulting path is unchanged.
df.to_csv(r'F:\StacksOverflow\stacks3.csv', index=False)
print("Done")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment