Commit 5c0c3f65 by Muhammad Sabih Ur

Update stackoverflow.gitlab-ci.yml

parent fc82150e
@@ -8,48 +8,46 @@ header = {
'Connection': 'keep-alive',
}
data = []
def getQuestions(tag, pgno):
url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=newest&page={pgno}&pagesize=50'
data =[]
def getQuestions(tag,pgno):
url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=frequent&page={pgno}&pagesize=50'
print(url)
try:
r = requests.get(url, headers=header)
soup = BeautifulSoup(r.text, 'html.parser')
r = requests.get(url,headers=header)
soup = BeautifulSoup(r.text,'html.parser')
except Exception as e:
print(f"An error occurred: {e}")
questions = soup.find_all('div', {'class': 's-post-summary'})
questions= soup.find_all('div',{'class':'s-post-summary'})
# print(questions)
for item in questions:
question = {
'title': item.find('a', {'class': 's-link'}).text.strip(),
'description': item.find('div', {'class': 's-post-summary--content-excerpt'}).text.strip(),
'date': item.find('span', {'class': 'relativetime'})['title'],
'link': 'https://stackoverflow.com/' + item.find('a', {'class': 's-link'})['href'],
# print(vote)
# print(link)
# print(description)
# print(votes)
'title' : item.find('a',{'class':'s-link'}).text.strip(),
'description' : item.find('div',{'class':'s-post-summary--content-excerpt'}).text.strip(),
'date' : item.find('span',{'class':'relativetime'})['title'] if item.find('span',{'class':'relativetime'}) else '',
'link' :'https://stackoverflow.com/' + item.find('a', {'class': 's-link'})['href'],
'votes': item.find_all('span', {'class': 's-post-summary--stats-item-number'})[0].text,
'views' : item.find_all('span', {'class': 's-post-summary--stats-item-number'})[2].text
# print(vote)
# print(link)
# print(description)
# print(votes)
}
# print(question)
data.append(question)
return
# Total pages we have for python tag "42473"
for x in range(102, 201):
getQuestions('python', x)
# 42473
for x in range(1,3860):
getQuestions('python',x)
df = pd.DataFrame(data)
# print(len(data))
print(df.head())
df.to_csv('F:\StacksOverflow\stacks3.csv', index=False)
print("Done")
df.to_csv('F:\StacksOverflow\ss_2.csv')
print("Done")
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment