Update stackoverflow.gitlab-ci.yml

5c0c3f65 · Muhammad Sabih Ur · fc82150e · 5c0c3f65
Commit 5c0c3f65 authored Apr 06, 2023 by Muhammad Sabih Ur
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 28 additions and 29 deletions

stackoverflow.gitlab-ci.yml stackoverflow.gitlab-ci.yml +28 -29

No files found.
--- a/stackoverflow.gitlab-ci.yml
+++ b/stackoverflow.gitlab-ci.yml
@@ -8,48 +8,46 @@ header = {
 'Connection': 'keep-alive',
 }

-data = []

-
-def getQuestions(tag, pgno):
- url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=newest&page={pgno}&pagesize=50'
+data =[]
+def getQuestions(tag,pgno):
+ url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=frequent&page={pgno}&pagesize=50'
+ print(url)
 try:
- r = requests.get(url, headers=header)
- soup = BeautifulSoup(r.text, 'html.parser')
+ r = requests.get(url,headers=header)
+ soup = BeautifulSoup(r.text,'html.parser') 
 except Exception as e:
 print(f"An error occurred: {e}")
-
- questions = soup.find_all('div', {'class': 's-post-summary'})
-
+ 
+ questions= soup.find_all('div',{'class':'s-post-summary'})
+ 
 # print(questions)
 for item in questions:
 question = {
- 'title': item.find('a', {'class': 's-link'}).text.strip(),
-
- 'description': item.find('div', {'class': 's-post-summary--content-excerpt'}).text.strip(),
- 'date': item.find('span', {'class': 'relativetime'})['title'],
- 'link': 'https://stackoverflow.com/' + item.find('a', {'class': 's-link'})['href'],
-
-
- # print(vote)
- # print(link)
- # print(description)
- # print(votes)
+ 'title' : item.find('a',{'class':'s-link'}).text.strip(),
+
+ 'description' : item.find('div',{'class':'s-post-summary--content-excerpt'}).text.strip(),
+ 'date' : item.find('span',{'class':'relativetime'})['title'] if item.find('span',{'class':'relativetime'}) else '',
+ 'link' :'https://stackoverflow.com/' + item.find('a', {'class': 's-link'})['href'],
+ 'votes': item.find_all('span', {'class': 's-post-summary--stats-item-number'})[0].text,
+ 'views' : item.find_all('span', {'class': 's-post-summary--stats-item-number'})[2].text
+
+ 
+ # print(vote)
+ # print(link)
+ # print(description)
+ # print(votes)
 }
 # print(question)
 data.append(question)
 return
-
-
-# Total pages we have for python tag "42473"
-
-
-for x in range(102, 201):
- getQuestions('python', x)
+# 42473
+for x in range(1,3860):
+ getQuestions('python',x)


 df = pd.DataFrame(data)
 # print(len(data))
 print(df.head())
-df.to_csv('F:\StacksOverflow\stacks3.csv', index=False)
-print("Done")
+df.to_csv('F:\StacksOverflow\ss_2.csv')
+print("Done")
\ No newline at end of file