Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
Stackoverflow_scrapping
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Muhammad Sabih Ur
Stackoverflow_scrapping
Commits
5c0c3f65
Commit
5c0c3f65
authored
Apr 06, 2023
by
Muhammad Sabih Ur
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update stackoverflow.gitlab-ci.yml
parent
fc82150e
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
29 deletions
+28
-29
stackoverflow.gitlab-ci.yml
stackoverflow.gitlab-ci.yml
+28
-29
No files found.
stackoverflow.gitlab-ci.yml
View file @
5c0c3f65
...
...
@@ -8,48 +8,46 @@ header = {
'
Connection'
:
'
keep-alive'
,
}
data = []
def getQuestions(tag, pgno)
:
url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=newest&page={pgno}&pagesize=50'
data =[]
def getQuestions(tag,pgno)
:
url = f'https://stackoverflow.com/questions/tagged/{tag}?tab=frequent&page={pgno}&pagesize=50'
print(url)
try
:
r = requests.get(url,
headers=header)
soup = BeautifulSoup(r.text,
'html.parser')
r = requests.get(url,headers=header)
soup = BeautifulSoup(r.text,
'html.parser')
except Exception as e
:
print(f"An error occurred
:
{
e
}
"
)
questions
=
soup.find_all('div',
{'class':
's-post-summary'})
questions
=
soup.find_all('div',{'class':
's-post-summary'})
#
print(questions)
for
item
in
questions:
question
=
{
'title':
item.find('a',
{'class':
's-link'}).text.strip(),
'description':
item.find('div',
{'class':
's-post-summary--content-excerpt'}).text.strip(),
'date':
item.find('span',
{'class':
'relativetime'})['title'],
'link':
'https://stackoverflow.com/'
+
item.find('a',
{'class':
's-link'})['href'],
#
print(vote)
#
print(link)
#
print(description)
#
print(votes)
'title'
:
item.find('a',{'class':'s-link'}).text.strip(),
'description'
:
item.find('div',{'class':'s-post-summary--content-excerpt'}).text.strip(),
'date'
:
item.find('span',{'class':'relativetime'})['title']
if
item.find('span',{'class':'relativetime'})
else
'',
'link'
:'https://stackoverflow.com/'
+
item.find('a',
{'class':
's-link'})['href'],
'votes':
item.find_all('span',
{'class':
's-post-summary--stats-item-number'})[0].text,
'views'
:
item.find_all('span',
{'class':
's-post-summary--stats-item-number'})[2].text
#
print(vote)
#
print(link)
#
print(description)
#
print(votes)
}
#
print(question)
data.append(question)
return
#
Total
pages
we
have
for
python
tag
"42473"
for x in range(102, 201)
:
getQuestions('python', x)
#
42473
for
x
in
range(1,3860):
getQuestions('python',x)
df
=
pd.DataFrame(data)
#
print(len(data))
print(df.head())
df.to_csv('F:\StacksOverflow\s
tacks3.csv', index=False
)
print("Done")
df.to_csv('F:
\
StacksOverflow
\
s
s_2.csv'
)
print("Done")
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment