Commit 2722d78a by Jonathan Kelly

Switchup scraping update

Code finished without comments or cleanup
parent 8746b005
{}
\ No newline at end of file
...@@ -18,13 +18,13 @@ ...@@ -18,13 +18,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 4,
"source": [ "source": [
"import requests\r\n", "import requests\r\n",
"from bs4 import BeautifulSoup\r\n", "from bs4 import BeautifulSoup\r\n",
"\r\n", "\r\n",
"\r\n", "\r\n",
"URL = \"https://www.switchup.org/bootcamps/data-science-dojo?page=1\"\r\n", "URL = \"https://www.switchup.org/bootcamps/data-science-dojo?page=\"\r\n",
"page = requests.get(URL)\r\n", "page = requests.get(URL)\r\n",
"soup = BeautifulSoup(page.content, \"html.parser\")" "soup = BeautifulSoup(page.content, \"html.parser\")"
], ],
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 5,
"source": [ "source": [
"n=1\r\n", "n=1\r\n",
"while True:\r\n", "while True:\r\n",
...@@ -56,7 +56,15 @@ ...@@ -56,7 +56,15 @@
"main_page_url_list = [URL + str(i) for i in range(i,n)]\r\n", "main_page_url_list = [URL + str(i) for i in range(i,n)]\r\n",
"print(main_page_url_list) # check" "print(main_page_url_list) # check"
], ],
"outputs": [], "outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['https://www.switchup.org/bootcamps/data-science-dojo?page=1', 'https://www.switchup.org/bootcamps/data-science-dojo?page=2', 'https://www.switchup.org/bootcamps/data-science-dojo?page=3', 'https://www.switchup.org/bootcamps/data-science-dojo?page=4', 'https://www.switchup.org/bootcamps/data-science-dojo?page=5', 'https://www.switchup.org/bootcamps/data-science-dojo?page=6', 'https://www.switchup.org/bootcamps/data-science-dojo?page=7', 'https://www.switchup.org/bootcamps/data-science-dojo?page=8', 'https://www.switchup.org/bootcamps/data-science-dojo?page=9', 'https://www.switchup.org/bootcamps/data-science-dojo?page=10', 'https://www.switchup.org/bootcamps/data-science-dojo?page=11', 'https://www.switchup.org/bootcamps/data-science-dojo?page=12']\n"
]
}
],
"metadata": {} "metadata": {}
}, },
{ {
...@@ -65,7 +73,7 @@ ...@@ -65,7 +73,7 @@
"source": [ "source": [
"# for x, _ in enumerate(range(1,n)):\r\n", "# for x, _ in enumerate(range(1,n)):\r\n",
"# url = main_page_url_list[]\r\n", "# url = main_page_url_list[]\r\n",
"page = requests.get('https://www.switchup.org/bootcamps/data-science-dojo?page=2')\r\n", "page = requests.get(url)\r\n",
"soup = BeautifulSoup(page.content, \"html.parser\")\r\n", "soup = BeautifulSoup(page.content, \"html.parser\")\r\n",
"pretty_soup = soup.prettify()\r\n", "pretty_soup = soup.prettify()\r\n",
"with open(\"Review page \" + str(2) + \".txt\", \"w\", encoding=\"utf-8\") as f:\r\n", "with open(\"Review page \" + str(2) + \".txt\", \"w\", encoding=\"utf-8\") as f:\r\n",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment