PS5 prices.ipynb 3.02 KB
Newer Older
Arham Noman committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Web scraping with BeautifulSoup"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import required libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup as soup\n",
    "from urllib.request import urlopen as uReq\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create an object and turn an HTML page into a soup object"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Fetching the raw HTML page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"playstation+5\"  # Put your search term here\n",
    "my_url = 'YOUR_EBAY_URL'.format(query) # Paste the url for an Ebay search from your browser and replace the query part with a {}\n",
    "my_url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "uClient = uReq(my_url)\n",
    "page_html = uClient.read()\n",
    "uClient.close()\n",
    "\n",
    "page_soup = soup(page_html, 'html.parser')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find all the relevant classes and extract data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "divs = page_soup.findAll('div',{'class':'s-item__details clearfix'})    # Find the HTML tag with the data we are looking for and fetch all instances of it\n",
    "prices = []\n",
    "\n",
    "for PS5 in divs: \n",
    "    price = PS5.find('span',{'class':\"s-item__price\"})\n",
    "    price = price.text[1:]\n",
    "    price = price.replace(',','')\n",
    "    prices.append(price)\n",
    "\n",
    "\n",
    "prices = pd.to_numeric(prices,errors='coerce')\n",
    "prices"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Use the scraped data to get insights into the market for PS5 consoles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "sns.set()\n",
    "sns.boxplot(data=prices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "8fe041c8bf3f6c634cfbe99daa1d0835869ac67f33d4cde2619e21380e56c003"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}