Challenges: Scrape Top Indices Data from Yahoo Finance
Utilize Selenium to scrape the history of important market indices from Yahoo.
We'll cover the following
Problem statement
The task is to write code that gets the historical data (first 100 rows) for each of the top three major indices from Yahoo World-Indices News.
Expected output
A list of JSON objects, each with the keys:
{"date", "open", "close"}
extracted from the table as shown on the screen above.
Points to remember:
Scrape only the first 100 rows for each index and no more.
Write your code in `main.py` under the `#todo` section. After you run the code, the output is tested against the expected output using hidden test cases.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementNotInteractableException
from selenium.webdriver.support.wait import WebDriverWait

# NOTE(review): `webdriver`, `Service`, `ChromeDriverManager` and `options`
# are used by `_scrape` but are not defined in this part of the file;
# presumably they are set up earlier in main.py -- confirm.

# How many index links to follow and how many history rows to keep per index.
_INDEX_COUNT = 3
_ROW_LIMIT = 100
# Upper bound on how long to keep scrolling for more rows before giving up.
_LOAD_TIMEOUT_SECONDS = 30

_INDEX_LINK_SELECTOR = "tr > td:nth-child(1) > span > div > a"
_HISTORY_ROW_SELECTOR = "table > tbody > tr"


def _load_history_rows(driver, wanted, timeout=_LOAD_TIMEOUT_SECONDS):
    """Return up to ``wanted`` table rows, scrolling until enough are present.

    Gives up after ``timeout`` seconds and returns whatever rows exist, so a
    table with fewer than ``wanted`` rows cannot hang the scraper.
    """

    def enough_rows(drv):
        rows = drv.find_elements(By.CSS_SELECTOR, _HISTORY_ROW_SELECTOR)
        if len(rows) >= wanted:
            return rows
        # Scroll to the bottom of the page before the next poll.
        drv.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        return False

    try:
        rows = WebDriverWait(driver, timeout).until(enough_rows)
    except TimeoutException:
        rows = driver.find_elements(By.CSS_SELECTOR, _HISTORY_ROW_SELECTOR)
    return rows[:wanted]


def _row_to_record(row):
    """Build a ``{"date", "open", "close"}`` dict from cells 1, 2 and 5 of a row."""
    return {
        "date": row.find_element(By.CSS_SELECTOR, 'td:nth-child(1)').text,
        "open": row.find_element(By.CSS_SELECTOR, 'td:nth-child(2)').text,
        "close": row.find_element(By.CSS_SELECTOR, 'td:nth-child(5)').text,
    }


def _scrape():
    """Scrape history rows for the first three indices on the world-indices page.

    Opens the Yahoo Finance world-indices page, takes the first three links
    matched by the index-link selector, visits each link's ``/history`` page
    and collects up to 100 table rows from it.

    Returns:
        A list of dicts with the keys ``"date"``, ``"open"`` and ``"close"``,
        whose values are the text of the 1st, 2nd and 5th cell of each row.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    data = []
    try:
        driver.get("https://finance.yahoo.com/world-indices/")
        anchors = driver.find_elements(By.CSS_SELECTOR, _INDEX_LINK_SELECTOR)
        # Resolve hrefs up front: the anchor elements belong to the listing
        # page and are no longer usable once the driver navigates away.
        links = [anchor.get_attribute("href") for anchor in anchors[:_INDEX_COUNT]]
        for link in links:
            # Strip a trailing slash so the URL never contains "//history".
            driver.get(link.rstrip("/") + "/history")
            data.extend(
                _row_to_record(row)
                for row in _load_history_rows(driver, _ROW_LIMIT)
            )
    finally:
        # quit() ends the whole browser session even when scraping fails.
        driver.quit()
    return data


def test(expected_output, output):
    """Compare scraped records with the expected ones and print the verdicts.

    For each of the ``date``, ``open`` and ``close`` fields, the set of values
    in ``output`` must equal the set of values in ``expected_output``; order
    and duplicates are therefore ignored. Checking stops at the first failing
    field.

    Args:
        expected_output: Iterable of dicts with "date", "open", "close" keys.
        output: Iterable of dicts with the same keys, as produced by the scraper.
    """
    cases = (("date", "dates"), ("open", "opens"), ("close", "closes"))
    for number, (key, label) in enumerate(cases, start=1):
        expected = {record[key] for record in expected_output}
        got = {record[key] for record in output}
        if expected != got:
            print(f"Test Case {number} (scraped {label}) failed. Expected: {expected}, Got: {got}")
            return
        print(f"Test case {number} succeeded")
    print("All test cases passed!")
Scraping market indices historical data using Selenium
Get hands-on with 1300+ tech skills courses.