Challenge: Scrape PayPal FAQs Using Selenium and XPath
Scrape FAQs of PayPal using Selenium and XPath.
We'll cover the following
Problem statement
Write code below to get all the FAQs (just the questions) from the PayPal Help Center.
Expected output
A list of JSON objects, each with:
question: The posted question
url: The URL of the question
Points to remember:
Just scrape the questions and no more.
Don't scrape the Home section, as it repeats questions from the other sections.
Please include all the questions from all the side topics.
Write the code in main.py under the #TODO section.
After you run the code, its output will be tested against the expected output using hidden test cases.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    NoSuchElementException,
    ElementNotInteractableException,
)
from selenium.webdriver.support.wait import WebDriverWait

# XPath for the topic links; clicking one loads that topic's questions.
_TOPIC_LINKS_XPATH = "//ul/li/a[contains(@href, 'topic')]"
# XPath for the question (article) links shown for the selected topic.
_ARTICLE_LINKS_XPATH = "//a[contains(@href, 'article')]"


def _wait_for_all(driver, xpath, timeout=10):
    """Wait until at least one element matching *xpath* is present.

    Args:
        driver: The Selenium WebDriver to query.
        xpath: XPath expression identifying the elements.
        timeout: Maximum number of seconds to wait.

    Returns:
        The list of matching WebElements.

    Raises:
        TimeoutException: If nothing matches within *timeout* seconds.
    """
    try:
        return WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((By.XPATH, xpath)))
    except TimeoutException as err:
        raise TimeoutException("Elements are not loaded") from err


def _scrape():
    """Collect the question text and URL of every FAQ listed under each topic.

    Opens the PayPal personal help center, clicks each topic link in turn and
    records every article link that is present afterwards.

    Returns:
        A list of dicts with the keys ``"question"`` (the link's innerHTML)
        and ``"url"`` (the link's href).

    Raises:
        TimeoutException: If the topic or article links do not appear within
            10 seconds.
    """
    # NOTE(review): webdriver, Service, ChromeDriverManager and options are not
    # defined in this excerpt -- presumably imported/configured elsewhere in
    # main.py; confirm before running.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get("https://www.paypal.com/us/cshelp/personal")
        data = []
        # NOTE(review): the Home section is presumably skipped because its link
        # has no 'topic' in the href -- verify against the live page.
        topics = _wait_for_all(driver, _TOPIC_LINKS_XPATH)
        i = 0
        while i < len(topics):
            try:
                # Click through JavaScript rather than WebElement.click().
                driver.execute_script('arguments[0].click()', topics[i])
            except ElementNotInteractableException:
                i += 1
                continue
            questions = _wait_for_all(driver, _ARTICLE_LINKS_XPATH)
            data.extend(
                {"question": x.get_attribute("innerHTML"), "url": x.get_attribute("href")}
                for x in questions
            )
            i += 1
            # Look the topic links up again after the click so the next
            # iteration works with freshly located elements.
            topics = _wait_for_all(driver, _TOPIC_LINKS_XPATH)
        return data
    finally:
        # Always end the browser session, including when a wait times out.
        driver.quit()


def test(expected_output, output):
    """Compare the first five scraped items with the expected ones.

    Prints a failure message for the first mismatch found (questions first,
    then URLs), or a success message when both sets agree.

    Args:
        expected_output: List of dicts with ``"question"`` and ``"url"`` keys.
        output: List of dicts in the same shape, as returned by the scraper.
    """
    # Explicit comparisons instead of assert/except: asserts are stripped
    # under ``python -O``, which would make every check pass silently.

    # Test Case 1
    expected_questions = {x['question'] for x in expected_output[:5]}
    output_questions = {x['question'] for x in output[:5]}
    if expected_questions != output_questions:
        print(f"Test Case 1 (scraped Questions) failed. Expected: {expected_questions}, Got: {output_questions}\n")
        return

    # Test Case 2
    expected_urls = {x['url'] for x in expected_output[:5]}
    output_urls = {x['url'] for x in output[:5]}
    if expected_urls != output_urls:
        print(f"Test Case 2 (scraped Urls) failed. Expected: {expected_urls}, Got: {output_urls}\n")
        return

    print("All test cases passed!")
Scraping PayPal FAQs using Selenium and XPaths
Get hands-on with 1300+ tech skills courses.