from bs4 import BeautifulSoup
import bs4
import csv
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
browser.get('http://data.eastmoney.com/notices/')
wait = WebDriverWait(browser, 10)
# browser.find_element_by_css_selector('#dt_1').click()
# Wait until the announcements table (#dt_1) has been rendered before reading the page.
table_emergence = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#dt_1')))

################### Extract information ######################
info = []

def get_info(html):
    """Parse one page of the announcements table and append each row to `info`."""
    soup = BeautifulSoup(html, 'lxml')
    table = soup.find(name='table', attrs={'id': 'dt_1'})
    trs = table.find('tbody').children
    for tr in trs:
        if isinstance(tr, bs4.element.Tag):  # skip whitespace text nodes between rows
            tds = tr.find_all('td')
            code = tds[0].a.string            # stock code
            name = tds[1].a.string            # stock name
            title = tds[3].a.string           # announcement title
            title_type = tds[4].span.string   # announcement type
            pub_date = tds[5].span.string     # announcement date (renamed from `time` to avoid shadowing the time module)
            sub_info = [code, name, title, title_type, pub_date]
            info.append(sub_info)

############# Pagination ######################
def next_page(page_number):
    """Jump to `page_number` via the page-number input box; retry on timeout."""
    try:
        wait = WebDriverWait(browser, 20)
        inputs = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#PageContgopage')))
        submit = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#PageCont > a.btn_link')))
        inputs.clear()
        inputs.send_keys(page_number)
        submit.click()
        # Wait until the highlighted page number matches the one we navigated to.
        wait.until(EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#PageCont > span.at'), str(page_number)))
    except TimeoutException:
        next_page(page_number)

###################### Save data ##################################
def save_data(data):
    with open('股票数据.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Header row: code, name, announcement title, announcement type, announcement date
        writer.writerow(['代码', '名称', '公告标题', '公告类型', '公告日期'])
        for row in data:
            print(row)
            writer.writerow(row)

# Scrape the first six pages: parse the current page, then jump to the next one.
for i in range(0, 6):
    get_info(browser.page_source)
    next_page(i + 2)
    time.sleep(2)

save_data(info)
browser.close()
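As a minimal sketch (not part of the original script), the row-parsing logic above can be checked against a static HTML snippet without launching Chrome, assuming the page keeps the table#dt_1 / tbody / td layout with links in columns 0, 1, 3 and spans in columns 4, 5. The `sample_html` string and its values are made up purely for illustration:

from bs4 import BeautifulSoup

# Hypothetical fragment mimicking one row of the eastmoney announcements table.
sample_html = """
<table id="dt_1"><tbody>
  <tr>
    <td><a>600000</a></td><td><a>示例银行</a></td><td></td>
    <td><a>关于召开股东大会的公告</a></td>
    <td><span>其它</span></td><td><span>2020-01-01</span></td>
  </tr>
</tbody></table>
"""

soup = BeautifulSoup(sample_html, 'lxml')
row = soup.find('table', attrs={'id': 'dt_1'}).find('tbody').find('tr')
tds = row.find_all('td')
# Same column indices as get_info(): code, name, title, type, date.
print([tds[0].a.string, tds[1].a.string, tds[3].a.string,
       tds[4].span.string, tds[5].span.string])

Running a check like this first makes it easier to confirm the column indices before pointing the Selenium loop at the live site.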