-
Notifications
You must be signed in to change notification settings - Fork 0
/
CTN-link
23 lines (20 loc) · 953 Bytes
/
CTN-link
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- coding: utf8 -*-
import HTMLParser
import requests
import csv
# One shared HTTP session: any cookies set by the login endpoint below are
# kept on it and sent again with the result-page requests made through `s`.
s = requests.Session()
# Hit the login endpoint before searching. Bound the wait so a stalled server
# cannot hang the script, and raise on an HTTP error status instead of
# silently carrying on with a session that never logged in.
s.get('http://kmw.chinatimes.com/member/login_ws.asp?lt=ip&SYS=KMW&on=20100401155455780&gid=', timeout=30).raise_for_status()
# `url` is the link to the first results page, obtained by running the query
# manually; it ends with "page_cnt=" so that a page number can be appended.
url = "http://kmw.chinatimes.com/member/news_search2/news_search_tw8.asp?query=%B3%B0%B0t%2B%A4%A4%B0%EA%B0t%B0%B8&attr=&from_date=19940101&to_date=20141231&data_set=7&area=tw&title_cnt=30&ymd=ymd&hite=1&help=%A5x%C6W%B7s%BBD&wrap=&len=0&SEIP=10.227.133.193&page_cnt="
class parseLinks(HTMLParser.HTMLParser):
    """HTML parser that prints an absolute URL for every ``/member`` link.

    For each ``<a>`` start tag fed to the parser, every ``href`` attribute
    whose value begins with ``/member`` is written to standard output,
    prefixed with ``http://kmw.chinatimes.com``.
    """

    def handle_starttag(self, tag, attrs):
        """Print the absolute URL of each ``/member`` href on an ``<a>`` tag.

        ``tag`` is the tag name and ``attrs`` the list of ``(name, value)``
        pairs that ``HTMLParser`` passes to this callback.
        """
        if tag != 'a':
            return
        for name, value in attrs:
            # An attribute written without a value (``<a href>``) arrives as
            # None; slicing it would raise TypeError, so it is skipped here.
            if name == 'href' and value and value.startswith('/member'):
                # Parenthesised single argument: prints identically under
                # the Python 2 print statement and the Python 3 function.
                print("http://kmw.chinatimes.com" + value)
# Walk every results page and print the article links found on each one.
# The search returned 77 pages, hence the exclusive upper bound of 78.
for i in range(1, 78):
    url1 = url + str(i)
    response = s.get(url1, timeout=30)
    # Stop on an HTTP error rather than parsing an error page as if it were
    # a page of search results.
    response.raise_for_status()
    # Each page is an independent HTML document, so it gets its own parser:
    # with one shared instance, an unterminated construct left in the buffer
    # at the end of one page would be glued onto the start of the next.
    lParser = parseLinks()
    lParser.feed(response.text)
    # close() forces any buffered remainder of this page to be processed.
    lParser.close()