商标网信息爬取
Published on: 2020-10-09

爬取商标网的公告信息,目标网站:http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearchDG.html

商标网信息爬取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import MySQLdb
import requests
# Scrape trademark announcement records from the announcement search endpoint
# and store (tm_name, reg_num, reg_name) for every returned row in the MySQL
# table `brand1`.
#
# The script requests 50 consecutive announcement issues, starting at `num`
# and counting downwards, and inserts each returned row individually so that
# one bad row does not abort the rest of the run.

num = 1713  # announcement issue number sent as 'annNum'; decremented per request
n = 0  # running count of rows whose INSERT executed and committed without error
url = "http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearchDG.html"
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36'
}
# Without a timeout requests waits indefinitely on a stalled server.
request_timeout = 60  # seconds
# INSERT IGNORE lets MySQL skip rows that collide with an existing unique key.
sql = 'INSERT IGNORE INTO brand1 VALUES(%s,%s,%s)'

# 1. Connect to the database once for the whole run (instead of once per
#    announcement issue) and make sure the connection is always released.
conn = MySQLdb.connect(
    host='localhost',
    port=3306,
    user='root',
    password='123456',
    db='spider',
    charset='utf8'
)
try:
    cursor = conn.cursor()
    for _ in range(50):
        # Form fields posted to the search endpoint; only 'annNum' varies.
        # NOTE(review): 'rows' is presumably a page size chosen large enough
        # to return a whole issue in one page -- confirm against the site.
        data = {
            'page': '1',
            'rows': '400000',
            'annNum': str(num),
            'annType': '',
            'tmType': '',
            'coowner': '',
            'recUserName': '',
            'allowUserName': '',
            'byAllowUserName': '',
            'appId': '',
            'appIdZhiquan': '',
            'bfchangedAgengedName': '',
            'changeLastName': '',
            'transferUserName': '',
            'acceptUserName': '',
            'regName': '',
            'tmName': '',
            'intCls': '',
            'fileType': '',
            'totalYOrN': 'true',
            'appDateBegin': '',
            'appDateEnd': '',
            'agentName': '',
        }
        num -= 1
        print(data)

        # A network failure or a non-JSON body for one issue should not
        # abort the remaining issues: report it and move on.
        try:
            res = requests.post(
                url, data=data, headers=headers, timeout=request_timeout
            ).json()
        except (requests.RequestException, ValueError) as e:
            print(f'请求公告失败,原因:{e}')
            continue

        # Treat a missing or empty 'rows' entry as "nothing to store".
        rows = res.get('rows') if isinstance(res, dict) else None
        for i in rows or []:
            # Deliberately best-effort: a malformed row or a database error is
            # reported and the loop continues with the next row.
            try:
                # 2./3. Execute the parameterized statement for this row.
                cursor.execute(sql, [i['tm_name'], i['reg_num'], i['reg_name']])
                conn.commit()
                n += 1
                print(n)
            except Exception as e:
                print(f'存入数据失败,原因:{e}')
finally:
    conn.close()
Prev:
58同城加密字体
Next:
自定义实现迭代器