1、爬取百度body存入txt
def get_baidu_hot(timeout=10):
    """Fetch the Baidu realtime hot-search board and save its <body> markup.

    The page is downloaded with a browser-like User-Agent, decoded as UTF-8,
    parsed with ``html.parser``, and every ``<body>`` element found is printed
    and then handed to ``my_utils.write_file``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    url = "https://top.baidu.com/board?tab=realtime"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/58.0.3029.110 Safari/537.3"
        ),
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 rather than relying on the encoding requests guesses.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    # find_all returns a result set of matching tags, not a plain string.
    txt = soup.find_all("body")
    print(txt)
    # NOTE(review): write_file receives the result set as-is — confirm it
    # stringifies its argument before writing.
    my_utils.write_file(txt)
2、读取txt正则匹配获取json
# Load the saved text back through my_utils.read_file().
# NOTE(review): presumably this is the page body written by step 1 — confirm.
data=my_utils.read_file()
3、将json存入数据库
# Parse the saved text into records, then build and run one INSERT per record.
# NOTE(review): ana_baidu is expected to return an iterable of dicts mapping
# column name -> value — confirm against my_utils.
json2 = my_utils.ana_baidu(data)
# Assume the table name is "users".
table_name = "users"


def _sql_ident(name):
    """Return *name* as a backtick-quoted identifier, doubling embedded backticks."""
    return "`" + str(name).replace("`", "``") + "`"


def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal with escaping.

    Backslashes and single quotes are doubled so scraped text cannot
    terminate the literal early.
    NOTE(review): assumes MySQL-style literals where backslash is an escape
    character (the backtick identifiers suggest MySQL) — confirm.
    """
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return f"'{text}'"


def _build_insert(table, row):
    """Return the INSERT statement that stores the dict *row* in *table*."""
    columns = ", ".join(_sql_ident(key) for key in row)
    values = ", ".join(_sql_literal(value) for value in row.values())
    return f"INSERT INTO {table} ({columns}) VALUES ({values});"


# Walk the key/value pairs of every JSON record and generate its INSERT.
insert_statements = []
for row in json2:
    # An empty record would produce a malformed statement; skip it.
    if not row:
        continue
    statement = _build_insert(table_name, row)
    insert_statements.append(statement)
    print(statement)
    my_sql.exe_sql(statement)
4、读取数据库信息生成词云
# Query the `desc` column of the 50 rows with the newest create_time.
latest_rows = my_sql.query_sql("select `desc` from users order by create_time desc limit 50")
# Stringify the whole query result and strip every "的" before rendering.
# NOTE(review): "的" is presumably removed as a stop word — confirm.
result_content = str(latest_rows).replace("的", "")
my_wcloud.create_cy(result_content)
生成词云:
代码:
javaDev/public_python
ssh:
git@gitee.com:wangchao_1/public_python.git