# python BeautifulSoup4库爬取百度网首页上的文字内容

#python bs4库获取百度网首页上的文字内容
import requests
from bs4 import BeautifulSoup

# Fetch the page with requests. The body is available either as raw bytes
# (response.content) or as text decoded by requests (response.text).
url = "https://www.baidu.com"
# requests never times out on its own, so give it an explicit limit to keep
# the script from hanging on an unresponsive server.
response = requests.get(url, timeout=10)
# Stop here on an HTTP error status instead of parsing an error page below.
response.raise_for_status()
# Decode the raw bytes explicitly as UTF-8; printing response.text would be
# the alternative that lets requests choose the encoding.
res = response.content.decode("utf8")
print(res)

# Parse the downloaded HTML text using the html5lib parser.
soup = BeautifulSoup(res, features="html5lib")
# For a single tag, soup.a.string gives the text inside one <a> element.

# Print the text of each <a> tag whose class is "mnav", one per line.
print("## 读取a,class=mnav标签文本")
results = soup.find_all("a", attrs={"class": "mnav"})
for result in results:
    print(result.string)

# Collect the text of every <a class="mnav"> tag into the list `name`.
print("## 读取a,class=mnav标签文本,放入name列表中")
items = soup.find_all("a", class_="mnav")
# One entry per matched tag, holding that tag's .string value.
name = [item.string for item in items]
print(name)

# Regex approach: extract the text that sits between '>' and '<' in each
# tag's HTML markup.
print("## 正则表达式方法html中读取文本  ")
import re

# Compile the pattern once up front; it is applied to every matched tag.
pattern = r'>(.*?)<'
between_brackets = re.compile(pattern)
items = soup.find_all("a", class_="mnav")
for item in items:
    # str(item) turns the tag object back into its markup string; findall
    # returns a list of every non-greedy match of the captured group.
    result = between_brackets.findall(str(item))
    print(result)