import pandas as pd
import requests
from lxml import etree
# Scrape the short comments of one Douban book page and save them to CSV.
#
# Flow: send an HTTP GET with a browser-like User-Agent, parse the returned
# HTML with lxml, extract the text of every comment paragraph via XPath, and
# write the resulting list to a CSV file through a pandas DataFrame.

# Send the request.
url = 'https://book.douban.com/subject/1054917/comments/'
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36',
}

# A timeout keeps the script from hanging forever on a stalled connection
# (requests waits indefinitely when none is given).
r = requests.get(url, headers=head, timeout=10)
# Fail loudly on a 4xx/5xx response instead of parsing an error page and
# silently writing an empty CSV.
r.raise_for_status()

# Parse the HTML.
s = etree.HTML(r.text)
comments = s.xpath('//div[@class="comment"]/p/text()')
# print(str(comments))  # while developing, it helps to print what was read

# Alternative (disabled): save the data as plain text with open().
# with open('D:/PythonWorkSpace/TestData/pinglun.txt', 'w', encoding='utf-8') as f:
#     for i in comments:
#         print(i)
#         f.write(i + '\n')  # one comment per line so the file is easy to read

# Save the data with pandas, to CSV (and optionally Excel).
df = pd.DataFrame(comments)
# print(df.head())  # head() shows the first 5 rows by default
df.to_csv('D:/PythonWorkSpace/TestData/PandasNumpyCSV.csv')
# df.to_excel('D:/PythonWorkSpace/TestData/PandasNumpyEx.xlsx')