九色国产,午夜在线视频,新黄色网址,九九色综合,天天做夜夜做久久做狠狠,天天躁夜夜躁狠狠躁2021a,久久不卡一区二区三区

打開(kāi)APP
userphoto
未登錄

開(kāi)通VIP,暢享免費(fèi)電子書(shū)等14項(xiàng)超值服

開(kāi)通VIP
爬蟲學習(十二)

實踐項目————詩詞名句網《三國演義》小說爬取

import os
import re
import time
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup


def header():
    """Build the request for the novel's table-of-contents page.

    Returns a urllib.request.Request for the Romance of the Three
    Kingdoms landing page on shicimingju.com, carrying a desktop-Chrome
    User-Agent so the site serves the normal HTML page.
    """
    # Table-of-contents URL of the novel
    toc_url = "http://www.shicimingju.com/book/sanguoyanyi.html"
    # Browser-like request header to avoid being rejected as a bot
    ua_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36"}
    return urllib.request.Request(toc_url, headers=ua_headers)

# 發(fā)送請(qǐng)求
def main(request):
    """Send *request* through an explicitly built opener.

    Returns the raw HTTP response object produced by opener.open().
    """
    # Build an opener around a plain HTTPHandler and fire the request
    http_handler = urllib.request.HTTPHandler()
    url_opener = urllib.request.build_opener(http_handler)
    return url_opener.open(request)

# 下載內(nèi)容
def download():
    """Scrape every chapter of the novel and append them to one .doc file.

    Fetches the table of contents, collects each chapter's title and
    relative URL, then requests the chapters one by one and appends
    "<title><body text>" to a single file inside the 三國(guó)演義 directory,
    sleeping 2 seconds between requests to avoid hammering the server.

    Fixes vs. the original: the opener is built once (the original
    rebuilt it per chapter and passed the HTTPHandler class instead of
    an instance), all scraped chapters are downloaded instead of a
    hard-coded range(0, 120), the redundant tf.close() inside the
    `with` block is gone, and the loop-invariant output path is hoisted.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36"}
    request = header()
    response = main(request).read()
    # Parse the table-of-contents page with bs4
    article_main_html = BeautifulSoup(response, "lxml")
    if not os.path.exists("三國(guó)演義"):
        os.mkdir("三國(guó)演義")
    # Chapter links live in the .book-mulu list
    article_section = article_main_html.select(".book-mulu ul li a")
    # Keep titles and relative hrefs in matching order
    section_title_ls = [section.text for section in article_section]
    section_url_ls = [section["href"] for section in article_section]

    # Build the opener once and reuse it for every chapter request
    handler = urllib.request.HTTPHandler()
    opener = urllib.request.build_opener(handler)
    # All chapters are appended to the same output file
    filename = "三國(guó)演義" + ".doc"
    filename_path = os.path.join("三國(guó)演義", filename)

    # Download every scraped chapter (not a hard-coded 120)
    for num, (section_title, section_url) in enumerate(zip(section_title_ls, section_url_ls)):
        # The href is relative; prepend the site root
        section_allurl = "http://www.shicimingju.com" + section_url
        section_request = urllib.request.Request(section_allurl, headers=headers)
        section_response = opener.open(section_request).read().decode("utf8")
        # Parse the chapter page and pull out the body text
        article_soup = BeautifulSoup(section_response, "lxml")
        article_content = article_soup.select(".chapter_content")
        # Prefix the chapter body with its title
        content = section_title + article_content[0].text
        print("正在下載第%d章" % num)
        # Append this chapter's bytes to the running output file
        with open(filename_path, "ab+") as tf:
            tf.write(content.encode("utf8"))
        # Be polite to the server
        time.sleep(2)

# Entry point: run the novel scraper only when executed as a script.
if __name__ == '__main__':
    download()
 

百度音樂爬取案例

import os
import re
import time
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import json

# 面向?qū)ο笈廊?shù)據(jù)
class BaiDuMusic(object):
    """Scrape one page of Baidu/Taihe music search results and download songs.

    Given a search keyword (singer or song name) and a 1-based result
    page number, the class queries the Taihe search page, extracts the
    (song name, song id) pairs embedded in each result row, resolves
    each id via the JSONP play API, and downloads the mp3 plus its
    .lrc lyric file into a local ./music directory.

    Fixes vs. the original: the JSONP payload is parsed with json.loads
    once instead of twice, the urlopen response is closed via a context
    manager, and the download-failure handler now prints the actual
    exception instead of silently discarding it. Method names
    (including the "requset"/"paserSong" typos) are kept so existing
    callers keep working.
    """

    def __init__(self, singer, page):
        # Search keyword and 1-based result page to fetch
        self.singer = singer
        self.page = page
        # Browser-like header reused for every request
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36"}

    def header(self):
        """Build the search-page Request for this singer/page."""
        url = "http://music.taihe.com/search/song?"
        data = {
            "s": "1",
            "key": self.singer,
            "jump": "0",
            # 20 results per page, so page N starts at (N-1)*20
            "start": (self.page - 1) * 20,
            "size": "20",
            "third_type": "0",
        }
        # URL-encode the query parameters and append them to the base URL
        query = urllib.parse.urlencode(data)
        singer_url = url + query
        return urllib.request.Request(url=singer_url, headers=self.headers)

    def requset(self):
        """Open the search request and return the HTTP response object.

        (Misspelled name kept for backward compatibility.)
        """
        request = self.header()
        handler = urllib.request.HTTPHandler()
        opener = urllib.request.build_opener(handler)
        return opener.open(request)

    def paserSong(self):
        """Parse the result page into a list of (song_name, song_id) tuples.

        (Misspelled name kept for backward compatibility.)
        """
        response = self.requset()
        singer_soup = BeautifulSoup(response, "lxml")
        # Result rows are <li> tags whose data-albumid attribute is numeric
        pattern = re.compile(r'[\d]+')
        songs_info = singer_soup.find_all(name="li", attrs={"data-albumid": pattern})
        # Each row's data-songitem attribute holds a JSON blob for the song
        song_ls = [json.loads(li["data-songitem"]) for li in songs_info]
        song_info = [(item["songItem"]["sname"], item["songItem"]["sid"]) for item in song_ls]
        return song_info

    def downloadSong(self):
        """Download each song's mp3 and lyric file into ./music."""
        if not os.path.exists('music'):
            os.mkdir('music')
        song_info = self.paserSong()
        for song_info_name, song_info_id in song_info:
            print("正在下載第%s頁(yè)的:%s的《%s》" % (self.page, self.singer, song_info_name))
            # JSONP API returning play info (bitrate, links) for one song id
            song_url = 'http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.play&format=jsonp&callback=jQuery17202741599001012014_1513517333931&songid=%s&_=1513517334915' % song_info_id
            # Close the response when done reading
            with urllib.request.urlopen(song_url) as song_response:
                song_body = song_response.read().decode("utf8")
            # Strip the JSONP callback wrapper: callback( {...} )
            pattern_song = re.compile(r'\((.*)\)', re.S)
            json_song_info = pattern_song.findall(song_body)
            # Parse the payload once (the original parsed it twice)
            payload = json.loads(json_song_info[0])
            lrclink = payload["songinfo"]["lrclink"]
            file_link = payload["bitrate"]["file_link"]
            # Build the output paths for the mp3 and the lyric file
            filename_music = song_info_name + "_%s.mp3" % self.singer
            filename_lrc = song_info_name + "_%s.lrc" % self.singer
            song_path = os.path.join("music", filename_music)
            lrc_path = os.path.join("music", filename_lrc)
            try:
                urllib.request.urlretrieve(lrclink, lrc_path)
                urllib.request.urlretrieve(file_link, song_path)
                # Throttle between downloads
                time.sleep(1)
                print("《%s》下載完成" % song_info_name)
            except Exception as e:
                # Usually a restricted track; surface the actual reason too
                print("因版權(quán)受限無(wú)法下載", e)




# 錄入爬取信息
def main():
    """Prompt for a keyword and page range, then download page by page.

    Bug fix: the completion message was guarded by `page > end_page + 1`
    inside the loop, a condition that can never be true there (the loop
    stops at end_page), so it never printed. It now prints once after
    all pages have been processed.
    """
    singer = input("請(qǐng)輸入爬取的歌手或是歌名:")
    start_page = int(input("請(qǐng)輸入爬取的開(kāi)始頁(yè):"))
    end_page = int(input("請(qǐng)輸入爬取的終止頁(yè):"))
    # Download each requested result page in order
    for page in range(start_page, end_page + 1):
        baidumusic = BaiDuMusic(singer, page)
        baidumusic.downloadSong()
    print("%s歌手的所有歌曲都已下載完畢" % singer)

# 運(yùn)行
# Entry point: prompt the user and start the music downloader.
if __name__ == '__main__':
    main()
本站僅提供存儲(chǔ)服務(wù),所有內(nèi)容均由用戶發(fā)布,如發(fā)現(xiàn)有害或侵權(quán)內(nèi)容,請(qǐng)點(diǎn)擊舉報(bào)。
打開(kāi)APP,閱讀全文并永久保存 查看更多類似文章
猜你喜歡
類似文章
用 Python 制作音樂(lè)聚合下載器
hha
Python模擬登陸163郵箱并獲取通訊錄
drf—— drf的請(qǐng)求與響應(yīng)
Py之urllib2:Python庫(kù)之urllib、urllib2、urllib3系列簡(jiǎn)介、安裝、使用方法之詳細(xì)攻略
python實(shí)現(xiàn)根據(jù)用戶輸入從電影網(wǎng)站獲取影片信息的方法
更多類似文章 >>
生活服務(wù)
熱點(diǎn)新聞
分享 收藏 導(dǎo)長(zhǎng)圖 關(guān)注 下載文章
綁定賬號(hào)成功
后續(xù)可登錄賬號(hào)暢享VIP特權(quán)!
如果VIP功能使用有故障,
可點(diǎn)擊這里聯(lián)系客服!

聯(lián)系客服