# 멜론사이트 크롤링2 (Melon site crawling, part 2)
# 멜론사이트 연습 (Melon site practice)
import re
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Fetch the Melon chart page and collect the chart-table <tr> tags.
chart_url = 'https://www.melon.com/chart/index.htm'

# First attempt uses the default requests User-Agent; its response is only
# displayed and is then replaced by the request below.
# NOTE(review): presumably the site rejects the default User-Agent, which is
# why a browser-like header is sent next -- confirm.
# A timeout is set so a stalled connection cannot hang the script forever.
res = requests.get(chart_url, timeout=10)
res
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
}
res = requests.get(chart_url, headers=headers, timeout=10)
res
# Fail loudly on an HTTP error status instead of silently parsing an error page.
res.raise_for_status()
html = res.text
soup = BeautifulSoup(html, 'html.parser')
# One <tr> per row of the table inside the #tb_list element.
tr_tag_list = soup.select('#tb_list table tbody tr')
tr_tag_list
# Build one dict per chart row holding the song id, title, artist and album.
song_list = []
for tr_tag in tr_tag_list:
    rank = tr_tag.select_one('.rank').text
    song_tag = tr_tag.select_one('a[href*=playSong]')
    # The song id is taken as the second run of digits in the playSong href.
    song_id = re.findall(r'\d+', song_tag['href'])[1]
    song_name = song_tag.text
    # Index 1 picks the second matching link in the row.
    # NOTE(review): presumably each row repeats the artist/album links --
    # confirm against the page markup.
    artist_name = tr_tag.select('a[href*=goArtistDetail]')[1].text
    album_name = tr_tag.select('a[href*=goAlbumDetail]')[1].text
    print(rank, song_id, song_name, album_name)
    song_list.append({
        'id': song_id,
        'name': song_name,
        'artist_name': artist_name,
        'album_name': album_name,
    })
song_list
# Show the collected song ids, then join them into one comma-separated string.
[entry['id'] for entry in song_list]
song_comma_list = ','.join(entry['id'] for entry in song_list)
song_comma_list
# Request the like-count JSON for every collected song id in a single call.
# Example request URL (ids comma-joined, with the comma encoded as %2C):
# https://www.melon.com/commonlike/getSongLike.json?contsIds=31093710%2C31085237%2C31113240%2C31093710%2C31085237%2C31113240
song_like_url = 'https://www.melon.com/commonlike/getSongLike.json'
# A timeout is set so a stalled connection cannot hang the script forever.
res = requests.get(
    song_like_url,
    headers=headers,
    params={'contsIds': song_comma_list},
    timeout=10,
)
res
# Fail loudly on an HTTP error status before trying to decode the body as JSON.
res.raise_for_status()
res.json()
res.json().keys()
res.json()['contsLike']
# Map each entry's CONTSID to its SUMMCNT from the like-count response.
song_like_dict = {}
for cont in res.json()['contsLike']:
    # Named conts_id rather than id so the builtin id() is not shadowed.
    conts_id = cont['CONTSID']
    like = cont['SUMMCNT']
    song_like_dict[conts_id] = like
song_like_dict
## Equivalent to the loop above, written as a dict comprehension
{cont['CONTSID']: cont['SUMMCNT'] for cont in res.json()['contsLike']}
# pandas
# Re-fetch the chart page and let pandas extract its HTML tables.
# A timeout is set so a stalled connection cannot hang the script forever.
res = requests.get(chart_url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
res.raise_for_status()
html = res.text
# read_html returns a list of DataFrames; parse once and reuse the result
# instead of re-parsing the page for every inspection step. The markup is
# wrapped in StringIO because passing a literal HTML string to read_html is
# deprecated in recent pandas releases.
chart_tables = pd.read_html(StringIO(html))
chart_tables
type(chart_tables)
type(chart_tables[0])
# The first table found on the page is used as the chart DataFrame.
chart_pd = chart_tables[0]
chart_pd
# Leave a Comment