BeautifulSoup 예제 2
beautifulsoup 문법
from bs4 import BeautifulSoup
import re
html = """
<html><body>
<ul>
<li id="naver"><a href="http://www.naver.com">naver</a></li>
<li><a href="http://www.daum.net">daum</a></li>
<li><a href="https://www.google.com">google</a></li>
<li><a href="https://www.tistory.com">tistory</a></li>
</ul>
</body></html>
"""
soup = BeautifulSoup(html, 'html.parser')
test = soup.find('a',string='naver')
test
<a href="http://www.naver.com">naver</a>
test2 = soup.find(id='naver').string
test2
'naver'
정규표현식
li = soup.find_all(href=re.compile(r"^https://"))
print(li)
[<a href="https://www.google.com">google</a>, <a href="https://www.tistory.com">tistory</a>]
# Print just the URL (href attribute) of each anchor matched above.
for link in li:
    print(link['href'])
https://www.google.com
https://www.tistory.com
- 이 방식(정규표현식 검색)은 잘 사용하지 않는 편임. 주로 CSS selector를 사용함.
css selector 연습
# Parse the sample menu page. BeautifulSoup reads the whole file while the
# object is being constructed, so the handle can be released immediately;
# the context manager also closes it if parsing raises. Any later
# fp.close() on the already-closed file is a harmless no-op.
with open('food-list.html', encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, "html.parser")
soup
<html>
<body>
<div id="foods">
<h1>안주 및 주류</h1>
<ul id="fd-list">
<li class="food hot" data-lo="ko">닭도리탕</li>
<li class="food" data-lo="jp">돈까스</li>
<li class="food hot" data-lo="ko">삼겹살</li>
<li class="food" data-lo="us">스테이크</li>
</ul>
<ul id="ac-list">
<li class="alcohol" data-lo="ko">소주</li>
<li class="alcohol" data-lo="us">맥주</li>
<li class="alcohol" data-lo="ko">막걸리</li>
<li class="alcohol high" data-lo="cn">양주</li>
<li class="alcohol" data-lo="ko">동동주</li>
</ul>
</div>
<body>
</body></body></html>
# Pick the 8th <li> in document order.
# NOTE: "li:nth-of-type(8)" is not equivalent: :nth-of-type counts an element
# among its own siblings, and neither list in this page has 8 <li> items, so
# a standards-compliant selector engine (Soup Sieve, used by current
# Beautiful Soup releases) returns None and `.string` raises AttributeError.
# Indexing the full list of <li> matches works regardless of the engine.
print(soup.select("li")[7].string)
양주
print(soup.select_one("#ac-list > li:nth-of-type(4)").string)
양주
print(soup.select("#ac-list > li[data-lo='cn']")[0].string)
양주
print(soup.select("#ac-list > li.alcohol.high")[0].string) # When an element carries both classes, chain them with "." rather than a space
양주
param = {"data-lo":"cn", "class":"alcohol"}
print(soup.find('li',param).string)
양주
print(soup.find(id='ac-list').find("li",param).string)
양주
# Report every <li> whose data-lo attribute is "us".
# Tag.get() returns None when the attribute is absent, so an <li> without
# data-lo is skipped instead of raising KeyError as ac['data-lo'] would.
for ac in soup.find_all("li"):
    if ac.get('data-lo') == 'us':
        print('data-lo == us',ac.string)
data-lo == us 스테이크
data-lo == us 맥주
fp.close()
cars_data="""
<ul id="cars">
<li id="ge">Genesis</li>
<li id="av">Avante</li>
<li id="so">Sonata</li>
<li id="gr">Grandeur</li>
<li id="tu">Tucson</li>
</ul>
"""
# Parse the car list page. The handle is only needed while BeautifulSoup
# reads the markup, so scope it with a context manager; previously the file
# was opened here and never closed.
# NOTE(review): cars.html presumably holds the same markup as cars_data
# above - confirm.
with open('cars.html', encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, "html.parser")
soup
<ul id="cars">
<li id="ge">Genesis</li>
<li id="av">Avante</li>
<li id="so">Sonata</li>
<li id="gr">Grandeur</li>
<li id="tu">Tucson</li>
</ul>
def car_func(selector):
    """Print the text of the first element in ``soup`` matching ``selector``.

    ``selector`` is a CSS selector string passed to ``select_one``; the
    printed line is prefixed with the label "car_func".
    """
    first_match = soup.select_one(selector)
    print("car_func", first_match.string)
car_func("#gr")
car_func Grandeur
car_func("li#gr")
car_func Grandeur
car_func("ul > li#gr")
car_func Grandeur
car_func("#cars #gr")
car_func Grandeur
car_func("li[id='gr']")
car_func Grandeur
print(soup.select("li")[3].string)
Grandeur
print(soup.find_all("li")[3].string)
Grandeur
람다식을 이용
# Lambda form of the same lookup as car_func: prints the label "car_lambda"
# followed by the .string of the first element matching CSS selector q.
car_lambda = lambda q : print("car_lambda",soup.select_one(q).string)
car_lambda("ul > li#gr")
car_lambda Grandeur
Leave a Comment