BeautifulSoup 예제 2

BeautifulSoup 문법

from bs4 import BeautifulSoup
import re
html = """
<html><body>
  <ul>
    <li id="naver"><a href="http://www.naver.com">naver</a></li>
    <li><a href="http://www.daum.net">daum</a></li>
    <li><a href="https://www.google.com">google</a></li>
    <li><a href="https://www.tistory.com">tistory</a></li>
  </ul>
</body></html>
"""

# Parse the sample markup using the 'html.parser' backend.
soup = BeautifulSoup(html, 'html.parser')
# First <a> tag whose text is exactly 'naver'.
test = soup.find('a',string='naver')
test
  <a href="http://www.naver.com">naver</a>
# Text of the element with id="naver" (the <li>, whose only content is the <a> link).
test2 = soup.find(id='naver').string
test2
  'naver'

정규표현식

# Collect every tag whose href attribute starts with "https://".
li = soup.find_all(href=re.compile(r"^https://"))
print(li)
  [<a href="https://www.google.com">google</a>, <a href="https://www.tistory.com">tistory</a>]

# Print the href value of each matched link.
for e in li:
    print(e['href'])
  https://www.google.com
https://www.tistory.com

  • 잘 사용하지는 않는 편임. 주로 CSS selector를 사용

CSS selector 연습

# Parse food-list.html. The with-block closes the file as soon as parsing is
# done, even if BeautifulSoup raises; `fp` stays bound (closed) afterwards.
with open('food-list.html', encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, "html.parser")
soup
  <html>
<body>
<div id="foods">
<h1>안주 및 주류</h1>
<ul id="fd-list">
<li class="food hot" data-lo="ko">닭도리탕</li>
<li class="food" data-lo="jp">돈까스</li>
<li class="food hot" data-lo="ko">삼겹살</li>
<li class="food" data-lo="us">스테이크</li>
</ul>
<ul id="ac-list">
<li class="alcohol" data-lo="ko">소주</li>
<li class="alcohol" data-lo="us">맥주</li>
<li class="alcohol" data-lo="ko">막걸리</li>
<li class="alcohol high" data-lo="cn">양주</li>
<li class="alcohol" data-lo="ko">동동주</li>
</ul>
</div>
<body>
</body></body></html>
# The 8th <li> in document order. ":nth-of-type(8)" counts siblings under a
# single parent, and no <ul> here has eight items, so current BeautifulSoup
# releases (Soup Sieve selectors) return None for it and `.string` would raise
# AttributeError. Indexing the full list of <li> tags gives the intended item.
print(soup.select("li")[7].string)
  양주

# Fourth <li> directly under the element with id="ac-list".
print(soup.select_one("#ac-list > li:nth-of-type(4)").string)
  양주

# Direct <li> children of #ac-list whose data-lo attribute is 'cn'; take the first.
print(soup.select("#ac-list > li[data-lo='cn']")[0].string)
  양주

print(soup.select("#ac-list > li.alcohol.high")[0].string) # when an element has two classes at once, chain them with '.' instead of a space
  양주

# Attribute filter for find(): data-lo must be "cn" and the class list must include "alcohol".
param = {"data-lo":"cn", "class":"alcohol"}
print(soup.find('li',param).string)
  양주

# Apply the `param` filter, but search only inside the element with id="ac-list".
print(soup.find(id='ac-list').find("li",param).string)
  양주

# Print every <li> whose data-lo attribute is "us".
for ac in soup.find_all("li"):
    # .get() yields None for an <li> without data-lo instead of raising KeyError.
    if ac.get('data-lo') == 'us':
        print('data-lo == us',ac.string)
  data-lo == us 스테이크
data-lo == us 맥주

# Close the food-list.html handle (harmless if it is already closed).
fp.close()

# Sample car-list markup. The code below parses cars.html rather than this
# variable; presumably the file holds the same content (TODO confirm).
cars_data="""
<ul id="cars">
  <li id="ge">Genesis</li>
  <li id="av">Avante</li>
  <li id="so">Sonata</li>
  <li id="gr">Grandeur</li>
  <li id="tu">Tucson</li>
</ul>
"""
# Parse cars.html. The original handle was never closed; the with-block
# releases it as soon as parsing finishes, even if BeautifulSoup raises.
with open('cars.html', encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, "html.parser")
soup
  <ul id="cars">
<li id="ge">Genesis</li>
<li id="av">Avante</li>
<li id="so">Sonata</li>
<li id="gr">Grandeur</li>
<li id="tu">Tucson</li>
</ul>
def car_func(selector):
    """Print the text of the first element in ``soup`` matching *selector*.

    ``selector`` is a CSS selector string; the lookup runs against the
    module-level ``soup`` object.
    """
    matched = soup.select_one(selector)
    print("car_func", matched.string)

# Match by id alone.
car_func("#gr")
  car_func Grandeur

# Tag name combined with id.
car_func("li#gr")
  car_func Grandeur

# <li id="gr"> that is a direct child of a <ul>.
car_func("ul > li#gr")
  car_func Grandeur

# Element #gr located anywhere inside #cars (descendant combinator).
car_func("#cars #gr")
  car_func Grandeur

# Attribute selector on the id attribute.
car_func("li[id='gr']")
  car_func Grandeur

# Fourth <li> (index 3) from the list returned by select().
print(soup.select("li")[3].string)
  Grandeur

# Fourth <li> (index 3) from the list returned by find_all().
print(soup.find_all("li")[3].string)
  Grandeur

람다식을 이용

# Lambda form of the lookup: print the text of the first element in `soup`
# matching the CSS selector q.
car_lambda = lambda q : print("car_lambda",soup.select_one(q).string)
car_lambda("ul > li#gr")
  car_lambda Grandeur

Tags:

Updated:

Leave a Comment