BeautifulSoup practice

html1 = """
<!DOCTYPE html>
<html lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<title>My first web page</title>
<meta name="generator" content="EverEdit" />
<meta name="author" content="" />
<meta name="keywords" content="" />
<meta name="description" content="" />
</head>
<body>
<div class="rows">
<a href="http://www.baidu.com/" target="_blank">
<div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color1">
<span class="vfsd_a_title">Baidu</span>
</div>
</a>
<a href="http://www.google.com/" target="_blank">
<div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color3">
<span class="vfsd_a_title">Google</span>
</div>
</a>
<a href="http://www.oschina.net/" target="_blank">
<div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2">
<span class="vfsd_a_title">Stack Overflow</span>
</div>
</a>
</div>
<p class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2">Hello
<span class="vfsd_a_title">CSDN</span>
</p>
<p class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2">
<span class="vfsd_a_title">FaceBook</span>
</p>
<p class="nmn" id="nmn1">
<span class="vfsd_a_title">Open source China</span>
</p>
</body>
</html>
"""

from bs4 import BeautifulSoup
soup = BeautifulSoup(html1, 'lxml')

print(soup.title)

####################Output:

<title>My first web page</title>

print(soup.title.string)

####################Output:

My first web page

print(soup.head)

####################Output:

<head>
<meta charset="utf-8"/>
<title>My first web page</title>
<meta content="EverEdit" name="generator"/>
<meta content="" name="author"/>
<meta content="" name="keywords"/>
<meta content="" name="description"/>
</head>

 

for i,child in enumerate(soup.div.children):
  print(i,child)

 

####################Output:

['\n', <a href="http://www.baidu.com/" target="_blank">
<div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color1">
<span class="vfsd_a_title">Baidu</span>
</div>
</a>, '\n', <a href="http://www.google.com/" target="_blank">
<div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color3">
<span class="vfsd_a_title">Google</span>
</div>
</a>, '\n', <a href="http://www.oschina.net/" target="_blank">
<div class="col-xs-12 col-sm-6 col-md-4 col-lg-2 vfsd-div vfsd-div-color2">
<span class="vfsd_a_title">Stack Overflow</span>
</div>
</a>, '\n']

 

 

Leave a Reply

Your email address will not be published. Required fields are marked *