Data Analysis/python basic

웹툰 크롤링

난리브루스! 2020. 1. 21. 08:59
import requests
import lxml.html
import sqlite3
import time
import re
from bs4 import BeautifulSoup
# Crawl the Naver webtoon weekday listing and store one row per linked
# detail page (title, author, contents, genre, age) in a SQLite table.
con = sqlite3.connect('C:/Users/Admin/Downloads/sqlite-tools-win32-x86-3300100/sqlite-tools-win32-x86-3300100/testDB')
try:
    cur = con.cursor()
    # The table is rebuilt from scratch on every run, so earlier results are discarded.
    cur.execute('drop table if exists webtoon;')
    cur.execute('create table webtoon(title text, author text, contents text, genre text, age text)')

    session = requests.Session()
    res = session.get('https://comic.naver.com/webtoon/weekday.nhn')
    root = lxml.html.fromstring(res.content)
    # Rewrite every href in the listing as an absolute URL so it can be fetched directly.
    root.make_links_absolute(res.url)

    for a in root.cssselect('.thumb a'):
        url = a.get('href')
        print('URL:', url)
        # Pause between requests so the detail pages are not fetched back-to-back.
        time.sleep(1)

        # Separate names for the detail page keep the listing's `res`/`root` intact.
        detail_res = session.get(url)
        detail_root = lxml.html.fromstring(detail_res.content)

        title = detail_root.cssselect('.detail h2')[0].text.strip()
        author = detail_root.cssselect('.detail h2 span.wrt_nm')[0].text.strip()
        contents = detail_root.cssselect('.detail p')[0].text.strip()
        genre = detail_root.cssselect('.detail p span.genre')[0].text
        age = detail_root.cssselect('.detail p span.age')[0].text

        print(title)
        print(author)
        # The original printed an undefined name (`description`), which raised
        # NameError on the first page; the scraped summary lives in `contents`.
        print(contents)
        print(genre)
        print(age)

        # Named placeholders let sqlite3 bind the values; no SQL is built by hand.
        cur.execute('insert into webtoon values(:title, :author, :contents, :genre, :age)',
                    {'title': title, 'author': author, 'contents': contents, 'genre': genre, 'age': age})

    # Commit once, after every page has been processed.
    con.commit()
finally:
    # Close the connection even when a request or a selector lookup fails.
    con.close()