import requests
import lxml.html
import sqlite3
import time
import re
from bs4 import BeautifulSoup
# Scrape the Naver webtoon weekday listing page, follow every link found
# under `.thumb a`, and store one row per detail page in a freshly recreated
# SQLite table named `webtoon`.
DB_PATH = 'C:/Users/Admin/Downloads/sqlite-tools-win32-x86-3300100/sqlite-tools-win32-x86-3300100/testDB'
LIST_URL = 'https://comic.naver.com/webtoon/weekday.nhn'
INSERT_SQL = 'insert into webtoon values(:title, :author, :contents, :genre, :age)'

con = sqlite3.connect(DB_PATH)
try:
    cur = con.cursor()
    # Start from an empty table on every run.
    cur.execute('drop table if exists webtoon;')
    cur.execute('create table webtoon(title text, author text, contents text, genre text, age text)')

    session = requests.Session()
    res = session.get(LIST_URL)
    root = lxml.html.fromstring(res.content)
    # hrefs on the listing page may be relative; resolve them against the
    # URL that was actually fetched so they can be requested directly.
    root.make_links_absolute(res.url)

    for a in root.cssselect('.thumb a'):
        url = a.get('href')
        print('URL:', url)
        # Wait one second before each detail request
        # (presumably to avoid hammering the server).
        time.sleep(1)
        res = session.get(url)
        # Parse into a separate name so the listing tree in `root` is not
        # rebound while its links are being iterated.
        detail = lxml.html.fromstring(res.content)

        # Each lookup takes the first match; a page lacking one of these
        # elements raises IndexError and aborts the run.
        title = detail.cssselect('.detail h2')[0].text.strip()
        author = detail.cssselect('.detail h2 span.wrt_nm')[0].text.strip()
        contents = detail.cssselect('.detail p')[0].text.strip()
        genre = detail.cssselect('.detail p span.genre')[0].text
        age = detail.cssselect('.detail p span.age')[0].text

        print(title)
        print(author)
        # The original printed an undefined name `description` here, which
        # raised NameError on the first page; the summary is `contents`.
        print(contents)
        print(genre)
        print(age)

        cur.execute(
            INSERT_SQL,
            {
                'title': title,
                'author': author,
                'contents': contents,
                'genre': genre,
                'age': age,
            },
        )

    # Commit once, after every page has been scraped and inserted.
    con.commit()
finally:
    # Close the connection even when a request or a parse step fails.
    con.close()

'Data Analysis > python basic' 카테고리의 다른 글

데이터베이스 기초  (0) 2020.01.21
rss 크롤링  (0) 2020.01.21
정규표현식  (0) 2020.01.09
클래스  (0) 2020.01.06
파이썬기초(5)  (0) 2019.12.31

+ Recent posts