(Crawling) Code Examples
# Crawling the Bithumb BTC ticker API
import requests
import json
import time
import csv
from datetime import datetime
count = int(input("How many times should I query? "))
stocks = []
for n in range(count):
    response = requests.get("https://api.bithumb.com/public/ticker/BTC")
    content = response.content
    data = json.loads(content)
    # the API reports the timestamp in milliseconds
    date = datetime.fromtimestamp(int(data['data']['date']) / 1000)
    open_price = data['data']['opening_price']
    close_price = data['data']['closing_price']
    high = data['data']['max_price']   # renamed: avoid shadowing the built-in max()
    low = data['data']['min_price']    # renamed: avoid shadowing the built-in min()
    volume = data['data']['volume_1day']
    stocks.append([date, open_price, close_price, high, low, volume])
    time.sleep(5)   # pause between requests to avoid hammering the API
"""
file = open('stocks.csv', 'w', newline = '')
csvfile = csv.writer(file)
for stock in stocks:
csvfile.writerow(stock)
file.close()
"""
# Write the collected rows to stocks.csv
with open('stocks.csv', 'w', newline='') as file:
    csvfile = csv.writer(file)
    for stock in stocks:
        csvfile.writerow(stock)
"""
with open('stocks.csv', 'w', newline = '') as file:
data = json.dumps(stocks, indent = ' ')
file.write(data)
"""
# Turning the programming-language and operating-system lists in index.html into Python lists
import requests
from bs4 import BeautifulSoup
# Option 1: crawl from a local file
file = open("index\\index.html", "r")
data = file.read()
file.close()
# Option 2: crawl via the web server (this overwrites the data read in option 1)
response = requests.get("http://192.168.101.200")
data = response.content
html = BeautifulSoup(data, "html.parser")
lists = []
for tag in ["ul", "ol"]:
    li_list = html.find(tag).find_all("li")
    temp_list = []
    for li in li_list:
        temp_list.append(li.text)
    lists.append(temp_list)
print(lists)
print("Programming languages : {}".format(lists[0]))
print("Operating systems : {}".format(lists[1]))
# Turning the table in table3.html into a two-dimensional list
import requests
from bs4 import BeautifulSoup
response = requests.get("http://192.168.101.200/table3.html")
response.encoding = 'euc-kr'   # the page is served in EUC-KR
data = response.text
html = BeautifulSoup(data, 'html.parser')
tr_list = html.find("body").find_all("tr")
lists = []
for tr in tr_list:
    temp_list = []
    td_list = tr.find_all("td")
    for td in td_list:
        temp_list.append(td.text)
    lists.append(temp_list)
for item in lists:
    print(item)
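For simple tables, pandas can do the same flattening in one call. A sketch, assuming pandas and lxml are installed (neither is used elsewhere in this post):
# pandas.read_html parses every <table> on the page into DataFrames.
import pandas as pd
tables = pd.read_html("http://192.168.101.200/table3.html", encoding="euc-kr")
print(tables[0].values.tolist())   # first table as a 2-D list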
# Turning the table in professors.html into a two-dimensional list
import csv   # needed if this snippet is run on its own

response = requests.get("http://192.168.101.200/professors.html")
data = response.text
html = BeautifulSoup(data, 'html.parser')
tr_list = html.find("table").find("tbody").find_all("tr")
professors = []
for tr in tr_list:
    no = tr.find("td", {"class":"number"}).text
    name = tr.find("td", {"class":"professor"}).text
    lecture = tr.find("td", {"class":"lecture"}).text
    grade = tr.find("td", {"class":"grade"}).text
    evaluation = tr.find("td", {"class":"evaluation"}).text   # renamed: avoid shadowing the built-in eval()
    professors.append([no, name, lecture, grade, evaluation])
file = open("professors.csv", "w", newline="")
csvfile = csv.writer(file)
for professor in professors:
    csvfile.writerow(professor)
file.close()
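The CSV above has no header row. A sketch of the same write using csv.DictWriter so the column names are recorded; the field names mirror the td class names above and are assumed labels:
import csv
fieldnames = ["number", "professor", "lecture", "grade", "evaluation"]
with open("professors.csv", "w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()   # write the column names first
    for row in professors:
        writer.writerow(dict(zip(fieldnames, row)))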
# Crawling the top-50 chart from www.mnet.com
import requests
from bs4 import BeautifulSoup
import csv
import os

def findinfo(tr):
    rank = int(tr.find("td", {"class":"MMLItemRank"}).find("span").text.strip("위"))
    title = tr.find("td", {"class":"MMLItemTitle"}).find("a", {"class":"MMLI_Song"}).text
    try:
        artist = tr.find("td", {"class":"MMLItemTitle"}).find("a", {"class":"MMLIInfo_Artist"}).text
    except AttributeError:   # artist link missing
        artist = "None"
    try:
        album = tr.find("td", {"class":"MMLItemTitle"}).find("a", {"class":"MMLIInfo_Album"}).text.replace("\xa0", " ").replace("\u2013", "-")
    except AttributeError:   # album link missing
        album = "None"
    info = {"rank":rank, "title":title, "artist":artist, "album":album}
    return info

def saveimage(rank, date, tr):
    img_url = tr.find("img")["src"]
    # os.path.join avoids the invalid "\{" escape in "{}\{:03}.jpeg"
    filename = os.path.join(date, "{:03}.jpeg".format(rank))
    response = requests.get(img_url)
    image = response.content
    with open(filename, "wb") as file:
        file.write(image)
date = input("For which date [YYYYmmdd] should I crawl the music chart? ")
pages = int(input("How many pages [50 entries/page] should I crawl? "))
os.makedirs(date, exist_ok=True)   # portable replacement for os.system("md ...")
chart = []
for page in range(1, pages + 1):
    url = "http://www.mnet.com/chart/TOP100/{}?pNum={}".format(date, page)
    response = requests.get(url)
    response.encoding = 'UTF-8'
    data = response.text
    html = BeautifulSoup(data, 'html.parser')
    tr_list = html.find("table").find("tbody").find_all("tr")
    for tr in tr_list:
        info = findinfo(tr)
        saveimage(info["rank"], date, tr)
        chart.append([info["rank"], info["title"], info["artist"], info["album"]])
filename = os.path.join(date, "chart-{}.csv".format(date))
file = open(filename, "w", newline="")
csvfile = csv.writer(file)
for item in chart:
    csvfile.writerow(item)
file.close()
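Cover images can be large, and requests supports streaming the body in chunks instead of buffering it whole. A minimal sketch of a streamed variant of saveimage; the chunk size is an arbitrary assumption:
def saveimage_streamed(filename, img_url):
    # stream=True defers the download; iter_content yields it in pieces
    response = requests.get(img_url, stream=True)
    response.raise_for_status()
    with open(filename, "wb") as file:
        for chunk in response.iter_content(chunk_size=8192):   # assumed chunk size
            file.write(chunk)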
# Crawling the Naver breaking-news page
import requests
from bs4 import BeautifulSoup
date = input("For which date [YYYYmmdd] should I crawl articles? ")
pages = int(input("How many pages [20 articles/page] should I crawl? "))
for page in range(1, pages + 1):
    url = "https://news.naver.com/main/list.nhn?mode=LSD&mid=sec&sid1=001&date={}&page={}".format(date, page)
    response = requests.get(url)
    text = response.text
    html = BeautifulSoup(text, 'html.parser')
    li_list = html.find("div", {"class":"list_body"}).find_all("li")
    for li in li_list:
        # the headline sits in the last <dt>; a photo <dt> may come first
        title = li.find_all("dt")[-1].text.strip("\n\r\t ")
        try:
            img_url = li.find("dt", {"class":"photo"}).find("img")['src'].split("?")[0]
        except AttributeError:   # article without a photo
            img_url = "None"
        body = li.find("dd").find("span", {"class":"lede"}).text
        writer = li.find("dd").find("span", {"class":"writing"}).text
        print("Title  : {}".format(title))
        print("Photo  : {}".format(img_url))
        print("Lede   : {}".format(body))
        print("Source : {}".format(writer))
        print("--------------------------------------------------------------------------------------")