
# Crawling the Bithumb ticker API


import requests
import json
import time
import csv
from datetime import datetime


count = int(input("How many times should the price be polled? "))
stocks = []

for n in range(count):
    response = requests.get("https://api.bithumb.com/public/ticker/BTC")
    data = json.loads(response.content)
    # The API reports its timestamp in milliseconds, so divide by 1000.
    date = datetime.fromtimestamp(int(data['data']['date']) / 1000)
    opening = data['data']['opening_price']
    closing = data['data']['closing_price']
    high = data['data']['max_price']   # renamed from max to avoid shadowing the built-in
    low = data['data']['min_price']    # renamed from min to avoid shadowing the built-in
    volume = data['data']['volume_1day']
    stocks.append([date, opening, closing, high, low, volume])
    time.sleep(5)   # pause between requests so the API is not hammered
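
# For reference, the fields above come out of a response shaped roughly
# like the sketch below (an assumption based on the keys this script
# reads, not a copy of the API documentation):
#
# {
#     "status": "0000",
#     "data": {
#         "opening_price": "...",
#         "closing_price": "...",
#         "min_price": "...",
#         "max_price": "...",
#         "volume_1day": "...",
#         "date": "1537325400000"        # Unix time in milliseconds
#     }
# }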


"""

file = open('stocks.csv', 'w', newline = '')

csvfile = csv.writer(file)

for stock in stocks:

csvfile.writerow(stock)

file.close()

"""


# Preferred version: a with-block closes the file automatically,
# even if writing raises an exception.
with open('stocks.csv', 'w', newline='') as file:
    csvfile = csv.writer(file)
    for stock in stocks:
        csvfile.writerow(stock)


"""

with open('stocks.csv', 'w', newline = '') as file:

data = json.dumps(stocks, indent = '    ')

file.write(data)

"""



# Turning the programming-language and operating-system lists
# in index.html into Python lists


import requests
from bs4 import BeautifulSoup


# Approach 1: crawl from a local file
file = open("index\\index.html", "r")
data = file.read()
file.close()

# Approach 2: crawl from the web server
response = requests.get("http://192.168.101.200")
data = response.content

html = BeautifulSoup(data, "html.parser")
lists = []

# The page keeps the programming languages in a <ul> and the
# operating systems in an <ol>, so one pass per tag collects both.
for tag in ["ul", "ol"]:
    li_list = html.find(tag).find_all("li")
    temp_list = []
    for li in li_list:
        temp_list.append(li.text)
    lists.append(temp_list)

print(lists)
print("Programming languages : {}".format(lists[0]))
print("Operating systems : {}".format(lists[1]))






# Turning the table in table3.html into a two-dimensional list


import requests
from bs4 import BeautifulSoup


response = requests.get("http://192.168.101.200/table3.html")
response.encoding = 'euc-kr'   # the page is served in EUC-KR, not UTF-8
data = response.text
html = BeautifulSoup(data, 'html.parser')
tr_list = html.find("body").find_all("tr")
lists = []

for tr in tr_list:
    temp_list = []
    td_list = tr.find_all("td")
    for td in td_list:
        temp_list.append(td.text)
    lists.append(temp_list)

for item in lists:
    print(item)
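
# If the first row of the table uses <th> header cells, the loop above
# produces an empty list for it, because tr.find_all("td") matches
# nothing there. A variant that keeps header cells too, assuming such
# a row exists:
for tr in tr_list:
    cells = tr.find_all(["td", "th"])   # find_all accepts a list of tag names
    print([cell.text for cell in cells])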




# Turning the table in professors.html into a two-dimensional list


import csv   # requests and BeautifulSoup are already imported above


response = requests.get("http://192.168.101.200/professors.html")
data = response.text
html = BeautifulSoup(data, 'html.parser')
tr_list = html.find("table").find("tbody").find_all("tr")
professors = []

for tr in tr_list:
    no = tr.find("td", {"class": "number"}).text
    name = tr.find("td", {"class": "professor"}).text
    lecture = tr.find("td", {"class": "lecture"}).text
    grade = tr.find("td", {"class": "grade"}).text
    evaluation = tr.find("td", {"class": "evaluation"}).text   # renamed from eval to avoid shadowing the built-in
    professors.append([no, name, lecture, grade, evaluation])

# Write the rows with the with-block pattern introduced above.
with open("professors.csv", "w", newline="") as file:
    csvfile = csv.writer(file)
    for professor in professors:
        csvfile.writerow(professor)
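
# A header row makes the CSV self-describing. A minimal sketch; the
# column names are assumptions inferred from the class attributes used
# above, and writerows replaces the explicit loop.
with open("professors.csv", "w", newline="") as file:
    csvfile = csv.writer(file)
    csvfile.writerow(["number", "professor", "lecture", "grade", "evaluation"])
    csvfile.writerows(professors)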




# Crawling the chart (50 entries per page) from www.mnet.com


import requests
from bs4 import BeautifulSoup
import csv
import os


def findinfo(tr):
    # Pull rank, title, artist, and album out of one chart row. Rows
    # occasionally lack an artist or album link, hence the fallbacks.
    rank = int(tr.find("td", {"class": "MMLItemRank"}).find("span").text.strip("위"))   # strip the "위" (place) suffix
    title = tr.find("td", {"class": "MMLItemTitle"}).find("a", {"class": "MMLI_Song"}).text
    try:
        artist = tr.find("td", {"class": "MMLItemTitle"}).find("a", {"class": "MMLIInfo_Artist"}).text
    except AttributeError:
        artist = "None"
    try:
        album = tr.find("td", {"class": "MMLItemTitle"}).find("a", {"class": "MMLIInfo_Album"}).text.replace("\xa0", " ").replace("\u2013", "-")
    except AttributeError:
        album = "None"
    info = {"rank": rank, "title": title, "artist": artist, "album": album}
    return info


def saveimage(rank, date, tr):
    # Download the album art for one row into the date-named folder.
    img_url = tr.find("img")["src"]
    filename = os.path.join(date, "{:03}.jpeg".format(rank))
    response = requests.get(img_url)
    with open(filename, "wb") as file:
        file.write(response.content)


date = input("Which date [YYYYmmdd] should the chart be crawled for? ")
pages = int(input("How many pages [50 entries per page] should be crawled? "))
os.makedirs(date, exist_ok=True)   # portable replacement for os.system("md ...")

chart = []

for page in range(1, pages + 1):
    url = "http://www.mnet.com/chart/TOP100/{}?pNum={}".format(date, page)
    response = requests.get(url)
    response.encoding = 'UTF-8'
    data = response.text
    html = BeautifulSoup(data, 'html.parser')
    tr_list = html.find("table").find("tbody").find_all("tr")

    for tr in tr_list:
        info = findinfo(tr)
        saveimage(info["rank"], date, tr)
        chart.append([info["rank"], info["title"], info["artist"], info["album"]])

filename = os.path.join(date, "chart-{}.csv".format(date))
with open(filename, "w", newline="") as file:
    csvfile = csv.writer(file)
    for item in chart:
        csvfile.writerow(item)
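
# Image downloads can fail if a row's album art link is dead. A more
# defensive variant of saveimage, a sketch with the same signature:
def saveimage_safe(rank, date, tr):
    img_url = tr.find("img")["src"]
    response = requests.get(img_url)
    if response.status_code != 200:   # skip rows whose art is missing
        return
    filename = os.path.join(date, "{:03}.jpeg".format(rank))
    with open(filename, "wb") as file:
        file.write(response.content)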






# Crawling the Naver breaking-news page


import requests
from bs4 import BeautifulSoup


date = input("Which date [YYYYmmdd] should the articles be crawled for? ")
pages = int(input("How many pages [20 articles per page] should be crawled? "))

for page in range(1, pages + 1):
    url = "https://news.naver.com/main/list.nhn?mode=LSD&mid=sec&sid1=001&date={}&page={}".format(date, page)
    response = requests.get(url)
    text = response.text
    html = BeautifulSoup(text, 'html.parser')

    li_list = html.find("div", {"class": "list_body"}).find_all("li")
    for li in li_list:
        # The headline sits in the <dt> without a class attribute; the
        # thumbnail, when present, sits in a <dt class="photo">.
        title = li.find("dt", "").text.strip("\n\r\t ")
        try:
            img_url = li.find("dt", {"class": "photo"}).find("img")['src'].split("?")[0]
        except AttributeError:
            img_url = "None"
        body = li.find("dd").find("span", {"class": "lede"}).text
        writer = li.find("dd").find("span", {"class": "writing"}).text
        print("Title  : {}".format(title))
        print("Photo  : {}".format(img_url))
        print("Lede   : {}".format(body))
        print("Source : {}".format(writer))
        print("-" * 86)