from bs4 import BeautifulSoup
import requests
import urllib.parse
# Query Google's Korean front end for results under koreasw.org/wp-content/
# matching "아두이노 pdf" (num=100 in the query string) and print the target URL
# of every result link found on the returned page.
address = 'https://www.google.co.kr/search?num=100&ie=UTF-8&q=site:koreasw.org/wp-content/+아두이노+pdf'
# A timeout keeps the script from blocking forever on a stalled connection.
resp = requests.get(address, timeout=10)
resp.raise_for_status()
# Decode the body as EUC-KR, as the original script does.
# NOTE(review): the query only sets ie=UTF-8 (input encoding); confirm the
# response really is EUC-KR for this client.
resp.encoding = 'euc-kr'
html = resp.text
# Only `BeautifulSoup` is imported from bs4, so it must be referenced directly;
# `bs4.BeautifulSoup` raised NameError.
bs = BeautifulSoup(html, 'html.parser')
redirect_prefix = '/url?q='
# Select anchors nested inside <div class="jfp3ef">.
# NOTE(review): this class name comes from Google's markup and may change.
for tags in bs.select('div.jfp3ef a'):
    target = tags.get('href')
    if not target:
        # Anchor without an href: nothing to print.
        continue
    # Remove the redirect prefix exactly once.  str.lstrip() would treat the
    # argument as a set of characters and could also eat leading characters
    # of the real URL.
    if target.startswith(redirect_prefix):
        target = target[len(redirect_prefix):]
    # Drop the extra query parameters appended after the target URL.
    target = target.split('&')[0]
    # "%2520" is a doubly percent-encoded space.
    target = target.replace("%2520", " ")
    # Unquote twice on purpose: the href can be percent-encoded twice (see the
    # "%2520" case above), which matters for Korean file names.
    target = urllib.parse.unquote(target)
    target = urllib.parse.unquote(target)
    print(target)
from bs4 import BeautifulSoup
import requests
# Query Google's Korean front end for results under www.unicitypartner.cz/pdf/
# matching "pdf" (num=100 in the query string) and print the target URL of
# every result heading link found on the returned page.
address = 'https://www.google.co.kr/search?num=100&ie=UTF-8&q=site:http://www.unicitypartner.cz/pdf/ pdf'
# A timeout keeps the script from blocking forever on a stalled connection.
source = requests.get(address, timeout=10)
# Fail loudly on HTTP errors instead of parsing an error page, matching the
# other search scripts in this file.
source.raise_for_status()
html = source.text
soup = BeautifulSoup(html, 'lxml')
redirect_prefix = '/url?q='
# Select <h3 class="r"> elements and read the href of their first anchor.
# NOTE(review): this class name comes from Google's markup and may change.
for tag in soup.select('h3[class=r]'):
    link = tag.a
    if link is None or not link.get('href'):
        # Heading without a usable link: skip instead of raising.
        continue
    target = link['href']
    # Remove the redirect prefix exactly once.  str.lstrip() would treat the
    # argument as a set of characters and could also eat leading characters
    # of the real URL.
    if target.startswith(redirect_prefix):
        target = target[len(redirect_prefix):]
    # Drop the extra query parameters appended after the target URL.
    target = target.split('&')[0]
    # "%2520" is a doubly percent-encoded space.
    target = target.replace("%2520", " ")
    print(target)
from bs4 import BeautifulSoup
import requests
# Query Google's Korean front end for results under
# www.asamanthinketh.net/files/ matching "pdf" (num=100 in the query string)
# and print the target URL of every result link found on the returned page.
address = 'https://www.google.co.kr/search?num=100&ie=UTF-8&q=site:www.asamanthinketh.net/files/ pdf'
# A timeout keeps the script from blocking forever on a stalled connection.
resp = requests.get(address, timeout=10)
resp.raise_for_status()
# Decode the body as EUC-KR, as the original script does.
# NOTE(review): the query only sets ie=UTF-8 (input encoding); confirm the
# response really is EUC-KR for this client.
resp.encoding = 'euc-kr'
html = resp.text
# Only `BeautifulSoup` is imported from bs4, so it must be referenced directly;
# `bs4.BeautifulSoup` raised NameError.
bs = BeautifulSoup(html, 'html.parser')
redirect_prefix = '/url?q='
# Select anchors nested inside <div class="jfp3ef">.
# NOTE(review): this class name comes from Google's markup and may change.
for tags in bs.select('div.jfp3ef a'):
    target = tags.get('href')
    if not target:
        # Anchor without an href: nothing to print.
        continue
    # Remove the redirect prefix exactly once.  str.lstrip() would treat the
    # argument as a set of characters and could also eat leading characters
    # of the real URL.
    if target.startswith(redirect_prefix):
        target = target[len(redirect_prefix):]
    # Drop the extra query parameters appended after the target URL.
    target = target.split('&')[0]
    # "%2520" is a doubly percent-encoded space.
    target = target.replace("%2520", " ")
    print(target)
from bs4 import BeautifulSoup
import requests
# Query Google's Korean front end for results under
# cdn-learn.adafruit.com/downloads/pdf/ and print the target URL of every
# result link found on the returned page.
address = 'https://www.google.co.kr/search?num=3000&start=100&ie=UTF-8&q=site:https://cdn-learn.adafruit.com/downloads/pdf/'
# Query parameters, per the original author's notes:
#   num=3000  : total number of search results
#   start=100 : with 100 results per page, this is the start of the second
#               page; start=200 would be the third page
# A timeout keeps the script from blocking forever on a stalled connection.
resp = requests.get(address, timeout=10)
resp.raise_for_status()
# Decode the body as EUC-KR, as the original script does.
# NOTE(review): the query only sets ie=UTF-8 (input encoding); confirm the
# response really is EUC-KR for this client.
resp.encoding = 'euc-kr'
html = resp.text
# Only `BeautifulSoup` is imported from bs4, so it must be referenced directly;
# `bs4.BeautifulSoup` raised NameError.
bs = BeautifulSoup(html, 'html.parser')
redirect_prefix = '/url?q='
# Select anchors nested inside <div class="jfp3ef">; "jfp3ef" is the class
# attribute of the div tag that holds each link.
# NOTE(review): this class name comes from Google's markup and may change.
for tags in bs.select('div.jfp3ef a'):
    target = tags.get('href')
    if not target:
        # Anchor without an href: nothing to print.
        continue
    # Remove the redirect prefix exactly once.  str.lstrip() would treat the
    # argument as a set of characters and could also eat leading characters
    # of the real URL.
    if target.startswith(redirect_prefix):
        target = target[len(redirect_prefix):]
    # Drop the extra query parameters appended after the target URL.
    target = target.split('&')[0]
    # "%2520" is a doubly percent-encoded space.
    target = target.replace("%2520", " ")
    print(target)
from bs4 import BeautifulSoup
import requests
# Query Google's Korean front end for results under
# cdn-learn.adafruit.com/downloads/pdf/ and print the target URL of every
# result link, with any percent-encoded suffix after the file name removed.
address = 'https://www.google.co.kr/search?num=3000&start=100&ie=UTF-8&q=site:https://cdn-learn.adafruit.com/downloads/pdf/'
# A timeout keeps the script from blocking forever on a stalled connection.
resp = requests.get(address, timeout=10)
resp.raise_for_status()
# Decode the body as EUC-KR, as the original script does.
# NOTE(review): the query only sets ie=UTF-8 (input encoding); confirm the
# response really is EUC-KR for this client.
resp.encoding = 'euc-kr'
html = resp.text
# Only `BeautifulSoup` is imported from bs4, so it must be referenced directly;
# `bs4.BeautifulSoup` raised NameError.
bs = BeautifulSoup(html, 'html.parser')
redirect_prefix = '/url?q='
# Select anchors nested inside <div class="jfp3ef">.
# NOTE(review): this class name comes from Google's markup and may change.
for tags in bs.select('div.jfp3ef a'):
    target = tags.get('href')
    if not target:
        # Anchor without an href: nothing to print.
        continue
    # Remove the redirect prefix exactly once.  str.lstrip() would treat the
    # argument as a set of characters and could also eat leading characters
    # of the real URL.
    if target.startswith(redirect_prefix):
        target = target[len(redirect_prefix):]
    # Drop the extra query parameters appended after the target URL.
    target = target.split('&')[0]
    # "%2520" is a doubly percent-encoded space.
    target = target.replace("%2520", " ")
    # Cut everything from the first remaining '%' onwards, i.e. whatever is
    # tacked on after the file name, such as "%3Ftimestamp%3D1561616644".
    # split() returns the whole string when no '%' is present, so no guard
    # is needed.
    target = target.split('%')[0]
    print(target)
'[ 기타 활동 ] > 파이썬' 카테고리의 다른 글
python에서 tinyDB사용하기 (0) | 2018.07.13 |
---|---|
MYSQL사용하기 (0) | 2018.07.13 |
정규 표현식 (Regular Expression) (0) | 2018.05.16 |
파이썬에서 엑셀 사용하기 (0) | 2018.05.16 |
Selenium 사용하기 (0) | 2018.05.16 |