from urllib.request import urlretrieve
import urllib.parse
from urllib.parse import urlencode, urlparse, parse_qs
import webbrowser
from bs4 import BeautifulSoup
import requests
# Scrape Google search results for a question read from OCR.txt and print
# the destination URL of every result link found on the page.

# Base Google search URL. It already carries a fixed query
# (site:pdf.th7.cn/down/files arduino pdf), so extra terms are appended to it.
address = 'https://www.google.co.kr/search?num=10&ie=UTF-8&q=site:pdf.th7.cn/down/files+arduino+pdf'

# Read the text document that contains the question; `with` guarantees the
# file is closed even if reading fails.
with open("OCR.txt", "rt") as file:
    word = file.read()

# The question is spread over multiple lines: join the non-empty lines with
# single spaces so no stray leading/trailing blanks end up in the query.
myList = [item.strip() for item in word.splitlines() if item.strip()]
newString = ' '.join(myList)

# URL-encode the question so it can be embedded in the query string.
qstr = urllib.parse.quote_plus(newString)

# Combine the base and the encoded question. '+' is the encoded space that
# separates search terms; an empty question leaves the base URL untouched.
newWord = address + '+' + qstr if qstr else address

# Request the combined URL (the base URL alone would ignore the question).
# The timeout keeps the script from hanging forever on a stalled connection.
source = requests.get(newWord, timeout=10)
html = source.text
soup = BeautifulSoup(html, 'lxml')

# Result hrefs are expected to look like "/url?q=<destination>&<other params>".
redirect_prefix = '/url?q='
for tag in soup.select('h3[class=r]'):
    link = tag.a
    # Skip headings without a usable link instead of crashing on them.
    if link is None or not link.get('href'):
        continue
    target = link['href']
    # Remove the prefix as a whole string: str.lstrip() would treat it as a
    # set of characters and also eat leading letters of the real URL.
    if target.startswith(redirect_prefix):
        target = target[len(redirect_prefix):]
    # Keep only the destination, dropping the parameters after the first '&'.
    target = target.split('&')[0]
    # Double-encoded spaces ("%2520") are shown as plain spaces.
    target = target.replace("%2520", " ")
    print(target)
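# Other posts in the '[ Other Activities ] > Python' category
#
# | Downloading PDF files from a specific folder (0) | 2018.05.16 |
# |---|---|
# | Downloading an image from a URL with Python urllib but receiving HTTP Error 403: Forbidden (0) | 2018.05.16 |
# | Downloading files with Python (0) | 2018.05.16 |
# | How to use Google Search (0) | 2018.05.15 |
# | Beautiful Soup documentation (0) | 2018.05.15 |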