pipelining

Python http.client has pipelining disabled in the most hard-coded fashion possible with Python, making implementing pipelining deliberately difficult. BUT I COULD GET ALL THE PREVIEW THUMBS AS ONE BIG BLOB NOT LEETLE PIECES
This commit is contained in:
user 2015-09-21 18:31:06 +00:00
parent fdf52ebd86
commit 8077a32b99
1 changed file with 126 additions and 67 deletions

View File

@ -1,9 +1,9 @@
#!/usr/bin/python3 #!/usr/bin/python3
from http.client import HTTPConnection,HTTPException from http.client import HTTPConnection,HTTPException,BadStatusLine
import json import json
import base64 import base64
from contextlib import closing from contextlib import closing
import sys,os,shutil import sys,os,shutil,time
def die(message,code=23): def die(message,code=23):
print(message,file=sys.stderr) print(message,file=sys.stderr)
@ -12,7 +12,8 @@ def die(message,code=23):
server = "minetest.fensta.bplaced.net" server = "minetest.fensta.bplaced.net"
skinsdir = "u_skins/textures/" skinsdir = "u_skins/textures/"
metadir = "u_skins/meta/" metadir = "u_skins/meta/"
curskin = 0 curskin = 580
curpage = 59
pages = None pages = None
def replace(location,base,encoding=None,path=None): def replace(location,base,encoding=None,path=None):
@ -34,8 +35,67 @@ def maybeReplace(location,base,encoding=None):
return replace(location,base,encoding=encoding,path=path)(handle) return replace(location,base,encoding=encoding,path=path)(handle)
return deco return deco
c = HTTPConnection(server) class Pipeline(list):
def addpage(page): "Gawd why am I being so elaborate?"
def __init__(self, threshold=10):
"threshold is how many requests in parallel to pipeline"
self.threshold = threshold
self.sent = True
def __enter__(self,*a):
self.reopen()
def __exit__(self):
self.drain()
def reopen(self):
self.c = HTTPConnection(server)
self.send()
def append(self,url,recv,diemessage):
super().append((url,recv,diemessage))
if len(self) > self.threshold:
self.send()
self.drain()
def trydrain(self):
for url,recv,diemessage in self:
try:
recv(self.c)
except BadStatusLine as e:
return False
except HTTPException as e:
die(diemessage+' (url='+url+')')
self.clear()
return True
def drain(self):
print('draining pipeline...')
assert self.sent, "Can't drain without sending the requests!"
self.sent = False
while trydrain() is not True:
self.c.close()
print('derped requesting',url)
print('drain failed, trying again')
time.sleep(1)
self.reopen()
def trysend(self):
for url,_,diemessage in pipeline:
try:
self.c.request("GET", url)
except BadStatusLine:
return False
except HTTPException as e:
die(diemessage)
return True
def send(self):
if self.sent: return
print('filling pipeline...')
while self.tryresend() is not True:
self.c.close()
print('derped resending')
time.sleep(1)
self.reopen()
self.sent = True
with Pipeline() as pipeline:
# two connections is okay, right? one for json, one for preview images
c = HTTPConnection(server)
def addpage(page):
global curskin, pages global curskin, pages
print("Page: " + str(page)) print("Page: " + str(page))
r = 0 r = 0
@ -63,7 +123,7 @@ def addpage(page):
if os.path.exists(preview): if os.path.exists(preview):
print('skin',curskin,'already retrieved') print('skin',curskin,'already retrieved')
continue continue
print('updating skin',curskin) print('updating skin',curskin,'id',s["id"])
foundOne = True foundOne = True
@maybeReplace(skinsdir, "character_" + str(curskin) + ".png") @maybeReplace(skinsdir, "character_" + str(curskin) + ".png")
def go(f): def go(f):
@ -77,26 +137,25 @@ def addpage(page):
f.write(str(s["author"]) + '\n') f.write(str(s["author"]) + '\n')
f.write(str(s["license"])) f.write(str(s["license"]))
url = "/skins/1/" + str(s["id"]) + ".png" url = "/skins/1/" + str(s["id"]) + ".png"
try: def tryget(c):
c.request("GET", url)
with closing(c.getresponse()) as r: with closing(c.getresponse()) as r:
if r.status != 200: if r.status != 200:
print("Error", r.status) print("Error", r.status)
continue return
@replace(skinsdir,previewbase,path=preview) @replace(skinsdir,previewbase,path=preview)
def go(f): def go(f):
shutil.copyfileobj(r,f) shutil.copyfileobj(r,f)
except HTTPException as e:
die("Couldn't get {} because of a {} (url={})".format( pipeline.append(url,tryget,
"Couldn't get {} because of a {}".format(
s["id"], s["id"],
e, e))
url))
if not foundOne: if not foundOne:
print("No skins updated on this page. Seems we're done?") print("No skins updated on this page. Seems we're done?")
raise SystemExit #raise SystemExit
addpage(1) addpage(1)
curpage = 1 while pages > curpage:
while pages > curpage:
curpage = curpage + 1 curpage = curpage + 1
addpage(curpage) addpage(curpage)
print("Skins have been updated!") print("Skins have been updated!")