pipelining

Python's http.client has pipelining disabled in the most hard-coded fashion possible, making pipelining deliberately difficult to implement. BUT I COULD GET ALL THE PREVIEW THUMBS AS ONE BIG BLOB, NOT LEETLE PIECES
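
For context: HTTP/1.1 pipelining just means writing several requests down the socket before reading any response, so the replies come back concatenated in request order. A rough raw-socket sketch of the idea (the host and paths are placeholders, and real code would still have to split the blob into individual responses):

import socket

def pipelined_get(host, paths):
    # queue every GET before reading a single byte back
    s = socket.create_connection((host, 80))
    for p in paths:
        s.sendall("GET {} HTTP/1.1\r\nHost: {}\r\n\r\n"
                  .format(p, host).encode())
    # one big blob: all the responses, back to back
    return s.makefile("rb")

Stock http.client refuses to do this: HTTPConnection keeps a request/response state machine and raises CannotSendRequest if you start a second request before the previous response has been retrieved, which is the hard-coding complained about above.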
user 2015-09-24 20:39:48 +00:00
parent 8f7e189e0a
commit 9027231f7f
1 changed file with 125 additions and 66 deletions


@@ -1,9 +1,9 @@
 #!/usr/bin/python3
-from http.client import HTTPConnection,HTTPException
+from http.client import HTTPConnection,HTTPException,BadStatusLine
 import json
 import base64
 from contextlib import closing
-import sys,os,shutil
+import sys,os,shutil,time
 def die(message,code=23):
     print(message,file=sys.stderr)
@@ -13,6 +13,7 @@ server = "minetest.fensta.bplaced.net"
 skinsdir = "u_skins/textures/"
 metadir = "u_skins/meta/"
 curskin = 0
+curpage = 1
 pages = None
 def replace(location,base,encoding=None,path=None):
@@ -34,69 +35,127 @@ def maybeReplace(location,base,encoding=None):
         return replace(location,base,encoding=encoding,path=path)(handle)
     return deco
-c = HTTPConnection(server)
-def addpage(page):
-    global curskin, pages
-    print("Page: " + str(page))
-    r = 0
-    try:
-        c.request("GET", "/api/get.json.php?getlist&page=" + str(page) + "&outformat=base64")
-        r = c.getresponse()
-    except Exception:
-        if r != 0:
-            if r.status != 200:
-                die("Error", r.status)
-        return
-    data = r.read().decode()
-    l = json.loads(data)
-    if not l["success"]:
-        die("Success != True")
-    r = 0
-    pages = int(l["pages"])
-    foundOne = False
-    for s in l["skins"]:
-        # make sure to increment this, even if the preview exists!
-        curskin = curskin + 1
-        previewbase = "character_" + str(curskin) + "_preview.png"
-        preview = os.path.join(skinsdir, previewbase)
-        if os.path.exists(preview):
-            print('skin',curskin,'already retrieved')
-            continue
-        print('updating skin',curskin)
-        foundOne = True
-        @maybeReplace(skinsdir, "character_" + str(curskin) + ".png")
-        def go(f):
-            f.write(base64.b64decode(bytes(s["img"], 'utf-8')))
-            f.close()
-        @maybeReplace(metadir, "character_" + str(curskin) + ".txt",
-            encoding='utf-8')
-        def go(f):
-            f.write(str(s["name"]) + '\n')
-            f.write(str(s["author"]) + '\n')
-            f.write(str(s["license"]))
-        url = "/skins/1/" + str(s["id"]) + ".png"
+class Pipeline(list):
+    "Gawd why am I being so elaborate?"
+    def __init__(self, threshold=10):
+        "threshold is how many requests in parallel to pipeline"
+        super().__init__()
+        self.threshold = threshold
+        # True when everything queued so far has gone out on the wire
+        self.sent = True
+    def __enter__(self, *a):
+        self.reopen()
+        return self
+    def __exit__(self, *a):
+        self.send()
+        self.drain()
+    def reopen(self):
+        self.c = HTTPConnection(server)
+        self.send()
+    def append(self, url, recv, diemessage):
+        super().append((url, recv, diemessage))
+        self.sent = False
+        if len(self) > self.threshold:
+            self.send()
+            self.drain()
+    def trydrain(self):
+        # read back the responses, in the order the requests went out
+        for url, recv, diemessage in self:
+            try:
+                recv(self.c)
+            except BadStatusLine:
+                return False
+            except HTTPException:
+                die(diemessage + ' (url=' + url + ')')
+        self.clear()
+        return True
+    def drain(self):
+        print('draining pipeline...')
+        assert self.sent, "Can't drain without sending the requests!"
+        while self.trydrain() is not True:
+            self.c.close()
+            self.sent = False  # the connection died, so resend everything
+            print('drain failed, trying again')
+            time.sleep(1)
+            self.reopen()
+    def trysend(self):
+        for url, _, diemessage in self:
+            try:
+                self.c.request("GET", url)
+            except BadStatusLine:
+                return False
+            except HTTPException:
+                die(diemessage)
+        return True
+    def send(self):
+        if self.sent: return
+        print('filling pipeline...')
+        while self.trysend() is not True:
+            self.c.close()
+            print('derped resending')
+            time.sleep(1)
+            self.reopen()
+        self.sent = True
+with Pipeline() as pipeline:
+    # two connections is okay, right? one for json, one for preview images
+    c = HTTPConnection(server)
+    def addpage(page):
+        global curskin, pages
+        print("Page: " + str(page))
+        r = 0
         try:
-            c.request("GET", url)
-            with closing(c.getresponse()) as r:
+            c.request("GET", "/api/get.json.php?getlist&page=" + str(page) + "&outformat=base64")
+            r = c.getresponse()
+        except Exception:
+            if r != 0:
                 if r.status != 200:
-                    print("Error", r.status)
-                    continue
-                @replace(skinsdir,previewbase,path=preview)
-                def go(f):
-                    shutil.copyfileobj(r,f)
-        except HTTPException as e:
-            die("Couldn't get {} because of a {} (url={})".format(
-                s["id"],
-                e,
-                url))
-    if not foundOne:
-        print("No skins updated on this page. Seems we're done?")
-        raise SystemExit
-addpage(1)
-curpage = 1
-while pages > curpage:
-    curpage = curpage + 1
-    addpage(curpage)
-print("Skins have been updated!")
+                    die("Error", r.status)
+            return
+        data = r.read().decode()
+        l = json.loads(data)
+        if not l["success"]:
+            die("Success != True")
+        r = 0
+        pages = int(l["pages"])
+        foundOne = False
+        for s in l["skins"]:
+            # make sure to increment this, even if the preview exists!
+            curskin = curskin + 1
+            previewbase = "character_" + str(curskin) + "_preview.png"
+            preview = os.path.join(skinsdir, previewbase)
+            if os.path.exists(preview):
+                print('skin',curskin,'already retrieved')
+                continue
+            print('updating skin',curskin,'id',s["id"])
+            foundOne = True
+            @maybeReplace(skinsdir, "character_" + str(curskin) + ".png")
+            def go(f):
+                f.write(base64.b64decode(bytes(s["img"], 'utf-8')))
+                f.close()
+            @maybeReplace(metadir, "character_" + str(curskin) + ".txt",
+                encoding='utf-8')
+            def go(f):
+                f.write(str(s["name"]) + '\n')
+                f.write(str(s["author"]) + '\n')
+                f.write(str(s["license"]))
+            url = "/skins/1/" + str(s["id"]) + ".png"
+            def tryget(c, previewbase=previewbase, preview=preview):
+                # default args pin the current loop values; the pipeline
+                # calls this back long after the loop has moved on
+                with closing(c.getresponse()) as r:
+                    if r.status != 200:
+                        print("Error", r.status)
+                        return
+                    @replace(skinsdir,previewbase,path=preview)
+                    def go(f):
+                        shutil.copyfileobj(r,f)
+            pipeline.append(url, tryget,
+                "Couldn't get {}".format(s["id"]))
+        if not foundOne:
+            print("No skins updated on this page. Seems we're done?")
+            #raise SystemExit
+    addpage(1)
+    while pages > curpage:
+        curpage = curpage + 1
+        addpage(curpage)
+    print("Skins have been updated!")