pipelining

Python's http.client has pipelining disabled in the most hard-coded fashion possible in Python, deliberately making pipelining difficult to implement. BUT I COULD GET ALL THE PREVIEW THUMBS AS ONE BIG BLOB, NOT LEETLE PIECES
This commit is contained in:
user 2015-09-21 18:31:06 +00:00
parent fdf52ebd86
commit 8077a32b99
1 changed file with 126 additions and 67 deletions

View File

@ -1,9 +1,9 @@
#!/usr/bin/python3 #!/usr/bin/python3
from http.client import HTTPConnection,HTTPException from http.client import HTTPConnection,HTTPException,BadStatusLine
import json import json
import base64 import base64
from contextlib import closing from contextlib import closing
import sys,os,shutil import sys,os,shutil,time
def die(message,code=23): def die(message,code=23):
print(message,file=sys.stderr) print(message,file=sys.stderr)
@ -12,7 +12,8 @@ def die(message,code=23):
server = "minetest.fensta.bplaced.net" server = "minetest.fensta.bplaced.net"
skinsdir = "u_skins/textures/" skinsdir = "u_skins/textures/"
metadir = "u_skins/meta/" metadir = "u_skins/meta/"
curskin = 0 curskin = 580
curpage = 59
pages = None pages = None
def replace(location,base,encoding=None,path=None): def replace(location,base,encoding=None,path=None):
@ -34,69 +35,127 @@ def maybeReplace(location,base,encoding=None):
return replace(location,base,encoding=encoding,path=path)(handle) return replace(location,base,encoding=encoding,path=path)(handle)
return deco return deco
c = HTTPConnection(server) class Pipeline(list):
def addpage(page): "Gawd why am I being so elaborate?"
global curskin, pages def __init__(self, threshold=10):
print("Page: " + str(page)) "threshold is how many requests in parallel to pipeline"
r = 0 self.threshold = threshold
try: self.sent = True
c.request("GET", "/api/get.json.php?getlist&page=" + str(page) + "&outformat=base64") def __enter__(self,*a):
r = c.getresponse() self.reopen()
except Exception: def __exit__(self):
if r != 0: self.drain()
if r.status != 200: def reopen(self):
die("Error", r.status) self.c = HTTPConnection(server)
return self.send()
def append(self,url,recv,diemessage):
data = r.read().decode() super().append((url,recv,diemessage))
l = json.loads(data) if len(self) > self.threshold:
if not l["success"]: self.send()
die("Success != True") self.drain()
r = 0 def trydrain(self):
pages = int(l["pages"]) for url,recv,diemessage in self:
foundOne = False try:
for s in l["skins"]: recv(self.c)
# make sure to increment this, even if the preview exists! except BadStatusLine as e:
curskin = curskin + 1 return False
previewbase = "character_" + str(curskin) + "_preview.png" except HTTPException as e:
preview = os.path.join(skinsdir, previewbase) die(diemessage+' (url='+url+')')
if os.path.exists(preview): self.clear()
print('skin',curskin,'already retrieved') return True
continue def drain(self):
print('updating skin',curskin) print('draining pipeline...')
foundOne = True assert self.sent, "Can't drain without sending the requests!"
@maybeReplace(skinsdir, "character_" + str(curskin) + ".png") self.sent = False
def go(f): while trydrain() is not True:
f.write(base64.b64decode(bytes(s["img"], 'utf-8'))) self.c.close()
f.close() print('derped requesting',url)
print('drain failed, trying again')
@maybeReplace(metadir, "character_" + str(curskin) + ".txt", time.sleep(1)
encoding='utf-8') self.reopen()
def go(f): def trysend(self):
f.write(str(s["name"]) + '\n') for url,_,diemessage in pipeline:
f.write(str(s["author"]) + '\n') try:
f.write(str(s["license"])) self.c.request("GET", url)
url = "/skins/1/" + str(s["id"]) + ".png" except BadStatusLine:
return False
except HTTPException as e:
die(diemessage)
return True
def send(self):
if self.sent: return
print('filling pipeline...')
while self.tryresend() is not True:
self.c.close()
print('derped resending')
time.sleep(1)
self.reopen()
self.sent = True
with Pipeline() as pipeline:
# two connections is okay, right? one for json, one for preview images
c = HTTPConnection(server)
def addpage(page):
global curskin, pages
print("Page: " + str(page))
r = 0
try: try:
c.request("GET", url) c.request("GET", "/api/get.json.php?getlist&page=" + str(page) + "&outformat=base64")
with closing(c.getresponse()) as r: r = c.getresponse()
except Exception:
if r != 0:
if r.status != 200: if r.status != 200:
print("Error", r.status) die("Error", r.status)
continue return
@replace(skinsdir,previewbase,path=preview)
def go(f): data = r.read().decode()
shutil.copyfileobj(r,f) l = json.loads(data)
except HTTPException as e: if not l["success"]:
die("Couldn't get {} because of a {} (url={})".format( die("Success != True")
s["id"], r = 0
e, pages = int(l["pages"])
url)) foundOne = False
if not foundOne: for s in l["skins"]:
print("No skins updated on this page. Seems we're done?") # make sure to increment this, even if the preview exists!
raise SystemExit curskin = curskin + 1
addpage(1) previewbase = "character_" + str(curskin) + "_preview.png"
curpage = 1 preview = os.path.join(skinsdir, previewbase)
while pages > curpage: if os.path.exists(preview):
curpage = curpage + 1 print('skin',curskin,'already retrieved')
addpage(curpage) continue
print("Skins have been updated!") print('updating skin',curskin,'id',s["id"])
foundOne = True
@maybeReplace(skinsdir, "character_" + str(curskin) + ".png")
def go(f):
f.write(base64.b64decode(bytes(s["img"], 'utf-8')))
f.close()
@maybeReplace(metadir, "character_" + str(curskin) + ".txt",
encoding='utf-8')
def go(f):
f.write(str(s["name"]) + '\n')
f.write(str(s["author"]) + '\n')
f.write(str(s["license"]))
url = "/skins/1/" + str(s["id"]) + ".png"
def tryget(c):
with closing(c.getresponse()) as r:
if r.status != 200:
print("Error", r.status)
return
@replace(skinsdir,previewbase,path=preview)
def go(f):
shutil.copyfileobj(r,f)
pipeline.append(url,tryget,
"Couldn't get {} because of a {}".format(
s["id"],
e))
if not foundOne:
print("No skins updated on this page. Seems we're done?")
#raise SystemExit
addpage(1)
while pages > curpage:
curpage = curpage + 1
addpage(curpage)
print("Skins have been updated!")