PyPyは本当に速いのか?
先日のjson速度計測を使いまわして試してみた。
※PyPyではujsonのコンパイルが通らず、インストールできませんでした。
速いのはyamlだけ。PyPyはC言語でモジュールを書いた部分が混ざると逆に遅くなるとか?
PyPy専用に書けば速くなるのかもしれないけど、すべてのモジュールを書き直すのも無駄なのでPyPyを使うとしたら標準モジュールでやりきれるとこまでかな。実装しなおすならRPythonかせめてCythonで書く方が有益な気がします。
前回の計測と動作速度が大きく違うのは、今回は非力なノートPCで動かしたせいです。
サンプルデータ出力
"""Generate the sample data set used by the serializer benchmark.

Walks a directory tree, records name / size / mtime / ctime for every file,
and writes the resulting nested dict to the current directory as
gzip-compressed JSON, YAML, MessagePack and BSON.

NOTE(review): the text serializers write straight into a binary gzip handle,
which relies on Python 2 ``str`` semantics -- confirm before running on
Python 3.
"""
import os
import json
import gzip
import time

import yaml
import msgpack
import bson

# Maps absolute directory path -> {file name -> metadata dict}.
dic = {}
for rootdir, dirs, files in os.walk('/home/shive'):
    rootdir = os.path.abspath(rootdir)
    node = {}
    for name in files:
        path = os.path.join(rootdir, name)
        try:
            # One stat call supplies size, mtime and ctime together.
            st = os.stat(path)
        except OSError:
            # Dangling symlinks and files that disappear or become unreadable
            # during the walk cannot be stat-ed; skip them instead of
            # aborting the whole dump.
            continue
        node[name] = {
            'name': name,
            'size': int(st.st_size),
            'mtime': time.ctime(st.st_mtime),
            'ctime': time.ctime(st.st_ctime),
        }
    dic[rootdir] = node
    # Prune hidden directories in place so os.walk does not descend into them.
    dirs[:] = [s for s in dirs if not s.startswith('.')]

with gzip.open('data.json.gz', 'wb') as fp:
    json.dump(dic, fp, ensure_ascii=False)

with gzip.open('data.yaml.gz', 'wb') as fp:
    yaml.safe_dump(dic, fp)

with gzip.open('data.mpac.gz', 'wb') as fp:
    msgpack.dump(dic, fp)

with gzip.open('data.bson.gz', 'wb') as fp:
    fp.write(bson.BSON.encode(dic))
計測コードは前回とほぼ同じ
import sys import gzip import timeit import json import yaml import bson import pymongo import msgpack try: import ujson except ImportError: ujson = None #======================================================================================================================= def json_loads(dump): return json.loads(dump) #======================================================================================================================= def ujson_loads(dump): return ujson.loads(dump) #======================================================================================================================= def yaml_loads(dump): return yaml.load(dump) #======================================================================================================================= def cyaml_loads(dump): return yaml.load(dump, Loader=yaml.CLoader) #======================================================================================================================= def bson_loads(dump): return bson.BSON(dump).decode() #======================================================================================================================= def msgpack_loads(dump): return msgpack.loads(dump) #======================================================================================================================= def test_load(): """load時間計測""" print '[load]' dump = gzip.open('data.json.gz', 'rb').read() print 'json.loads: %.5f sec' % (timeit.timeit(stmt=lambda : json_loads(dump), number=40) / 40) dump = gzip.open('data.yaml.gz', 'rb').read() print 'yaml.load: %.5f sec' % (timeit.timeit(stmt=lambda : yaml_loads(dump), number=1) / 1) if yaml.__with_libyaml__: dump = gzip.open('data.yaml.gz', 'rb').read() print 'yaml.load(libyaml): %.5f sec' % (timeit.timeit(stmt=lambda : cyaml_loads(dump), number=1) / 1) else: print 'yaml.load(libyaml):', '-' dump = gzip.open('data.bson.gz', 'rb').read() print 'bson.BSON.decode: %.5f sec' % (timeit.timeit(stmt=lambda : bson_loads(dump), number=110) / 
110) dump = gzip.open('data.json.gz', 'rb').read() if ujson: print 'ujson.loads: %.5f sec' % (timeit.timeit(stmt=lambda : ujson_loads(dump), number=100) / 100) else: print 'ujson.loads: -' dump = gzip.open('data.mpac.gz', 'rb').read() print 'msgpack.loads: %.5f sec' % (timeit.timeit(stmt=lambda : msgpack_loads(dump), number=250) / 250) print #======================================================================================================================= def json_dumps(data): return json.dumps(data) #======================================================================================================================= def ujson_dumps(data): return ujson.dumps(data) #======================================================================================================================= def yaml_dumps(data): return yaml.dump(data) #======================================================================================================================= def cyaml_dumps(data): return yaml.dump(data, Dumper=yaml.CDumper) #======================================================================================================================= def bson_dumps(data): return bson.BSON.encode(data) #======================================================================================================================= def msgpack_dumps(data): return msgpack.dumps(data) #======================================================================================================================= def test_dump(): """dump時間計測""" dump = gzip.open('data.mpac.gz', 'rb').read() data = msgpack.loads(dump) print '[dump]' print 'json.dumps: %.5f sec' % (timeit.timeit(stmt=lambda : json_dumps(data), number=75) / 75) print 'yaml.dump: %.5f sec' % (timeit.timeit(stmt=lambda : yaml_dumps(data), number=1) / 1) if yaml.__with_libyaml__: print 'yaml.dump(libyaml): %.5f sec' % (timeit.timeit(stmt=lambda : cyaml_dumps(data), number=1) / 1) else: print 'yaml.dump(libyaml):', '-' print 'bson.BSON.encode: 
%.5f sec' % (timeit.timeit(stmt=lambda : bson_dumps(data), number=18) / 18) if ujson: print 'ujson.dumps: %.5f sec' % (timeit.timeit(stmt=lambda : ujson_dumps(data), number=116) / 116) else: print 'ujson.dumps: -' print 'msgpack.dumps: %.5f sec' % (timeit.timeit(stmt=lambda : msgpack.dumps(data), number=166) / 166) print #======================================================================================================================= def validationcheck(): """dump/loadがちゃんと可逆になっているかの動作確認""" print '[data]' dump = gzip.open('data.json.gz', 'rb').read() dump = json_dumps(json_loads(dump)) dump_len = len(dump) print 'data.json:', dump_len, 'bytes' dump = json_dumps(json_loads(dump)) assert len(dump) == dump_len, len(dump) if ujson: dump = ujson_dumps(ujson_loads(dump)) ### jsonとujsonとで出力の仕方が微妙に違うので再dumpしてからチェック dump_len = len(dump) print 'data.json:', dump_len, 'bytes', '(ujson)' dump = ujson_dumps(ujson_loads(dump)) assert len(dump) == dump_len, len(dump) else: print 'data.json: - (ujson)' dump = gzip.open('data.yaml.gz', 'rb').read() dump = yaml_dumps(yaml_loads(dump)) dump_len = len(dump) print 'data.yaml:', dump_len, 'bytes' dump = yaml_dumps(yaml_loads(dump)) assert len(dump) == dump_len if yaml.__with_libyaml__: dump = cyaml_dumps(cyaml_loads(dump)) dump_len = len(dump) print 'data.yaml:', dump_len, 'bytes', '(libyaml)' dump = cyaml_dumps(cyaml_loads(dump)) assert len(dump) == dump_len else: print 'data.yaml: - (libyaml)' dump = gzip.open('data.bson.gz', 'rb').read() dump_len = len(dump) print 'data.bson:', dump_len, 'bytes' dump = bson_dumps(bson_loads(dump)) assert len(dump) == dump_len dump = gzip.open('data.mpac.gz', 'rb').read() dump_len = len(dump) print 'data.mpac:', dump_len, 'bytes' dump = msgpack_dumps(msgpack_loads(dump)) assert len(dump) == dump_len print #======================================================================================================================= def version(): print '[version]' print 'sys.version:', sys.version print 
'json.__version__:', json.__version__ print 'yaml.__version__:', yaml.__version__, 'libyaml=' + str(yaml.__with_libyaml__) print 'pymongo.version(bson):', pymongo.version if ujson: print 'ujson.__version__:', ujson.__version__ else: print 'ujson.__version__: -' print 'msgpack.version:', msgpack.version print #======================================================================================================================= def main(): version() validationcheck() test_load() test_dump() return 0 #======================================================================================================================= if __name__ == '__main__': sys.exit(main() or 0)
CPythonで実行
[version] sys.version: 2.7.3 (default, Aug 1 2012, 05:16:07) [GCC 4.6.3] json.__version__: 2.0.9 yaml.__version__: 3.10 libyaml=False pymongo.version(bson): 2.4.2 ujson.__version__: 1.30 msgpack.version: (0, 3, 0) [data] data.json: 2719381 bytes data.json: 2563594 bytes (ujson) data.yaml: 2594612 bytes data.yaml: - (libyaml) data.bson: 2610551 bytes data.mpac: 2180995 bytes [load] json.loads: 0.27414 sec yaml.load: 55.24576 sec yaml.load(libyaml): - bson.BSON.decode: 0.11302 sec ujson.loads: 0.11755 sec msgpack.loads: 0.06876 sec [dump] json.dumps: 0.15120 sec yaml.dump: 36.16442 sec yaml.dump(libyaml): - bson.BSON.encode: 0.19599 sec ujson.dumps: 0.09113 sec msgpack.dumps: 0.07143 sec
PyPyで実行
[version] sys.version: 2.7.2 (341e1e3821ff, Jun 07 2012, 15:40:31) [PyPy 1.9.0 with GCC 4.4.3] json.__version__: 2.0.9 yaml.__version__: 3.10 libyaml=False pymongo.version(bson): 2.4.2 ujson.__version__: - msgpack.version: (0, 3, 0) [data] data.json: 2719381 bytes data.json: - (ujson) data.yaml: 2594612 bytes data.yaml: - (libyaml) data.bson: 2610551 bytes data.mpac: 2180995 bytes [load] json.loads: 0.60501 sec yaml.load: 8.02591 sec yaml.load(libyaml): - bson.BSON.decode: 0.38399 sec ujson.loads: - msgpack.loads: 0.26650 sec [dump] json.dumps: 0.26044 sec yaml.dump: 8.60209 sec yaml.dump(libyaml): - bson.BSON.encode: 0.25331 sec ujson.dumps: - msgpack.dumps: 0.13120 sec