40 lines
1.3 KiB
Python
Executable File
40 lines
1.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
import codecs
|
|
from operator import itemgetter
|
|
import sys
|
|
|
|
|
|
def count_bytes(paths):
    """Return a 256-entry list of byte frequencies over the given files.

    Each file is decoded as UTF-8 and read line by line. Every line is
    stripped of leading/trailing whitespace (so line terminators and
    surrounding blanks are not counted) and then re-encoded to UTF-8 so
    that individual bytes, not code points, are tallied.
    """
    freqs = [0] * 256  # byte |--> frequency
    for fpath in paths:
        with codecs.open(fpath, 'r', 'utf-8') as fin:
            for line in fin:
                # bytearray iterates as ints on both Python 2 and 3; a
                # plain encoded str yields 1-char strings on Python 2,
                # which cannot be used as list indices.
                for byte in bytearray(line.strip().encode('utf-8')):
                    freqs[byte] += 1
    return freqs


def rank_bytes(freqs):
    """Map each byte to its rank when sorted by descending frequency.

    The sort is stable, so bytes with equal counts keep ascending byte
    order among themselves. The result is a permutation of 0..255.
    """
    orders = [0] * 256  # byte |--> sort index, descending
    sort_by_freq = sorted(zip(range(256), freqs),
                          key=itemgetter(1), reverse=True)
    for sort_idx, byte in enumerate(map(itemgetter(0), sort_by_freq)):
        orders[byte] = sort_idx
    return orders


def rust_byte_literal(byte):
    """Return Rust byte-literal source text (e.g. ``b'a'``) for ``byte``."""
    if byte > 127:
        return "b'\\x%x'" % byte
    if byte == ord("'"):
        # repr("'") is "'" wrapped in double quotes, which would produce
        # the Rust byte *string* b"'" instead of a u8 byte literal.
        return "b'\\''"
    return 'b%r' % chr(byte)


def render_rust(orders):
    """Render the forward and inverse rank tables as Rust source text.

    ``orders`` must be a permutation of 0..255 mapping byte -> rank.
    """
    # Invert the permutation once instead of calling orders.index() for
    # every rank.
    inverse = [0] * 256  # sort index |--> byte
    for byte, sort_idx in enumerate(orders):
        inverse[sort_idx] = byte

    olines = ['pub const COMMON_INPUTS: [u8; 256] = [']
    for byte in range(256):
        olines.append(' %3d, // %r' % (orders[byte], chr(byte)))
    olines.append('];')
    olines.append('')
    olines.append('pub const COMMON_INPUTS_INV: [u8; 256] = [')
    for sort_idx in range(256):
        olines.append(' %s,' % rust_byte_literal(inverse[sort_idx]))
    olines.append('];')
    return '\n'.join(olines)


if __name__ == '__main__':
    # Get frequency counts of each byte, rank them, then write Rust.
    print(render_rust(rank_bytes(count_bytes(sys.argv[1:]))))
|