Overview

In Android FOTA update packages, now that everyone has moved to payload.bin, the old tools for converting block-based (sparse) update images to raw images are no longer maintained.
I had noticed earlier that people still wrote tools for the Brotli-compressed variant, but I recently found that some low-end TV boxes ship new.dat files compressed with LZMA, and nobody had covered that case.
So I rewrote the whole tool to make it easier to use and faster.
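
For context, a block-based OTA ships a transfer.list describing block ranges plus a new.dat payload holding the data. A minimal, hypothetical v4 transfer.list might look like this: the first line is the format version, the second the total number of new blocks, the next two are stash statistics (skipped by this tool), and each command carries a range set of the form count,start1,end1,...:

4
229953
0
0
erase 2,0,262144
new 2,0,229953
zero 2,229953,262144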

Library Installation

pip3 install Brotli

LZMA decompression comes from the standard-library lzma module, so Brotli is the only extra dependency.

Code

#!/usr/bin/env python3
import sys
import os
import lzma
import brotli
from pathlib import Path
from typing import List, Tuple, BinaryIO

# Block-based OTAs address the target image in fixed 4 KiB blocks.
BLOCK_SIZE = 4096

def isTransferList(file_path: str, sample_size: int = 1024) -> bool:
    # Heuristic: a transfer list is plain text, so a sample containing
    # NUL bytes is rejected as binary.
    try:
        with open(file_path, 'rb') as f:
            return b'\0' not in f.read(sample_size)
    except IOError:
        return False

def decompressLzmaDat(input_file: str, output_file: str):
    with lzma.open(input_file, 'rb') as lzma_file:
        with open(output_file, 'wb') as decompressed_file:
            decompressed_file.write(lzma_file.read())

def decompressBrotliDat(input_file: str, output_file: str):
    with open(input_file, 'rb') as br_file:
        with open(output_file, 'wb') as decompressed_file:
            decompressed_file.write(brotli.decompress(br_file.read()))

def rangeSet(src: str) -> List[Tuple[int, int]]:
    # A range set is 'count,start1,end1,start2,end2,...', where count is
    # the number of integers that follow, e.g. '2,0,10' -> [(0, 10)].
    src_set = [int(item) for item in src.split(',')]
    if len(src_set) != src_set[0] + 1:
        raise ValueError(f'Error parsing data to rangeSet: {src}')
    return [(src_set[i], src_set[i+1]) for i in range(1, len(src_set), 2)]

def parseTransferList(path: str) -> Tuple[int, int, List]:
    if not isTransferList(path):
        raise ValueError(f"The file '{path}' does not appear to be a valid transfer list file.")

    with open(path, 'r') as trans_list:
        version = int(trans_list.readline())
        new_blocks = int(trans_list.readline())

        if version >= 2:
            # Versions 2+ carry two stash-related lines, unused here.
            trans_list.readline()
            trans_list.readline()

        commands = []
        for line in trans_list:
            tokens = line.split()
            if not tokens:
                continue
            cmd, *params = tokens
            if cmd in ['erase', 'new', 'zero']:
                commands.append([cmd, rangeSet(params[0])])
            elif not cmd[0].isdigit():
                raise ValueError(f'Command "{cmd}" is not valid.')

    return version, new_blocks, commands

def processFile(new_data_file: BinaryIO, output_img: BinaryIO, commands: List, max_file_size: int):
    for command in commands:
        if command[0] == 'new':
            for block in command[1]:
                begin, end = block
                block_count = end - begin
                print(f'Copying {block_count} blocks into position {begin}...')

                output_img.seek(begin * BLOCK_SIZE)
                output_img.write(new_data_file.read(block_count * BLOCK_SIZE))
        else:
            print(f'Skipping command {command[0]}...')

    # Pad the image to its full size if trailing blocks were never written.
    if output_img.tell() < max_file_size:
        output_img.truncate(max_file_size)

def main(transfer_list_file: str, new_data_file: str, output_image_file: str):
    version, new_blocks, commands = parseTransferList(transfer_list_file)

    android_versions = {
        1: 'Android 5.0',
        2: 'Android 5.1',
        3: 'Android 6.0',
        4: 'Android 7.0 or Higher'
    }
    print(f'{android_versions.get(version, "Unknown")} Version Image Detected')

    output_img_path = Path(output_image_file)
    if output_img_path.exists():
        raise FileExistsError(f'Output file "{output_img_path}" already exists')

    decompressed_file = None
    if new_data_file.lower().endswith('.lzma'):
        print("LZMA file detected. Decompressing...")
        decompressed_file = new_data_file + '.decompressed'
        decompressLzmaDat(new_data_file, decompressed_file)
        new_data_file = decompressed_file
        print("Decompression Completed!")
    elif new_data_file.lower().endswith('.br'):
        print("Brotli file detected. Decompressing...")
        decompressed_file = new_data_file + '.decompressed'
        decompressBrotliDat(new_data_file, decompressed_file)
        new_data_file = decompressed_file
        print("Decompression Completed!")

    with open(new_data_file, 'rb') as new_data, output_img_path.open('wb') as output_img:
        all_block_sets = [i for command in commands for i in command[1]]
        max_file_size = max(pair[1] for pair in all_block_sets) * BLOCK_SIZE

        processFile(new_data, output_img, commands, max_file_size)

    print(f'Done! Output image: {output_img_path.resolve()}')

    if decompressed_file:
        os.remove(decompressed_file)
        print("Temporary decompressed file removed.")

if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('Usage: sdat2img_v2.py <transfer.list> <dat|dat.lzma|dat.br> [raw.img]')
        print('<transfer.list>: transfer list file')
        print('<dat|dat.lzma|dat.br>: new dat file (uncompressed, LZMA, or Brotli)')
        print('[raw.img]: output file name of the raw image\n')
        sys.exit(1)

    transfer_list_file = sys.argv[1]
    new_data_file = sys.argv[2]
    
    if len(sys.argv) > 3:
        output_image_file = sys.argv[3]
    else:
        base_name = os.path.basename(transfer_list_file).split('.')[0]
        output_image_file = f"{base_name}.raw.img"

    try:
        main(transfer_list_file, new_data_file, output_image_file)
    except Exception as e:
        print(f"An error occurred: {e}", file=sys.stderr)
        sys.exit(1)
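
A typical invocation, with illustrative file names as extracted from an OTA package:

python3 sdat2img_v2.py system.transfer.list system.new.dat.br system.img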

Executable

For Windows x64



Changelog

💼 Homepage
V01R: Basic HTML page
V02R: CSS adjustments
V03R: CSS adjustments V2

🔢 Calculator
V01R: Basic calculator functions
V02R: Added a 10-step calculation history; added dark mode support
V03R: Added swipe up/down gestures on touch devices to step back through the calculation history
V04R: Added automatic font scaling of the output area based on expression length
V05R: Adjusted CSS layout
V06R: Added floating-point support
V07R: Added single-digit delete

🈯 Chinese Converter
V01R: Basic Simplified-Traditional conversion
V02R: Added conversion based on the Taiwan terminology glossary

📑 Document
V01R: Basic text input
V02R: Added dark mode support
V03R: Added LocalStorage persistence

🛠️ Formatter
V01R: Basic JSON formatting and minification
V02R: Added XML support
V03R: Added line numbers

🔑 Help
V01R: Initial release
V02R: Revised the help text to cover the new features
V03R: Adjusted CSS layout

⌨️ Playground
V01R: Initial release
V02R: Added dark mode support

📊 Presentation
V01R: Basic presentation features
V02R: Switched to a full-page layout; added one-click page creation; added full-screen presentation mode
V03R: Added LocalStorage persistence

🤳🏽 QR Code
V01R: Initial release
V02R: Adjusted CSS layout

📈 Spreadsheet
V01R: Initial release
V02R: Added the SUM auto-sum function; added dark mode support
V03R: Added the AVERAGE/COUNT/COUNTA/COUNTIF/SUMIF/LEFT/RIGHT/MID/LEN/TRIM function set
V04R: Function algorithm optimization
V05R: Function algorithm optimization V2; added LocalStorage persistence
V06B: Added cell drag-and-drop

🖼️ Whiteboard
V01R: Initial release
V02R: Added dark mode support; added touch-device support
V03R: Improved dark mode support; added LocalStorage persistence


Recently at work I ran into a performance problem with queries between an application and its database. While discussing solutions with colleagues, we eventually settled on a linear-regression-model approach. This short post walks through a demo that uses a decision tree and a linear regression model to try to speed up depth-first search (DFS), together with a performance evaluation to serve as a reference when comparing alternatives.

Tree Construction and Data Generation

First, define a simple tree-node structure and a helper that generates a reasonably large tree:

import os
import random
import time

import joblib
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

class TreeNode:
    def __init__(self, value):
        self.value = value
        self.children = []

def createTreeBesar(depth, breadth):
    def addChildren(node, currentDepth):
        if currentDepth < depth:
            for _ in range(breadth):
                child = TreeNode(random.randint(1, 100))
                node.children.append(child)
                addChildren(child, currentDepth + 1)
    root = TreeNode(random.randint(1, 100))
    addChildren(root, 1)
    return root

Generate the sample data used to train the priority model:

def generateSampleData():
    data = []
    for _ in range(10000):
        value = random.randint(1, 1000)
        priority = random.random()
        data.append([value, priority])
    data = np.array(data)
    X = data[:, :-1]
    y = data[:, -1]
    return X, y

Model Training and Loading

To avoid retraining on every run and to keep the performance evaluation repeatable, the trained models are saved to disk:

def trainPriorityModel():
    X, y = generateSampleData()
    model = DecisionTreeRegressor()
    model.fit(X, y)
    joblib.dump(model, 'priorityModel.pkl')
    return model

def trainIndexModel(data):
    values = [node.value for node in data]
    positions = list(range(len(data)))
    model = LinearRegression()
    model.fit(np.array(values).reshape(-1, 1), positions)
    joblib.dump(model, 'indexModel.pkl')
    return model

def loadModel(filePath, trainFunc):
    if os.path.exists(filePath):
        return joblib.load(filePath)
    else:
        return trainFunc()
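
Because trainIndexModel takes an argument, loading it through loadModel needs a small wrapper, for example (assuming data is a flattened list of tree nodes, as built in the harness sketched at the end of this section):

indexModel = loadModel('indexModel.pkl', lambda: trainIndexModel(data))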

Depth-First Search and Performance Evaluation

def standardDfs(node, visited):
    if node is None or node in visited:
        return
    visited.add(node)
    for child in node.children:
        standardDfs(child, visited)

def indexedDfs(node, visited, indexModel, data):
    if node is None or node in visited:
        return
    visited.add(node)
    for child in node.children:
        locatedNode = locateNode(indexModel, child.value, data)
        indexedDfs(locatedNode, visited, indexModel, data)
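
Note that indexedDfs calls a locateNode helper the original snippet never defines. A minimal sketch, assuming the learned index simply predicts a position in the flattened node list and the prediction is clamped to valid bounds:

def locateNode(indexModel, value, data):
    # Hypothetical helper (not shown in the original post): predict the
    # node's position in the flat node list from its value, clamped to range.
    predicted = int(indexModel.predict(np.array([[value]]))[0])
    return data[max(0, min(len(data) - 1, predicted))]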

def evaluatePerformance(treeRoot, priorityModel, indexModel, data):
    startTime = time.time()
    visitedStandard = set()
    standardDfs(treeRoot, visitedStandard)
    standardTime = time.time() - startTime
    print(f"Standard DFS Run Time: {standardTime:.6f} SEC")

    startTime = time.time()
    visitedIndexed = set()
    indexedDfs(treeRoot, visitedIndexed, indexModel, data)
    indexedTime = time.time() - startTime
    print(f"Indexed DFS Run Time: {indexedTime:.6f} SEC")

Results

The results look pretty good, don't they?
[Figure: indexed_dfs_opt — benchmark output]


Thanks to @whc2001 for providing the original C# source code, which unpacks the data bundle of a certain electronic-keyboard learning machine.
I ported it to Python in my spare time to make it easier to use.

import os
import struct
import sys
from typing import List, Tuple

Encoding = 'gbk'
key = None
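
# Container layout, as recovered from the parsing code below:
#   offset 252:  uint32 directory count
#   offset 1024: 32-byte block A; offset 1056: 32-byte block B,
#                with key[i] = (B[i] - A[i]) % 256
#   offset 1280: directory table, one 128-byte encrypted entry per directory
#                (120-byte GBK name, uint32 length in 128-byte units,
#                 uint32 file-table offset)
# File-table entries are likewise 128 encrypted bytes:
#   64-byte GBK name, uint32 song data offset, uint32 song data length.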

def getKey(file_data: bytes) -> bytearray:
    # Derive the 32-byte rolling key from the byte-wise difference of the
    # two key blocks at offsets 1024 and 1056.
    key_a = file_data[1024:1024+32]
    key_b = file_data[1056:1056+32]
    ret = bytearray(32)
    for i in range(32):
        ret[i] = (key_b[i] - key_a[i]) % 256
    return ret


def getDir(file_data: bytes) -> List[Tuple[str, int, int]]:
    ret = []
    dir_num = struct.unpack('<I', file_data[252:256])[0]
    for i in range(dir_num):
        offset = 1280 + i * 128
        item = bytearray(file_data[offset:offset+128])
        # Decrypt the entry by subtracting the repeating 32-byte key.
        for j in range(len(item)):
            item[j] = (item[j] - key[j % 32]) % 256
        dir_name = item[:120].decode(Encoding).rstrip('\x00')
        dir_data_length = struct.unpack('<I', item[120:124])[0] << 7
        dir_data_offset = struct.unpack('<I', item[124:128])[0]
        ret.append((dir_name, dir_data_length, dir_data_offset))
    return ret


def getFile(file_data: bytes, dir_data_offset: int, dir_data_length: int) -> List[Tuple[str, int, int]]:
    ret = []
    for i in range(dir_data_length // 128):
        offset = dir_data_offset + i * 128
        item = bytearray(file_data[offset:offset+128])
        # Decrypt the entry with the same rolling key.
        for j in range(len(item)):
            item[j] = (item[j] - key[j % 32]) % 256
        file_name = item[:64].decode(Encoding).rstrip('\x00')
        song_data_offset = struct.unpack('<I', item[64:68])[0]
        song_data_length = struct.unpack('<I', item[68:72])[0]
        ret.append((file_name, song_data_offset, song_data_length))
    return ret


def getSong(file_data: bytes, song_data_offset: int, song_data_length: int) -> bytes:
    return file_data[song_data_offset:song_data_offset+song_data_length]

def main(input_path: str, output_path: str):
    global key
    if not os.path.exists(input_path):
        print(f"File not found: {input_path}")
        sys.exit(1)
    os.makedirs(output_path, exist_ok=True)

    with open(input_path, 'rb') as f:
        data = f.read()
    key = getKey(data)
    dirs = getDir(data)
    for dir_name, dir_data_length, dir_data_offset in dirs:
        dir_path = os.path.join(output_path, dir_name)
        os.makedirs(dir_path, exist_ok=True)
        songs = getFile(data, dir_data_offset, dir_data_length)
        for file_name, song_data_offset, song_data_length in songs:
            print(f"{dir_name} -> {file_name}")
            song_data = getSong(data, song_data_offset, song_data_length)
            with open(os.path.join(dir_path, f"{file_name}.mid"), 'wb') as f:
                f.write(song_data)

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: PianoCatSongDataExtractor.py <INPUT_FILE> <OUTPUT_FOLDER>")
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])