lz77压缩算法
创建:2023-10-27 16:47
更新:2023-10-27 17:12
#pragma once

#include <stdio.h>

template<int window = 4096, int cache = 64>
class lz77 {
    struct element {
        unsigned short start;
        unsigned char len;
        unsigned char code;
    };

    template<int _window, int _cache>
    struct windowcache {
        static const int size = _window + _cache;
        unsigned char buffer[size] = {0};
        int head = 0;    // 标记头所在位置

        void push(unsigned char c) {
            if (head == size) {
                head = 0;
            }
            buffer[head++] = c;
        }

        inline unsigned char get(int i) {
            return buffer[(head - i - 1 + size) % size];
        }

        element walk(int cut = 0) {
            element max = {0};
            element cur;
            int cache_left = _cache - cut - 1;     // 为了算法书写简单,这里确保缓冲区保留至少一个
            for (int i = 0; i < _window; i++) {    // window左边距离
                cur.len = 0;
                cur.start = i;
                for (int j = 0; j < cache_left; j++) {    // cache左边距离
                    // int w = size - i - j - 1;
                    // int c = _cache - 1 - j;
                    // if (w >= _cache && this->get(w) == this->get(c)) {
                    //     cur.len++;
                    // } else {
                    //     break;
                    // }
                    if (size - i - j - 1 >= _cache && buffer[(head + i + j) % size] == buffer[(head + j - _cache + size) % size]) {
                        cur.len++;
                    } else {
                        break;
                    }
                }
                if (max.len < cur.len) {
                    max = cur;
                }
            }
            // max.code = this->get(_cache - max.len - 1);
            max.code = buffer[(head - _cache + max.len + size) % size];
            return max;
        }
    };

public:
    struct exchange {
        void* target;
        void (*copy)(void* target, unsigned int offset, void* buffer, unsigned int len);
    };

    static unsigned int compress(unsigned int input_len, exchange reader, exchange writer) {
        windowcache<window, cache> wch;

        // 填充缓冲区
        int cut = 0;
        unsigned char _c = 0;
        for (unsigned int i = 0; i < cache; i++) {
            if (i < input_len) {
                reader.copy(reader.target, i, &_c, 1);
                wch.push(_c);
            } else {
                wch.push(0);
                cut++;
            }
        }

        unsigned int offset = cache;
        unsigned int out_size = 0;
        element get = {0};
        while (cut < cache) {
            get = wch.walk(cut);
            writer.copy(writer.target, out_size, &get, sizeof(get));
            out_size += sizeof(get);

            for (int j = 0; j < get.len + 1; j++) {
                if (offset + j < input_len) {
                    reader.copy(reader.target, offset + j, &_c, 1);
                    wch.push(_c);
                } else {
                    wch.push(0);
                    cut++;
                }
            }
            offset += get.len + 1;
        }

        return out_size;
    }

    static unsigned int decompress(unsigned int input_len, exchange reader, exchange writer) {
        if (input_len == 0) {
            return 0;
        }
        windowcache<window, 0> wch;

        element get;
        unsigned int offset = 0;
        unsigned int out_size = 0;
        unsigned char _c = 0;
        while (offset < input_len) {
            reader.copy(reader.target, offset, &get, sizeof(get));
            offset += sizeof(get);
            if (get.len > 0) {
                int p = window - get.start - 1;
                for (int i = 0; i < get.len; i++) {
                    _c = wch.get(p);
                    wch.push(_c);
                    writer.copy(writer.target, out_size, &_c, 1);
                    out_size += 1;
                }
            }
            wch.push(get.code);
            writer.copy(writer.target, out_size, &get.code, 1);
            out_size += 1;
        }
        return out_size;
    }
};
input len : 158208
read: 0 ms
output len: 27388
compress 393 ms
    393.129761 kb/s
write: 0 ms
decompress: 1 ms
    154500.000000 kb/s