libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "MiniDeflate.h" 00019 00020 // Convenient constants and macros 00021 // 00022 #define EMPTY_KEY 123 00023 #define uchar unsigned char 00024 00025 #ifndef min 00026 #define min(a,b) (((a)<(b))?(a):(b)) 00027 #endif 00028 00029 MiniDeflate::MiniDeflate() 00030 { 00031 buffer = new uchar [BUFFER_SIZE + 5]; 00032 hash_keys = new uchar [HASH_SIZE]; 00033 hash_values = new uchar * [HASH_SIZE * HASH_DEPTH]; 00034 } 00035 00036 MiniDeflate::~MiniDeflate() 00037 { 00038 delete [] buffer; 00039 delete [] hash_keys; 00040 delete [] hash_values; 00041 } 00042 00043 void MiniDeflate::EvaluateMatch(unsigned char * in, int len, int hash, 00044 unsigned char * & best_pos, int & best_match) 00045 { 00046 int max = min(len, 0xFFFF + 66); 00047 00048 for (int i = HASH_DEPTH; i > 0; i--) 00049 // Check each possible match (up to HASH_DEPTH) 00050 { 00051 uchar * pos = hash_values[hash * HASH_DEPTH + ((hash_keys[hash] + i) % HASH_DEPTH)]; 00052 00053 if (pos == NULL || in - pos >= 0x4001) break; 00054 00055 int match = 0; 00056 00057 while (match < max && pos[match] == in[match]) 00058 match++; 00059 00060 if (match > best_match) 00061 { 00062 best_match = match; 00063 best_pos = pos; 00064 } 00065 } 00066 00067 // If string seems pretty unique, add to hash table 00068 if (best_match < OKAY_MATCH) 00069 { 00070 int delta = hash_keys[hash] = (uchar)((hash_keys[hash] + 1) & 7); 00071 hash_values[hash * 8 + delta] = in; 00072 } 00073 } 00074 00075 void MiniDeflate::QuoteLiterals(unsigned char * & in, int literal, 00076 unsigned char * & out, int & buffer_len, 00077 FILE * output) 00078 { 00079 if (buffer_len < 0) 00080 { 00081 fwrite(buffer, out - buffer, 1, output); 00082 buffer_len = BUFFER_SIZE; 00083 out = buffer; 00084 } 00085 00086 while (buffer_len < literal) 00087 { 00088 literal -= buffer_len; 00089 while (buffer_len--) 00090 { 00091 *out = *in; 00092 in++; 00093 out++; 00094 } 00095 fwrite(buffer, BUFFER_SIZE, 1, output); 00096 buffer_len = BUFFER_SIZE; 00097 out = buffer; 00098 } 00099 00100 while (literal--) 00101 { 00102 *out = *in; 00103 in++; 00104 out++; 00105 buffer_len--; 00106 } 00107 } 00108 00109 void MiniDeflate::OutputLiterals(unsigned char * & in, int literal, 00110 unsigned char * & out, int & buffer_len, 00111 FILE * output) 00112 { 00113 while (literal > 0) 00114 if (literal < 16) 00115 { 00116 *out = (char) literal; 00117 out++; 00118 buffer_len--; 00119 QuoteLiterals(in, literal, out, buffer_len, output); 00120 break; 00121 } 00122 else if (literal < 31) 00123 { 00124 *out = 15; 00125 out++; 00126 buffer_len--; 00127 QuoteLiterals(in, 15, out, buffer_len, output); 00128 *out = (uchar)(literal - 15); 00129 out++; 00130 buffer_len--; 00131 QuoteLiterals(in, literal - 15, out, buffer_len, output); 00132 break; 00133 } 00134 else 00135 { 00136 int length = min(literal, 0xFFFF + 31); 00137 literal -= length; 00138 length -= 31; 00139 00140 *out = 0; 00141 out++; 00142 *out = (uchar)(length >> 8); 00143 out++; 00144 *out = (uchar)(length & 0xFF); 00145 out++; 00146 buffer_len -= 3; 00147 00148 QuoteLiterals(in, length + 31, out, buffer_len, output); 00149 } 00150 } 00151 00152 00153 void MiniDeflate::Deflate(FILE * output, void * void_input, size_t len) 00154 { 00155 uchar * in = (uchar *) void_input; 00156 uchar * out = (uchar *) buffer; 00157 int buffer_len = BUFFER_SIZE; 00158 00159 for (int i = 0; i < HASH_SIZE; i++) hash_keys[i] = EMPTY_KEY; 00160 00161 uchar * in2 = in; 00162 00163 while (len > 2) 00164 { 00165 // Hash the current input value 00166 int hash = ((in[0] << 16) | (in[1] << 8) | in[2]) % HASH_SIZE; 00167 00168 if (hash_keys[hash] != EMPTY_KEY) 00169 // Possible matches in hash table 00170 { 00171 int best_match = 0; 00172 uchar * best_pos = NULL; 00173 00174 EvaluateMatch(in, len, hash, best_pos, best_match); 00175 00176 // If there are no decent matches 00177 if (best_match < 3) 00178 { 00179 in++; 00180 len--; 00181 continue; 00182 } 00183 00184 // Try look ahead if match isn't great 00185 while (best_match < OKAY_MATCH && len > 3) 00186 { 00187 // Peek to see if we could get a better match 00188 int next_hash = ((in[1] << 16) | (in[2] << 8) | in[3]) % HASH_SIZE; 00189 00190 if (hash_keys[next_hash] == EMPTY_KEY) break; 00191 00192 int next_match = 0; 00193 uchar * next_pos = NULL; 00194 00195 EvaluateMatch(in + 1, len - 1, next_hash, next_pos, next_match); 00196 00197 // Didn't find a better match 00198 if (next_match <= best_match + 1) break; 00199 00200 // Found a better match, so try again 00201 in++; 00202 len--; 00203 best_match = next_match; 00204 best_pos = next_pos; 00205 } 00206 00207 int best_offset = in - best_pos - 1; 00208 00209 // This is where we output stuff 00210 // Check if we have some literals to output first 00211 OutputLiterals(in2, in - in2, out, buffer_len, output); 00212 00213 in2 = in += best_match; 00214 len -= best_match; 00215 00216 if (best_match < 17 && best_offset < 0x1000) 00217 { 00218 *out = (uchar)(((best_match - 1) << 4) | (best_offset >> 8)); 00219 out++; 00220 *out = (uchar)(best_offset & 0xFF); 00221 out++; 00222 buffer_len -= 2; 00223 } 00224 else if (best_match < 66) 00225 { 00226 *out = (uchar)(16 | (best_offset >> 10)); 00227 out++; 00228 *out = (uchar)((best_offset >> 2) & 0xFF); 00229 out++; 00230 *out = (uchar)((best_offset << 6) | (best_match - 2)); 00231 out++; 00232 buffer_len -= 3; 00233 } 00234 else 00235 { 00236 *out = (uchar)(16 | (best_offset >> 10)); 00237 out++; 00238 *out = (uchar)((best_offset >> 2) & 0xFF); 00239 out++; 00240 *out = (uchar)(best_offset << 6); 00241 out++; 00242 best_match -= 66; 00243 *out = (uchar)(best_match >> 8); 00244 out++; 00245 *out = (uchar)(best_match & 0xFF); 00246 out++; 00247 buffer_len -= 5; 00248 } 00249 00250 if (buffer_len <= 0) 00251 { 00252 fwrite(buffer, out - buffer, 1, output); 00253 buffer_len = BUFFER_SIZE; 00254 out = buffer; 00255 } 00256 } 00257 // Never seen this sequence before 00258 else 00259 { 00260 hash_keys[hash] = 0; 00261 for (int i = 1; i < HASH_DEPTH; i++) hash_values[hash * 8 + i] = NULL; 00262 hash_values[hash * 8] = in; 00263 in++; 00264 len--; 00265 } 00266 } 00267 00268 // Check if we have some trailing literals to output 00269 in += len; 00270 OutputLiterals(in2, in - in2, out, buffer_len, output); 00271 00272 // Flush output 00273 if (out != buffer) fwrite(buffer, out - buffer, 1, output); 00274 } 00275 00276 void MiniDeflate::CiteLiteral(unsigned char * & out, int literal, 00277 unsigned char * & in, int & buffer_len, 00278 FILE * input) 00279 { 00280 while (buffer_len < literal) 00281 { 00282 literal -= buffer_len; 00283 while (buffer_len--) 00284 { 00285 *out = *in; 00286 in++; 00287 out++; 00288 } 00289 buffer_len = fread(buffer + 5, 1, BUFFER_SIZE, input); 00290 in = buffer + 5; 00291 } 00292 00293 while (literal--) 00294 { 00295 *out = *in; 00296 in++; 00297 out++; 00298 buffer_len--; 00299 } 00300 } 00301 00302 00303 void MiniDeflate::Inflate(FILE * input, void * void_output, size_t len) 00304 { 00305 uchar * out = (uchar *) void_output; 00306 uchar * in = (uchar *) buffer + 5; 00307 int buffer_len = BUFFER_SIZE; 00308 00309 buffer_len = fread(buffer + 5, 1, BUFFER_SIZE, input); 00310 00311 while (len) 00312 { 00313 int match_len = *in >> 4; 00314 00315 // Matching a literal 00316 if (match_len == 0) 00317 { 00318 match_len = *in & 0x0F; 00319 in++, buffer_len--; 00320 00321 // If match_len == 0 then string is longer than 30 characters 00322 // Strings of 16 - 30 characters are encoded as two short strings 00323 if (match_len == 0) 00324 { 00325 match_len = (in[0] << 8) + in[1] + 31; 00326 in += 2; 00327 buffer_len -= 2; 00328 } 00329 00330 CiteLiteral(out, match_len, in, buffer_len, input); 00331 len -= match_len; 00332 } 00333 // Long match, 14 bit offset 00334 else if (match_len == 1) 00335 { 00336 int offset = (((in[0] & 0x0F) << 10) | (in[1] << 2) | (in[2] >> 6)) + 1; 00337 match_len = (in[2] & 0x3F) + 2; 00338 in += 3; 00339 buffer_len -= 3; 00340 00341 if (match_len == 2) 00342 { 00343 match_len = ((in[0] << 8) | in[1]) + 66; 00344 in += 2; 00345 buffer_len -= 2; 00346 } 00347 00348 uchar * match_pos = out - offset; 00349 len -= match_len; 00350 while (match_len--) 00351 { 00352 *out = *match_pos; 00353 out++, match_pos++; 00354 } 00355 } 00356 // Typical short match 00357 else 00358 { 00359 int offset = (((in[0] & 0x0F) << 8) | in[1]) + 1; 00360 in += 2; 00361 buffer_len -= 2; 00362 00363 uchar * match_pos = out - offset; 00364 len -= ++match_len; 00365 while (match_len--) 00366 { 00367 *out = *match_pos; 00368 out++, match_pos++; 00369 } 00370 } 00371 00372 if (buffer_len < 5) 00373 { 00374 uchar * in2 = (uchar *) buffer + 5 - buffer_len; 00375 while (in2 != buffer + 5) 00376 { 00377 *in2 = *in; 00378 in2++; 00379 in++; 00380 } 00381 00382 in = buffer + 5 - buffer_len; 00383 buffer_len += fread(buffer + 5, 1, BUFFER_SIZE, input); 00384 } 00385 } 00386 00387 if (buffer_len) fseek(input, -buffer_len, SEEK_CUR); 00388 } 00389 00390 00391