module GZip where import Prelude hiding (read, unzip) import Control.Monad.State import Control.Monad.Reader (ReaderT) import Data.Array import Data.Bits import Data.Char import Data.List hiding (unzip) import Data.Word import qualified Brianweb.Stream.Input as IS import Brianweb.Control.Monad.Fail import Brianweb.Stream.Consumer type Reader m a = Consumer m Word8 a read :: MonadFail m => Reader m Word8 read = consume read2 :: MonadFail m => Reader m Word16 read2 = do lo <- read hi <- read return ((fromIntegral hi `shiftL` 8) .|. fromIntegral lo) read2' :: MonadFail m => Reader m Word16 read2' = do hi <- read lo <- read return ((fromIntegral hi `shiftL` 8) .|. fromIntegral lo) read4 :: MonadFail m => Reader m Word32 read4 = do lo <- read2 hi <- read2 return ((fromIntegral hi `shiftL` 16) .|. fromIntegral lo) read4' :: MonadFail m => Reader m Word32 read4' = do hi <- read2' lo <- read2' return ((fromIntegral hi `shiftL` 16) .|. fromIntegral lo) readc :: MonadFail m => Reader m Char readc = do x <- read return (chr (fromIntegral x)) expect :: (MonadFail m, Show a, Eq a) => m a -> a -> m () expect act x = do y <- act if x /= y then mfail $ "Expected: " ++ show x ++ " actual: " ++ show y else return () many :: MonadFail m => Int -> m a -> m [a] many 0 _ = return [] many n act = do x <- act xs <- many (n - 1) act return (x:xs) manyUntil :: (MonadFail m, Eq a) => a -> Reader m a -> Reader m [a] manyUntil term act = do x <- act if x == term then return [] else do xs <- manyUntil term act return (x:xs) data Member = Member { m_text :: Bool, m_mtime :: Word32, m_xfl :: Word8, m_os :: Word8, m_extra :: Maybe [Word8], m_fname :: Maybe String, m_comment :: Maybe String, m_crc16 :: Maybe Word16, m_data :: [Word8], m_crc32 :: Word32, m_size :: Word32 } deriving (Eq,Show) fTEXT,fHCRC,fEXTRA,fFNAME,fCOMMENT :: Int fTEXT = 0 fHCRC = 1 fEXTRA = 2 fFNAME = 3 fCOMMENT = 4 ungzip :: MonadFail m => Reader m [Member] ungzip = do x <- consume' case x of Just 31 -> do x <- ungzip' xs <- ungzip return (x:xs) Just _ -> mfail "illegal gzip file" Nothing -> return [] ungzip' :: MonadFail m => Reader m Member ungzip' = do expect read 139 expect read 8 flg <- read mtime <- read4 xfl <- read os <- read extra <- whenBit flg fEXTRA (read2 >>= \len -> many (fromIntegral len) read) fname <- whenBit flg fFNAME (manyUntil '\0' readc) comment <- whenBit flg fCOMMENT (manyUntil '\0' readc) crc16 <- whenBit flg fHCRC read2 payload <- deflate crc32_ <- read4 size <- read4 when (crc32 payload /= crc32_) (mfail "crc mismatch") return Member { m_text=testBit flg fTEXT, m_mtime=mtime, m_extra=extra, m_fname=fname, m_comment=comment, m_crc16=crc16, m_xfl=xfl, m_os=os, m_data=payload, m_crc32=crc32_, m_size=size} where whenBit flg bit act | testBit flg bit = act >>= \res -> return (Just res) | otherwise = return Nothing deflate :: MonadFail m => Reader m [Word8] deflate = evalStateT (deflate' []) [] type BitReader m a = StateT [Bool] (ReaderT (IS.Stream m Word8) m) a readBit :: MonadFail m => BitReader m Bool readBit = do buf <- get case buf of (x:xs) -> put xs >> return x [] -> do byte <- lift read put (explode byte) readBit readBits :: MonadFail m => Int -> BitReader m Int readBits 0 = return 0 readBits n = do lsb <- readBit rest <- readBits (n - 1) return ((rest `shiftL` 1) + fromEnum lsb) explode' :: Bits a => a -> Int -> [Bool] explode' x y = map (testBit x) [0.. y - 1] explode :: Bits a => a -> [Bool] explode x = explode' x (bitSize x) deflate' :: MonadFail m => [Word8] -> BitReader m [Word8] deflate' acc = do bfinal <- readBit btype <- readBits 2 case btype of 0 -> do put [] len <- lift read2 nlen <- lift read2 when (nlen /= complement len) (mfail "nlen /= len") xs <- many (fromIntegral len) (lift read) if bfinal then return (reverse xs ++ acc) else deflate' (reverse xs ++ acc) 1 -> block bfinal fixedLitTable fixedDistTable acc 2 -> do hlitlen <- readBits 5 hdistlen <- readBits 5 hclen <- readBits 4 hc <- many (hclen + 4) (readBits 3) let hc_table = mkTable (case hc of [x16,x17,x18,x0,x8,x7,x9,x6,x10,x5,x11,x4,x12,x3,x13,x2,x14,x1,x15] -> [x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18] [x16,x17,x18,x0,x8,x7,x9,x6,x10,x5,x11,x4,x12,x3,x13,x2,x14] -> [x0,0, x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,0, x16,x17,x18] [x16,x17,x18,x0,x8,x7,x9,x6,x10,x5,x11,x4,x12,x3,x13,x2] -> [x0,0, x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,0, 0, x16,x17,x18] [x16,x17,x18,x0,x8,x7,x9,x6,x10,x5,x11,x4,x12,x3,x13] -> [x0,0, 0, x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,0, 0, x16,x17,x18] [x16,x17,x18,x0,x8,x7,x9,x6,x10,x5,x11,x4,x12,x3] -> [x0,0, 0, x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,0, 0, 0, x16,x17,x18] _ -> error ("hc: " ++ show hc)) -- mfail (show hc_table) hlit <- hc_decode hc_table (hlitlen + 257) init_prev hdist <- hc_decode hc_table (hdistlen + 1) init_prev block bfinal (mkTable hlit) (mkTable hdist) acc _ -> mfail $ "btype: " ++ show btype where init_prev = error "hc_decode: init_prev" hc_decode _ n _ | n < 0 = error "hc_decode: n < 0" hc_decode _ 0 _ = return [] hc_decode table n prev = do x <- decode table case x of 16 -> do n' <- liftM (+3) (readBits 2) xs <- hc_decode table (n - n') prev return (replicate n' prev ++ xs) 17 -> do n' <- liftM (+3) (readBits 3) xs <- hc_decode table (n - n') 0 return (replicate n' 0 ++ xs) 18 -> do n' <- liftM (+11) (readBits 7) xs <- hc_decode table (n - n') 0 return (replicate n' 0 ++ xs) _ | x < 16 -> liftM (x:) (hc_decode table (n - 1) x) block :: MonadFail m => Bool -> Table -> Table -> [Word8] -> BitReader m [Word8] block final lit_table dist_table acc = do x <- decode lit_table -- mfail (show x) if x == 256 then if {- error $ show -} final then return (reverse acc) else deflate' acc else if x < 256 then block final lit_table dist_table (fromIntegral x:acc) else do length <- case x of 257 -> return 3 258 -> return 4 259 -> return 5 260 -> return 6 261 -> return 7 262 -> return 8 263 -> return 9 264 -> return 10 265 -> liftM (+11) (readBits 1) 266 -> liftM (+13) (readBits 1) 267 -> liftM (+15) (readBits 1) 268 -> liftM (+17) (readBits 1) 269 -> liftM (+19) (readBits 2) 270 -> liftM (+23) (readBits 2) 271 -> liftM (+27) (readBits 2) 272 -> liftM (+31) (readBits 2) 273 -> liftM (+35) (readBits 3) 274 -> liftM (+43) (readBits 3) 275 -> liftM (+51) (readBits 3) 276 -> liftM (+59) (readBits 3) 277 -> liftM (+67) (readBits 4) 278 -> liftM (+83) (readBits 4) 279 -> liftM (+99) (readBits 4) 280 -> liftM (+115) (readBits 4) 281 -> liftM (+131) (readBits 5) 282 -> liftM (+163) (readBits 5) 283 -> liftM (+195) (readBits 5) 284 -> liftM (+227) (readBits 5) 285 -> return 258 _ -> mfail ("length: " ++ show x) dist_code <- decode dist_table dist <- case dist_code of 0 -> return 1 1 -> return 2 2 -> return 3 3 -> return 4 4 -> liftM (+5) (readBits 1) 5 -> liftM (+7) (readBits 1) 6 -> liftM (+9) (readBits 2) 7 -> liftM (+13) (readBits 2) 8 -> liftM (+17) (readBits 3) 9 -> liftM (+25) (readBits 3) 10 -> liftM (+33) (readBits 4) 11 -> liftM (+49) (readBits 4) 12 -> liftM (+65) (readBits 5) 13 -> liftM (+97) (readBits 5) 14 -> liftM (+129) (readBits 6) 15 -> liftM (+193) (readBits 6) 16 -> liftM (+257) (readBits 7) 17 -> liftM (+385) (readBits 7) 18 -> liftM (+513) (readBits 8) 19 -> liftM (+769) (readBits 8) 20 -> liftM (+1025) (readBits 9) 21 -> liftM (+1537) (readBits 9) 22 -> liftM (+2049) (readBits 10) 23 -> liftM (+3073) (readBits 10) 24 -> liftM (+4097) (readBits 11) 25 -> liftM (+6145) (readBits 11) 26 -> liftM (+8193) (readBits 12) 27 -> liftM (+12289) (readBits 12) 28 -> liftM (+16385) (readBits 13) 29 -> liftM (+24577) (readBits 13) _ -> mfail ("dist: " ++ show dist_code) let copy = lazyTake length (drop dist acc') acc' = copy ++ acc block final lit_table dist_table acc' lazyTake :: Int -> [a] -> [a] lazyTake n _ | n <= 0 = [] lazyTake n ~(x:xs) = x : lazyTake (n - 1) xs decode :: MonadFail m => Table -> BitReader m Int decode Invalid = mfail "decode: Invalid" decode (Leaf v) = return v decode (Branch l r) = do x <- readBit if not x then decode l else decode r alpha :: [Int] alpha = [3, 3, 3, 3,3, 2, 4, 4] step1 :: [Int] -> [Int] step1 xs = [length (filter (==i) xs) | i <- [1.. maximum xs]] step2 :: [Int] -> [Int] step2 = init . scanl (\x acc -> (x + acc) * 2) 0 step3 :: [Int] -> [Int] -> [Int] step3 [] _ = [] step3 (0:xs) next_codes = error "step3" : step3 xs next_codes step3 (x:xs) next_codes = next_code : step3 xs (before ++ [next_code + 1] ++ after) where (before,next_code:after) = splitAt (x-1) next_codes mkTable' :: [Int] -> [([Bool],Int)] mkTable' alpha = zipWith3 (\len code val -> (reverse (explode' code len), val)) alpha (step3 alpha (step2 (step1 alpha))) [0..] data Table = Branch { b_left :: Table, b_right :: Table} | Leaf { l_val :: Int } | Invalid deriving Show mkTable :: [Int] -> Table mkTable = node . filter (not.null.fst) . mkTable' where node [] = Invalid node [([],v)] = Leaf v node xs = let (left,right) = partition (not.head.fst) xs in Branch (arm left) (arm right) arm xs = node [(tail ys,z) | (ys,z) <- xs] fixedLitTable :: Table fixedLitTable = mkTable $ [8 | _ <- [0..143]] ++ [9 | _ <- [144..255]] ++ [7 | _ <- [256..279]] ++ [8 | _ <- [280..287]] fixedDistTable :: Table fixedDistTable = mkTable [5 | _ <- [0..31]] crc32_polynomial :: Word32 crc32_polynomial = 0xEDB88320 crc32_table :: Array Word8 Word32 crc32_table = listArray (0,255) (map f [0..255]) where f = step . step . step . step . step . step . step . step step acc = (acc `shiftR` 1) `xor` (negate (acc .&. 1) .&. crc32_polynomial) crc32 :: [Word8] -> Word32 crc32 = complement . foldl' f (-1) where f acc x = (acc `shiftR` 8) `xor` (crc32_table ! (x `xor` fromIntegral acc)) data ZipMember = ZipMember { zm_fname :: String, zm_data :: [Word8] } deriving (Eq,Show) unzip :: MonadFail m => Reader m [ZipMember] unzip = do x <- consume' case x of Just 0x50 -> do expect read 0x4b x <- read2 if x == 0x0403 then do x <- unzip' xs <- unzip return (x:xs) else return [] Just _ -> mfail "illegal zip file" Nothing -> return [] unzip' :: MonadFail m => Reader m ZipMember unzip' = do ver <- read2 when (ver > 20) (mfail "unsupported zip version") gpb <- read2 when ((gpb .&. 0xfffc) /= 0) (mfail "unsupported flags") expect read2 8 _mod_time <- read2 _mod_date <- read2 crc <- read4 comp_size <- read4 size <- read4 fnlen <- read2 exlen <- read2 fn <- many (fromIntegral fnlen) readc extra <- many (fromIntegral exlen) read payload <- deflate when (length payload /= fromIntegral size) (mfail "decompressed size mismatch") when (crc32 payload /= crc) (mfail "crc mismatch") return ZipMember { zm_fname=fn, zm_data=payload}