@@ -25,21 +25,17 @@ import Distribution.Types.Version
2525import qualified Distribution.Utils.ShortText as S
2626
2727import qualified Codec.Archive.Tar as Tar
28- import qualified Codec.Archive.Tar.Entry as Tar
29- import Control.Monad ( join
30- , liftM2
31- )
3228import qualified Data.ByteString.Lazy as BSL
3329import Data.List ( maximumBy
3430 , sortBy
3531 )
3632import Data.Maybe ( isNothing )
3733import Data.Ord ( comparing )
38- import qualified Data.TarIndex as T
3934import qualified Data.Time.Clock as CL
4035import GHC.Float ( int2Float )
4136import System.FilePath ( isExtensionOf )
42- import qualified System.IO as SIO
37+
38+ -- import Debug.Trace (trace)
4339
4440data Scorer = Scorer
4541 { maximumS :: ! Float
@@ -119,7 +115,7 @@ rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0)
119115rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do
120116 temp <- temporalScore pkg lastUploads versionList recentDownloads
121117 versS <- versionScore versionList vers lastUploads pkg
122- codeS <- codeScore documentLines srcLines
118+ codeS <- codeScore documSize srcLines
123119 return $ temp <> versS <> codeS <> authorScore maintainers pkg
124120
125121 where
@@ -134,45 +130,34 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do
134130 $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs)
135131 srcLines = do
136132 Right (path, _, _) <- packageTarball tarCache pkgI
137- filterLines (isExtensionOf " .hs" ) . Tar. read <$> BSL. readFile path
133+ filterLines (isExtensionOf " .hs" ) countLines
134+ . Tar. read
135+ <$> BSL. readFile path
136+ documSize = do
137+ path <- documentPath
138+ case path of
139+ Nothing -> return 0
140+ Just pth ->
141+ filterLines (isExtensionOf " .html" ) countSize
142+ . Tar. read
143+ <$> BSL. readFile pth
138144
139- filterLines f = Tar. foldEntries (countLines f) 0 (const 0 )
145+ filterLines f g = Tar. foldEntries (g f) 0 (const 0 )
140146 countLines :: (FilePath -> Bool ) -> Tar. Entry -> Float -> Float
141147 countLines f entry l = if not . f . Tar. entryPath $ entry then l else lns
142148 where
143149 ! lns = case Tar. entryContent entry of
144150 (Tar. NormalFile str _) -> l + (int2Float . length $ BSL. split 10 str)
145151 _ -> l
152+ countSize :: (FilePath -> Bool ) -> Tar. Entry -> Float -> Float
153+ countSize f entry l = if not . f . Tar. entryPath $ entry then l else s
154+ where
155+ ! s = case Tar. entryContent entry of
156+ (Tar. NormalFile _ siz) -> l + fromInteger (toInteger siz)
157+ _ -> l
146158
147159 documentBlob :: IO (Maybe BlobStorage. BlobId )
148- documentBlob = queryDocumentation docs pkgId
149- documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache)
150- documentationEntr = do
151- index <- documentIndex
152- path <- documentPath
153- return $ liftM2 (,) path (join $ liftM2 T. lookup index path)
154- documentLines :: IO Float
155- documentLines = documentationEntr >>= filterLinesTar (const True )
156-
157- filterLinesTar
158- :: (FilePath -> Bool ) -> Maybe (FilePath , T. TarIndexEntry ) -> IO Float
159- filterLinesTar f (Just (path, T. TarFileEntry offset)) =
160- if f path then getLines path offset else return 0
161- filterLinesTar f (Just (_, T. TarDir dir)) =
162- sum <$> mapM (filterLinesTar f . Just ) dir
163- filterLinesTar _ _ = return 0
164-
165- -- TODO if size is too big give it a good score and do not read the file
166- getLines path offset = do
167- handle <- SIO. openFile path SIO. ReadMode
168- SIO. hSeek handle SIO. AbsoluteSeek (fromIntegral $ offset * 512 )
169- header <- BSL. hGet handle 512
170- case Tar. read header of
171- (Tar. Next Tar. Entry { Tar. entryContent = Tar. NormalFile _ siz } _) -> do
172- body <- BSL. hGet handle (fromIntegral siz)
173- return $ int2Float . length . BSL. split 10 $ body
174- _ -> return 0
175-
160+ documentBlob = queryDocumentation docs pkgId
176161 documentPath = do
177162 blob <- documentBlob
178163 return $ BlobStorage. filepath (serverBlobStore env) <$> blob
0 commit comments