1+ pub ( crate ) mod encode;
2+
13use std:: collections:: hash_map:: Entry ;
24use std:: collections:: { BTreeMap , VecDeque } ;
35
4- use base64:: prelude:: * ;
56use rustc_data_structures:: fx:: { FxHashMap , FxIndexMap } ;
67use rustc_middle:: ty:: TyCtxt ;
78use rustc_span:: def_id:: DefId ;
@@ -18,12 +19,33 @@ use crate::html::format::join_with_double_colon;
1819use crate :: html:: markdown:: short_markdown_summary;
1920use crate :: html:: render:: { self , IndexItem , IndexItemFunctionType , RenderType , RenderTypeId } ;
2021
22+ use encode:: { bitmap_to_string, write_vlqhex_to_string} ;
23+
2124/// The serialized search description sharded version
2225///
2326/// The `index` is a JSON-encoded list of names and other information.
2427///
2528/// The desc has newlined descriptions, split up by size into 128KiB shards.
2629/// For example, `(4, "foo\nbar\nbaz\nquux")`.
30+ ///
31+ /// There is no single, optimal size for these shards, because it depends on
32+ /// configuration values that we can't predict or control, such as the version
33+ /// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2
34+ /// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether
35+ /// the search query is going to produce a large number of results or a small
36+ /// number, the bandwidth delay product of the network...
37+ ///
38+ /// Gzipping some standard library descriptions to guess what transport
39+ /// compression will do, the compressed file sizes can be as small as 4.9KiB
40+ /// or as large as 18KiB (ignoring the final 1.9KiB shard of leftovers).
41+ /// A "reasonable" range for files is for them to be bigger than 1KiB,
42+ /// since that's about the amount of data that can be transferred in a
43+ /// single TCP packet (see [1]), and smaller than 64KiB, the maximum amount of
44+ /// data that TCP can transfer in a single round trip without extensions (see [2] and [3]).
45+ ///
46+ /// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media
47+ /// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept
48+ /// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features
2749pub ( crate ) struct SerializedSearchIndex {
2850 pub ( crate ) index : String ,
2951 pub ( crate ) desc : Vec < ( usize , String ) > ,
@@ -342,9 +364,9 @@ pub(crate) fn build_index<'tcx>(
342364 associated_item_disambiguators : & ' a Vec < ( usize , String ) > ,
343365 // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string
344366 // for information on the format.
345- descindex : String ,
367+ desc_index : String ,
346368 // A list of items with no description. This is eventually turned into a bitmap.
347- emptydesc : Vec < u32 > ,
369+ empty_desc : Vec < u32 > ,
348370 }
349371
350372 struct Paths {
@@ -476,36 +498,28 @@ pub(crate) fn build_index<'tcx>(
476498 crate_data. serialize_field ( "q" , & full_paths) ?;
477499 crate_data. serialize_field ( "i" , & parents) ?;
478500 crate_data. serialize_field ( "f" , & functions) ?;
479- crate_data. serialize_field ( "D" , & self . descindex ) ?;
501+ crate_data. serialize_field ( "D" , & self . desc_index ) ?;
480502 crate_data. serialize_field ( "p" , & paths) ?;
481503 crate_data. serialize_field ( "b" , & self . associated_item_disambiguators ) ?;
482- let mut buf = Vec :: new ( ) ;
483- let mut strbuf = String :: new ( ) ;
484- write_bitmap_to_bytes ( & deprecated, & mut buf) . unwrap ( ) ;
485- BASE64_STANDARD . encode_string ( & buf, & mut strbuf) ;
486- crate_data. serialize_field ( "c" , & strbuf) ?;
487- strbuf. clear ( ) ;
488- buf. clear ( ) ;
489- write_bitmap_to_bytes ( & self . emptydesc , & mut buf) . unwrap ( ) ;
490- BASE64_STANDARD . encode_string ( & buf, & mut strbuf) ;
491- crate_data. serialize_field ( "e" , & strbuf) ?;
504+ crate_data. serialize_field ( "c" , & bitmap_to_string ( & deprecated) ) ?;
505+ crate_data. serialize_field ( "e" , & bitmap_to_string ( & self . empty_desc ) ) ?;
492506 if has_aliases {
493507 crate_data. serialize_field ( "a" , & self . aliases ) ?;
494508 }
495509 crate_data. end ( )
496510 }
497511 }
498512
499- let ( emptydesc , desc) = {
500- let mut emptydesc = Vec :: new ( ) ;
513+ let ( empty_desc , desc) = {
514+ let mut empty_desc = Vec :: new ( ) ;
501515 let mut result = Vec :: new ( ) ;
502516 let mut set = String :: new ( ) ;
503517 let mut len: usize = 0 ;
504- let mut itemindex : u32 = 0 ;
518+ let mut item_index : u32 = 0 ;
505519 for desc in std:: iter:: once ( & crate_doc) . chain ( crate_items. iter ( ) . map ( |item| & item. desc ) ) {
506520 if desc == "" {
507- emptydesc . push ( itemindex ) ;
508- itemindex += 1 ;
521+ empty_desc . push ( item_index ) ;
522+ item_index += 1 ;
509523 continue ;
510524 }
511525 if set. len ( ) >= DESC_INDEX_SHARD_LEN {
@@ -516,23 +530,23 @@ pub(crate) fn build_index<'tcx>(
516530 }
517531 set. push_str ( & desc) ;
518532 len += 1 ;
519- itemindex += 1 ;
533+ item_index += 1 ;
520534 }
521535 result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
522- ( emptydesc , result)
536+ ( empty_desc , result)
523537 } ;
524538
525- let descindex = {
526- let mut descindex = String :: with_capacity ( desc. len ( ) * 4 ) ;
539+ let desc_index = {
540+ let mut desc_index = String :: with_capacity ( desc. len ( ) * 4 ) ;
527541 for & ( len, _) in desc. iter ( ) {
528- write_vlqhex_to_string ( len. try_into ( ) . unwrap ( ) , & mut descindex ) ;
542+ write_vlqhex_to_string ( len. try_into ( ) . unwrap ( ) , & mut desc_index ) ;
529543 }
530- descindex
544+ desc_index
531545 } ;
532546
533547 assert_eq ! (
534548 crate_items. len( ) + 1 ,
535- desc. iter( ) . map( |( len, _) | * len) . sum:: <usize >( ) + emptydesc . len( )
549+ desc. iter( ) . map( |( len, _) | * len) . sum:: <usize >( ) + empty_desc . len( )
536550 ) ;
537551
538552 // The index, which is actually used to search, is JSON
@@ -546,8 +560,8 @@ pub(crate) fn build_index<'tcx>(
546560 paths: crate_paths,
547561 aliases: & aliases,
548562 associated_item_disambiguators: & associated_item_disambiguators,
549- descindex ,
550- emptydesc ,
563+ desc_index ,
564+ empty_desc ,
551565 } )
552566 . expect( "failed serde conversion" )
553567 // All these `replace` calls are because we have to go through JS string for JSON content.
@@ -559,237 +573,6 @@ pub(crate) fn build_index<'tcx>(
559573 SerializedSearchIndex { index, desc }
560574}
561575
562- pub ( crate ) fn write_vlqhex_to_string ( n : i32 , string : & mut String ) {
563- let ( sign, magnitude) : ( bool , u32 ) =
564- if n >= 0 { ( false , n. try_into ( ) . unwrap ( ) ) } else { ( true , ( -n) . try_into ( ) . unwrap ( ) ) } ;
565- // zig-zag encoding
566- let value: u32 = ( magnitude << 1 ) | ( if sign { 1 } else { 0 } ) ;
567- // Self-terminating hex use capital letters for everything but the
568- // least significant digit, which is lowercase. For example, decimal 17
569- // would be `` Aa `` if zig-zag encoding weren't used.
570- //
571- // Zig-zag encoding, however, stores the sign bit as the last bit.
572- // This means, in the last hexit, 1 is actually `c`, -1 is `b`
573- // (`a` is the imaginary -0), and, because all the bits are shifted
574- // by one, `` A` `` is actually 8 and `` Aa `` is -8.
575- //
576- // https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html
577- // describes the encoding in more detail.
578- let mut shift: u32 = 28 ;
579- let mut mask: u32 = 0xF0_00_00_00 ;
580- // first skip leading zeroes
581- while shift < 32 {
582- let hexit = ( value & mask) >> shift;
583- if hexit != 0 || shift == 0 {
584- break ;
585- }
586- shift = shift. wrapping_sub ( 4 ) ;
587- mask = mask >> 4 ;
588- }
589- // now write the rest
590- while shift < 32 {
591- let hexit = ( value & mask) >> shift;
592- let hex = char:: try_from ( if shift == 0 { '`' } else { '@' } as u32 + hexit) . unwrap ( ) ;
593- string. push ( hex) ;
594- shift = shift. wrapping_sub ( 4 ) ;
595- mask = mask >> 4 ;
596- }
597- }
598-
// checked against roaring-rs in
// https://gitlab.com/notriddle/roaring-test
/// Serializes the set of integers in `domain` as a Roaring bitmap, writing the
/// bytes to `out`.
///
/// `domain` is expected to be sorted in ascending order without duplicates;
/// consecutive entries sharing the same upper 16 bits are grouped into one
/// container keyed by those bits. Each container starts as a sorted array of
/// the lower 16 bits, is promoted to a 65536-bit bitset once it holds 4096
/// values, and is finally converted to a run-length list if that
/// representation serializes smaller.
///
/// The layout follows <https://github.com/RoaringBitmap/RoaringFormatSpec>;
/// all integers are written little-endian.
///
/// # Errors
///
/// Returns any error produced by writing to `out`.
pub fn write_bitmap_to_bytes(domain: &[u32], mut out: impl std::io::Write) -> std::io::Result<()> {
    // https://arxiv.org/pdf/1603.06549.pdf
    enum Container {
        /// 65536 bits, stored as 1024 64-bit words
        Bits(Box<[u64; 1024]>),
        /// list of entries
        Array(Vec<u16>),
        /// list of (start, len-1)
        Run(Vec<(u16, u16)>),
    }
    impl Container {
        /// Number of values stored in this container.
        fn popcount(&self) -> u32 {
            match self {
                Container::Bits(bits) => bits.iter().map(|word| word.count_ones()).sum(),
                Container::Array(array) => {
                    array.len().try_into().expect("array can't be bigger than 2**32")
                }
                Container::Run(runs) => runs.iter().map(|&(_, lenm1)| u32::from(lenm1) + 1).sum(),
            }
        }

        /// Adds `value`; values must arrive in ascending order.
        fn push(&mut self, value: u16) {
            match self {
                Container::Bits(bits) => bits[usize::from(value >> 6)] |= 1u64 << (value & 0x3F),
                Container::Array(array) => {
                    array.push(value);
                    // At 4096 entries the array (2 bytes each) is as large as
                    // the fixed 8192-byte bitset, so switch representation.
                    if array.len() >= 4096 {
                        let values = std::mem::take(array);
                        *self = Container::Bits(Box::new([0; 1024]));
                        for value in values {
                            self.push(value);
                        }
                    }
                }
                Container::Run(runs) => {
                    // Widen to u32 so that `start + len` cannot overflow u16.
                    let extends_last = matches!(
                        runs.last(),
                        Some(&(start, lenm1))
                            if u32::from(start) + u32::from(lenm1) + 1 == u32::from(value)
                    );
                    if extends_last {
                        runs.last_mut().expect("just matched Some").1 += 1;
                    } else {
                        runs.push((value, 0));
                    }
                }
            }
        }

        /// Converts the container to run-length form if that serializes
        /// smaller. Returns `true` if the container is a `Run` afterwards.
        fn try_make_run(&mut self) -> bool {
            match self {
                Container::Bits(bits) => {
                    // Count runs by counting run *ends*: a set bit followed by
                    // a clear bit, either inside a word or across the boundary
                    // to the next word (Algorithm 1 of the Roaring paper).
                    let run_count: u64 = bits
                        .iter()
                        .enumerate()
                        .map(|(i, &word)| {
                            let next_word = bits.get(i + 1).copied().unwrap_or(0);
                            u64::from(((word << 1) & !word).count_ones())
                                + ((word >> 63) & !next_word)
                        })
                        .sum();
                    // A run container costs 2 + 4 * runs bytes; a bitset
                    // always costs 8192 bytes.
                    if 2 + 4 * run_count < 8192 {
                        let words = std::mem::replace(bits, Box::new([0; 1024]));
                        *self = Container::Run(Vec::new());
                        for (i, word) in words.iter().copied().enumerate() {
                            if word == 0 {
                                continue;
                            }
                            let base =
                                u16::try_from(i).expect("only 1024 words, index fits in u16") << 6;
                            for bit in 0..64u16 {
                                if word & (1u64 << bit) != 0 {
                                    self.push(base | bit);
                                }
                            }
                        }
                        true
                    } else {
                        false
                    }
                }
                // Too small for a run container to ever pay off.
                Container::Array(array) if array.len() <= 5 => false,
                Container::Array(array) => {
                    // One run for the first value, plus one for every gap.
                    let run_count = 1 + array
                        .windows(2)
                        .filter(|pair| u32::from(pair[0]) + 1 != u32::from(pair[1]))
                        .count();
                    if 2 + 4 * run_count < 2 * array.len() + 2 {
                        let values = std::mem::take(array);
                        *self = Container::Run(Vec::new());
                        for value in values {
                            self.push(value);
                        }
                        true
                    } else {
                        false
                    }
                }
                Container::Run(_) => true,
            }
        }
    }

    /// Container key: the upper 16 bits of an entry.
    fn high16(entry: u32) -> u16 {
        (entry >> 16).try_into().expect("shifted off the top 16 bits, so it should fit")
    }
    /// Value within a container: the lower 16 bits of an entry.
    fn low16(entry: u32) -> u16 {
        (entry & 0x00_00_FF_FF).try_into().expect("AND 16 bits, so it should fit")
    }

    // Group the domain into one container per distinct upper-16-bit key.
    let mut keys = Vec::<u16>::new();
    let mut containers = Vec::<Container>::new();
    let mut has_run = false;
    let mut domain_iter = domain.iter().copied().peekable();
    while let Some(first) = domain_iter.next() {
        let key = high16(first);
        let mut container = Container::Array(vec![low16(first)]);
        while let Some(entry) = domain_iter.next_if(|&entry| high16(entry) == key) {
            container.push(low16(entry));
        }
        keys.push(key);
        has_run = container.try_make_run() || has_run;
        containers.push(container);
    }

    // https://github.com/RoaringBitmap/RoaringFormatSpec
    const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
    const SERIAL_COOKIE: u32 = 12347;
    const NO_OFFSET_THRESHOLD: u32 = 4;
    let size: u32 = containers.len().try_into().expect("at most 65536 containers");

    // Cookie header. `start_offset` is the byte position of the first
    // container's data, i.e. the total size of all headers.
    let start_offset = if has_run {
        // `has_run` implies at least one container, so `size - 1` is safe.
        out.write_all(&(SERIAL_COOKIE | ((size - 1) << 16)).to_le_bytes())?;
        // One bit per container, set when it is a run container.
        for group in containers.chunks(8) {
            let mut flags: u8 = 0;
            for (i, container) in group.iter().enumerate() {
                if matches!(container, Container::Run(..)) {
                    flags |= 1 << i;
                }
            }
            out.write_all(&[flags])?;
        }
        if size < NO_OFFSET_THRESHOLD {
            4 + 4 * size + ((size + 7) / 8)
        } else {
            4 + 8 * size + ((size + 7) / 8)
        }
    } else {
        out.write_all(&SERIAL_COOKIE_NO_RUNCONTAINER.to_le_bytes())?;
        out.write_all(&size.to_le_bytes())?;
        4 + 4 + 4 * size + 4 * size
    };

    // descriptive header: 16-bit key followed by 16-bit (cardinality - 1)
    for (&key, container) in keys.iter().zip(&containers) {
        let key: u32 = key.into();
        let count: u32 = container.popcount() - 1;
        out.write_all(&((count << 16) | key).to_le_bytes())?;
    }

    // offset header: omitted only for small bitmaps that use the run cookie
    if !has_run || size >= NO_OFFSET_THRESHOLD {
        let mut offset = start_offset;
        for container in &containers {
            out.write_all(&offset.to_le_bytes())?;
            offset += match container {
                Container::Bits(_) => 8192u32,
                Container::Array(array) => {
                    u32::try_from(array.len()).expect("array holds fewer than 4096 values") * 2
                }
                Container::Run(runs) => {
                    2 + u32::try_from(runs.len()).expect("at most 65536 runs") * 4
                }
            };
        }
    }

    // container data
    for container in &containers {
        match container {
            Container::Bits(bits) => {
                for word in bits.iter() {
                    out.write_all(&word.to_le_bytes())?;
                }
            }
            Container::Array(array) => {
                for value in array {
                    out.write_all(&value.to_le_bytes())?;
                }
            }
            Container::Run(runs) => {
                let run_count: u16 =
                    runs.len().try_into().expect("run container is only chosen below 2048 runs");
                out.write_all(&run_count.to_le_bytes())?;
                for &(start, lenm1) in runs {
                    out.write_all(&start.to_le_bytes())?;
                    out.write_all(&lenm1.to_le_bytes())?;
                }
            }
        }
    }
    Ok(())
}
792-
793576pub ( crate ) fn get_function_type_for_search < ' tcx > (
794577 item : & clean:: Item ,
795578 tcx : TyCtxt < ' tcx > ,
0 commit comments