# Schema for one page of a PDF document. Each document carries the page's
# text, raw image bytes, and a multi-vector embedding with one bit-packed
# vector per image patch.
schema pdf_page {

    document pdf_page {

        # Document identifier; matched as a single exact word (no tokenization).
        field id type string {
            indexing: summary | index
            match {
                word
            }
        }

        field url type string {
            indexing: summary | index
        }

        # Title of the page/document; text-matched, with BM25 statistics enabled.
        field title type string {
            indexing: summary | index
            index: enable-bm25
            match {
                text
            }
        }

        field page_number type int {
            indexing: summary | attribute
        }

        # Raw image payloads, returned in summaries only (not searchable).
        field image type raw {
            indexing: summary
        }

        field full_image type raw {
            indexing: summary
        }

        # Page text; text-matched, with BM25 statistics enabled.
        field text type string {
            indexing: summary | index
            index: enable-bm25
            match {
                text
            }
        }

        # One vector per patch (mapped dimension `patch`), 16 cells each.
        # The cell type must be int8: the rank profiles below call
        # unpack_bits() on this attribute, which only accepts int8 input and
        # expands the 16 packed bytes into 128 values (matching query(qt)'s
        # v[128]). Without an explicit cell type the tensor would default to
        # double cells.
        field embedding type tensor<int8>(patch{}, v[16]) {
            indexing: attribute | index
            attribute {
                distance-metric: hamming
            }
            index {
                hnsw {
                    max-links-per-node: 32
                    neighbors-to-explore-at-insert: 400
                }
            }
        }
    }

    # NOTE(review): this fieldset mixes fields with different matching
    # settings (text-matched title/text, default-matched url, and the int
    # attribute page_number). Vespa recommends that fields in a fieldset share
    # match settings - confirm this combination is intended.
    fieldset default {
        fields: title, url, page_number, text
    }

    document-summary default {
        from-disk
        # Return `text` with bolding enabled.
        summary text {
            bolding: on
        }
        # Dynamic snippet generated from the `text` field.
        summary snippet {
            source: text
            dynamic
        }
    }

    fieldset image {
        fields: image
    }

    # Pure lexical ranking: BM25 over title and text.
    rank-profile bm25 {
        first-phase {
            expression: bm25(title) + bm25(text)
        }
    }

    # BM25 in the first phase, MaxSim over unpacked patch embeddings in the
    # second phase.
    rank-profile default {
        inputs {
            # Query token embeddings, one 128-dimensional vector per query token.
            # float matches the default cell type produced by unpack_bits().
            query(qt) tensor<float>(querytoken{}, v[128])
        }

        # For every query token: dot product (sum over v) against every patch,
        # keep the best-matching patch (max over patch), then add up the
        # per-token maxima (sum over querytoken).
        function max_sim() {
            expression {
                sum(
                    reduce(
                        sum(
                            query(qt) * unpack_bits(attribute(embedding)), v
                        ),
                        max, patch
                    ),
                    querytoken
                )
            }
        }

        # Full querytoken x patch dot-product matrix, before any max/sum
        # reduction; exposed through summary-features below.
        function similarities() {
            expression {
                sum(
                    query(qt) * unpack_bits(attribute(embedding)), v
                )
            }
        }

        function bm25_score() {
            expression {
                bm25(title) + bm25(text)
            }
        }

        first-phase {
            expression {
                bm25_score
            }
        }

        second-phase {
            rerank-count: 10
            expression {
                max_sim
            }
        }

        summary-features: similarities
    }

    # Binary (hamming-based) MaxSim in the first phase, float MaxSim over
    # unpacked patch embeddings in the second phase.
    rank-profile retrieval-and-rerank {
        inputs {
            # 64 single-vector query inputs with the same packed layout as one
            # patch of `embedding` (16 x int8).
            # NOTE(review): presumably these are the per-token query tensors
            # referenced by nearestNeighbor operators in the query - confirm
            # against the querying application.
            query(rq0) tensor<int8>(v[16])
            query(rq1) tensor<int8>(v[16])
            query(rq2) tensor<int8>(v[16])
            query(rq3) tensor<int8>(v[16])
            query(rq4) tensor<int8>(v[16])
            query(rq5) tensor<int8>(v[16])
            query(rq6) tensor<int8>(v[16])
            query(rq7) tensor<int8>(v[16])
            query(rq8) tensor<int8>(v[16])
            query(rq9) tensor<int8>(v[16])
            query(rq10) tensor<int8>(v[16])
            query(rq11) tensor<int8>(v[16])
            query(rq12) tensor<int8>(v[16])
            query(rq13) tensor<int8>(v[16])
            query(rq14) tensor<int8>(v[16])
            query(rq15) tensor<int8>(v[16])
            query(rq16) tensor<int8>(v[16])
            query(rq17) tensor<int8>(v[16])
            query(rq18) tensor<int8>(v[16])
            query(rq19) tensor<int8>(v[16])
            query(rq20) tensor<int8>(v[16])
            query(rq21) tensor<int8>(v[16])
            query(rq22) tensor<int8>(v[16])
            query(rq23) tensor<int8>(v[16])
            query(rq24) tensor<int8>(v[16])
            query(rq25) tensor<int8>(v[16])
            query(rq26) tensor<int8>(v[16])
            query(rq27) tensor<int8>(v[16])
            query(rq28) tensor<int8>(v[16])
            query(rq29) tensor<int8>(v[16])
            query(rq30) tensor<int8>(v[16])
            query(rq31) tensor<int8>(v[16])
            query(rq32) tensor<int8>(v[16])
            query(rq33) tensor<int8>(v[16])
            query(rq34) tensor<int8>(v[16])
            query(rq35) tensor<int8>(v[16])
            query(rq36) tensor<int8>(v[16])
            query(rq37) tensor<int8>(v[16])
            query(rq38) tensor<int8>(v[16])
            query(rq39) tensor<int8>(v[16])
            query(rq40) tensor<int8>(v[16])
            query(rq41) tensor<int8>(v[16])
            query(rq42) tensor<int8>(v[16])
            query(rq43) tensor<int8>(v[16])
            query(rq44) tensor<int8>(v[16])
            query(rq45) tensor<int8>(v[16])
            query(rq46) tensor<int8>(v[16])
            query(rq47) tensor<int8>(v[16])
            query(rq48) tensor<int8>(v[16])
            query(rq49) tensor<int8>(v[16])
            query(rq50) tensor<int8>(v[16])
            query(rq51) tensor<int8>(v[16])
            query(rq52) tensor<int8>(v[16])
            query(rq53) tensor<int8>(v[16])
            query(rq54) tensor<int8>(v[16])
            query(rq55) tensor<int8>(v[16])
            query(rq56) tensor<int8>(v[16])
            query(rq57) tensor<int8>(v[16])
            query(rq58) tensor<int8>(v[16])
            query(rq59) tensor<int8>(v[16])
            query(rq60) tensor<int8>(v[16])
            query(rq61) tensor<int8>(v[16])
            query(rq62) tensor<int8>(v[16])
            query(rq63) tensor<int8>(v[16])

            # Float query token embeddings used by max_sim (second phase).
            query(qt) tensor<float>(querytoken{}, v[128])
            # Bit-packed query token embeddings used by max_sim_binary
            # (first phase); same 16 x int8 layout as a patch of `embedding`.
            query(qtb) tensor<int8>(querytoken{}, v[16])
        }

        # For every query token: dot product (sum over v) against every patch,
        # keep the best-matching patch (max over patch), then add up the
        # per-token maxima (sum over querytoken).
        function max_sim() {
            expression {
                sum(
                    reduce(
                        sum(
                            query(qt) * unpack_bits(attribute(embedding)), v
                        ),
                        max, patch
                    ),
                    querytoken
                )
            }
        }

        # Same max-over-patch / sum-over-querytoken shape as max_sim, but the
        # per-pair score is 1 / (1 + hamming distance) computed directly on the
        # packed vectors, so a smaller distance yields a larger score.
        function max_sim_binary() {
            expression {
                sum(
                    reduce(
                        1 / (1 + sum(
                            hamming(query(qtb), attribute(embedding)), v
                        )),
                        max, patch
                    ),
                    querytoken
                )
            }
        }

        first-phase {
            expression {
                max_sim_binary
            }
        }

        second-phase {
            rerank-count: 10
            expression {
                max_sim
            }
        }
    }
}