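# NOTE: the following lines are Hugging Face file-viewer page residue, kept as comments; they are not schema content.
# thomasht86's picture
# Upload folder using huggingface_hub
# be59b6e verified
# raw
# history blame
# 8.29 kB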
schema pdf_page {
document pdf_page {

    # Identifier of the page; indexed with word matching and returned in summaries.
    field id type string {
        indexing: summary | index
        match: word
    }

    # URL string; indexed and returned in summaries (no explicit match mode set).
    field url type string {
        indexing: summary | index
    }

    # Title; text-matched, with BM25 enabled so bm25(title) can be used in ranking.
    field title type string {
        indexing: summary | index
        index: enable-bm25
        match: text
    }

    # Page number; stored as an attribute and returned in summaries.
    field page_number type int {
        indexing: summary | attribute
    }

    # Raw payload, summary-only (not indexed, not an attribute).
    # NOTE(review): presumably a reduced-size page image -- confirm against the feeder.
    field image type raw {
        indexing: summary
    }

    # Raw payload, summary-only (not indexed, not an attribute).
    # NOTE(review): presumably the full-resolution page image -- confirm against the feeder.
    field full_image type raw {
        indexing: summary
    }

    # Page text; text-matched, with BM25 enabled so bm25(text) can be used in ranking.
    field text type string {
        indexing: summary | index
        index: enable-bm25
        match: text
    }

    # One int8 vector of 16 cells per `patch` label (mapped dimension).
    # The rank profiles in this schema pass it through unpack_bits() and hamming(),
    # i.e. the 16 int8 cells are treated as packed bits.
    # Stored as an attribute with an HNSW index using hamming distance.
    field embedding type tensor<int8>(patch{}, v[16]) {
        indexing: attribute | index
        attribute {
            distance-metric: hamming
        }
        index {
            hnsw {
                max-links-per-node: 32
                neighbors-to-explore-at-insert: 400
            }
        }
    }
}
# Default fieldset: groups title, url, page_number and text under one name.
# NOTE(review): page_number is an int attribute while the other members are
# indexed string fields (title/text with `match: text`, url with no explicit
# match). Vespa recommends that fieldset members share match settings --
# confirm whether url and page_number are intentionally included here.
fieldset default {
    fields: title, url, page_number, text
}
# Document summary "default".
#   - `text` is returned with bolding switched on.
#   - `snippet` is a dynamic summary whose source is the `text` field.
document-summary default {
    from-disk

    summary text {
        bolding: on
    }

    summary snippet {
        source: text
        dynamic
    }
}
# Fieldset containing only the `image` field.
# NOTE(review): `image` is declared as `type raw` with `indexing: summary` only,
# so it is neither indexed nor an attribute -- confirm this fieldset is needed.
fieldset image {
    fields: image
}
# Text-only ranking: score is the sum of the BM25 scores of title and text.
rank-profile bm25 {
    first-phase {
        expression {
            bm25(title) + bm25(text)
        }
    }
}
# Default ranking: BM25 over title and text in the first phase, then the top 10
# hits are re-ranked with max_sim. The similarities tensor is returned as a
# summary feature.
rank-profile default {

    inputs {
        # One 128-cell float vector per `querytoken` label.
        query(qt) tensor<float>(querytoken{}, v[128])
    }

    # For each querytoken: multiply query(qt) with the unpacked embedding,
    # sum over v, take the maximum over patch; finally sum over querytoken.
    function max_sim() {
        expression {
            sum(
                reduce(
                    sum(query(qt) * unpack_bits(attribute(embedding)), v),
                    max, patch
                ),
                querytoken
            )
        }
    }

    # The same product summed over v only, without the max/sum reductions
    # over patch and querytoken.
    function similarities() {
        expression {
            sum(query(qt) * unpack_bits(attribute(embedding)), v)
        }
    }

    # Sum of the BM25 scores of title and text.
    function bm25_score() {
        expression: bm25(title) + bm25(text)
    }

    first-phase {
        expression: bm25_score
    }

    second-phase {
        rerank-count: 10
        expression: max_sim
    }

    summary-features: similarities
}
# Two-phase ranking on the embedding only: max_sim_binary (hamming-based) in the
# first phase, then the top 10 hits are re-ranked with max_sim.
rank-profile retrieval-and-rerank {

    inputs {
        # 64 individual int8 query vectors, rq0 .. rq63, 16 cells each.
        # They are not referenced by any expression in this profile.
        # NOTE(review): presumably targets for nearestNeighbor query operators
        # against `embedding` -- confirm against the query-building code.
        query(rq0) tensor<int8>(v[16])
        query(rq1) tensor<int8>(v[16])
        query(rq2) tensor<int8>(v[16])
        query(rq3) tensor<int8>(v[16])
        query(rq4) tensor<int8>(v[16])
        query(rq5) tensor<int8>(v[16])
        query(rq6) tensor<int8>(v[16])
        query(rq7) tensor<int8>(v[16])
        query(rq8) tensor<int8>(v[16])
        query(rq9) tensor<int8>(v[16])
        query(rq10) tensor<int8>(v[16])
        query(rq11) tensor<int8>(v[16])
        query(rq12) tensor<int8>(v[16])
        query(rq13) tensor<int8>(v[16])
        query(rq14) tensor<int8>(v[16])
        query(rq15) tensor<int8>(v[16])
        query(rq16) tensor<int8>(v[16])
        query(rq17) tensor<int8>(v[16])
        query(rq18) tensor<int8>(v[16])
        query(rq19) tensor<int8>(v[16])
        query(rq20) tensor<int8>(v[16])
        query(rq21) tensor<int8>(v[16])
        query(rq22) tensor<int8>(v[16])
        query(rq23) tensor<int8>(v[16])
        query(rq24) tensor<int8>(v[16])
        query(rq25) tensor<int8>(v[16])
        query(rq26) tensor<int8>(v[16])
        query(rq27) tensor<int8>(v[16])
        query(rq28) tensor<int8>(v[16])
        query(rq29) tensor<int8>(v[16])
        query(rq30) tensor<int8>(v[16])
        query(rq31) tensor<int8>(v[16])
        query(rq32) tensor<int8>(v[16])
        query(rq33) tensor<int8>(v[16])
        query(rq34) tensor<int8>(v[16])
        query(rq35) tensor<int8>(v[16])
        query(rq36) tensor<int8>(v[16])
        query(rq37) tensor<int8>(v[16])
        query(rq38) tensor<int8>(v[16])
        query(rq39) tensor<int8>(v[16])
        query(rq40) tensor<int8>(v[16])
        query(rq41) tensor<int8>(v[16])
        query(rq42) tensor<int8>(v[16])
        query(rq43) tensor<int8>(v[16])
        query(rq44) tensor<int8>(v[16])
        query(rq45) tensor<int8>(v[16])
        query(rq46) tensor<int8>(v[16])
        query(rq47) tensor<int8>(v[16])
        query(rq48) tensor<int8>(v[16])
        query(rq49) tensor<int8>(v[16])
        query(rq50) tensor<int8>(v[16])
        query(rq51) tensor<int8>(v[16])
        query(rq52) tensor<int8>(v[16])
        query(rq53) tensor<int8>(v[16])
        query(rq54) tensor<int8>(v[16])
        query(rq55) tensor<int8>(v[16])
        query(rq56) tensor<int8>(v[16])
        query(rq57) tensor<int8>(v[16])
        query(rq58) tensor<int8>(v[16])
        query(rq59) tensor<int8>(v[16])
        query(rq60) tensor<int8>(v[16])
        query(rq61) tensor<int8>(v[16])
        query(rq62) tensor<int8>(v[16])
        query(rq63) tensor<int8>(v[16])

        # Float query vectors (128 cells per querytoken), used by max_sim.
        query(qt) tensor<float>(querytoken{}, v[128])

        # int8 query vectors (16 cells per querytoken), used by max_sim_binary.
        query(qtb) tensor<int8>(querytoken{}, v[16])
    }

    # For each querytoken: multiply query(qt) with the unpacked embedding,
    # sum over v, take the maximum over patch; finally sum over querytoken.
    function max_sim() {
        expression {
            sum(
                reduce(
                    sum(query(qt) * unpack_bits(attribute(embedding)), v),
                    max, patch
                ),
                querytoken
            )
        }
    }

    # Hamming-based variant: per (querytoken, patch) the hamming distance between
    # query(qtb) and the embedding is summed over v and mapped to 1/(1 + distance),
    # so a smaller distance yields a larger value. The maximum over patch is then
    # summed over querytoken.
    function max_sim_binary() {
        expression {
            sum(
                reduce(
                    1/(1 + sum(hamming(query(qtb), attribute(embedding)), v)),
                    max, patch
                ),
                querytoken
            )
        }
    }

    first-phase {
        expression: max_sim_binary
    }

    second-phase {
        rerank-count: 10
        expression: max_sim
    }
}
}