// exbert/client/src/ts/data/FaissSearchWrapper.ts
// First commit (63858e7) by bhoov, 3.69 kB
import * as tp from '../etc/types'
import * as d3 from 'd3'
import 'd3-array'
import * as R from 'ramda'
import {SpacyInfo} from '../etc/SpacyInfo'
import {initZero} from '../etc/xramda'
// Lower-case string inputs; hand any other value back unchanged.
// The parameter is intentionally `any`: callers may pass values of any type.
const makeStringLower = (value: any) => (R.is(String, value) ? R.toLower(value) : value)
/**
 * Return the index of the largest value in `array`.
 *
 * Ties resolve to the earliest index, because a later element only replaces
 * the current best when it is strictly greater.
 *
 * @param array Values to scan.
 * @returns Index of the first maximum, or -1 when `array` is empty.
 */
function argMax(array: number[]): number {
    // An empty input has no maximum; report that explicitly with -1.
    if (array.length === 0) return -1

    let best = 0
    for (let i = 1; i < array.length; i++) {
        if (array[i] > array[best]) best = i
    }
    return best
}
/**
 * Wraps a list of Faiss search results and derives the count tables that the
 * histogram displays.
 *
 * The `showNext` option selects which fields of each result are read:
 * `matched_att_plus_1` / `next_index` when true, `matched_att` / `index`
 * when false.
 */
export class FaissSearchResultWrapper {
    data: tp.FaissSearchResults[]

    options = {
        showNext: false
    }

    /**
     * @param data Search results to summarise.
     * @param showNext Initial value of the `showNext` option.
     */
    constructor(data: tp.FaissSearchResults[], showNext=false) {
        this.data = data
        this.options.showNext = showNext
    }

    /** Name of the attention field to read from each result, per `showNext`. */
    get matchAtt() {
        return this.showNext() ? "matched_att_plus_1" : "matched_att"
    }

    /** Name of the token-index field to read from each result, per `showNext`. */
    get matchIdx() {
        return this.showNext() ? "next_index" : "index"
    }

    /**
     * Count how often each `offset_to_max` value occurs across the results,
     * reading it from the attention field selected by `matchAtt`.
     *
     * @returns An object with a single `offset` table of counts.
     */
    countPosInfo() {
        const attOffsets = this.data.map(d => +d[this.matchAtt].out.offset_to_max)
        const ctObj = {
            // presumably initZero yields one zeroed counter per offset value; verify in xramda
            offset: initZero(attOffsets)
        }
        attOffsets.forEach(v => {
            ctObj.offset[v] += 1
        })
        return ctObj
    }

    /**
     * Count the `pos`, `dep` and `is_ent` values of the token receiving the
     * maximum attention in each result, then merge in the `offset` counts from
     * `countPosInfo`.
     *
     * NOTE: the maximum is always taken over `matched_att.out.att`, regardless
     * of `showNext`; only the merged `offset` table follows `matchAtt`.
     *
     * @param indexOffset Unused; kept so existing call sites stay valid.
     * @returns Count tables keyed by `pos`, `dep`, `is_ent` and `offset`.
     */
    countMaxAttKeys(indexOffset=0) {
        const countObj = this.countTokenMeta(d => d.tokens[argMax(d.matched_att.out.att)])
        return Object.assign(countObj, this.countPosInfo())
    }

    /**
     * Count the `pos`, `dep` and `is_ent` values of the token at the index
     * selected by `matchIdx`, shifted by `indexOffset`.
     *
     * @param indexOffset Added to the matched index before the token lookup.
     * @returns Count tables keyed by `pos`, `dep` and `is_ent`.
     */
    countMatchedKeys(indexOffset=0) {
        return this.countTokenMeta(d => d.tokens[d[this.matchIdx] + indexOffset])
    }

    /**
     * Matched-token counts with zero-count entries removed.
     *
     * @param indexOffset Forwarded to `countMatchedKeys`.
     */
    getMatchedHistogram(indexOffset=0) {
        const totalHist = this.countMatchedKeys(indexOffset)
        const isNonZero = (val: number) => val != 0
        return R.map(R.pickBy(isNonZero), totalHist)
    }

    /** Max-attention token counts (including `offset`) with zero-count entries removed. */
    getMaxAttHistogram() {
        const totalHist = this.countMaxAttKeys()
        const isNonZero = (val: number) => val != 0
        return R.map(R.pickBy(isNonZero), totalHist)
    }

    /** Read the `showNext` option. */
    showNext(): boolean
    /** Set the `showNext` option and return this wrapper for chaining. */
    showNext(v:boolean): this
    showNext(v?: boolean): boolean | this {
        if (v == null) return this.options.showNext
        this.options.showNext = v
        return this
    }

    /**
     * Shared counting loop: pick one token per result and count its `pos`,
     * `dep` and `is_ent` values (stringified and lower-cased).
     *
     * @param pickToken Selects the token to count for a result; may return
     *   `undefined` when the computed index is outside the token list.
     */
    private countTokenMeta(
        pickToken: (d: tp.FaissSearchResults) => tp.FaissSearchResults["tokens"][number] | undefined
    ) {
        // The keys of this object dictate which token fields are counted.
        const countObj = {
            pos: initZero(SpacyInfo.TotalMetaOptions.pos),
            dep: initZero(SpacyInfo.TotalMetaOptions.dep),
            is_ent: initZero(SpacyInfo.TotalMetaOptions.is_ent),
        }
        const fields = Object.keys(countObj)

        this.data.forEach(d => {
            const token = pickToken(d)
            // An out-of-range index yields no token; skip the result instead of crashing.
            if (token == null) return

            fields.forEach(k => {
                const val = makeStringLower(String(token[k]))
                // Start unseen values at 0 so they are counted rather than becoming NaN.
                countObj[k][val] = (countObj[k][val] || 0) + 1
            })
        })
        return countObj
    }
}