|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
from rank_bm25 import BM25Okapi |
|
from sentence_transformers import SentenceTransformer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
# Plain-text summary of the General Rules of Interpretation (GIR).
# search_and_explain() interpolates this text verbatim into the prompt it
# sends to the chat model, so any edit here changes the model input.
gir='''The six General Rules of Interpretation (GIR) guide the classification of goods under the tariff. Rules 1 to 4 must be applied sequentially, while Rules 5 and 6 are standalone.

Rule 1: Titles and Terms
Titles: Section, Chapter, and sub-Chapter titles are for reference only.
Classification: Determine classification based on the headings, Section or Chapter Notes. Refer to GIRs 2 to 6 for further details.
Rule 2: Incomplete and Mixed Goods
Incomplete Goods (2a): Classification includes unfinished, incomplete, or disassembled items if they have the essential character of the finished product.

Mixed Goods (2b): A reference to a material includes mixtures or combinations with other materials. Goods with multiple materials are classified based on the principles of Rule 3.

Example: Dicalcium citrate, a compound with citric acid characteristics, is classified under 2918.15.90.19 as a salt of citric acid.

Rule 3: Classification Between Multiple Headings
Specific vs. General (3a): Prefer the heading with the most specific description if multiple headings apply.

Example: Mint tea is classified under tea as it provides a specific description, unlike mint alone.

Essential Character (3b): For mixtures or composite goods, classify based on the material or component giving the essential character.

Example: A liquor gift set is classified under the liquor heading because the liquor is the essential item.

Last Resort (3c): When essential character cannot be determined, classify under the last in numerical order among equally suitable headings.

Example: A gift set with socks and ties is classified under the tie heading as it comes last numerically.

Rule 4: Most Akin Goods
Last Resort: If goods cannot be classified using the above rules, classify them under the heading most akin to the goods.
Rule 5: Containers and Packing
Containers (5a): Containers specifically designed for an article and sold together with it are classified with the article unless the container defines the product’s essential character.

Example: Flute cases are classified with the flutes they contain.

Packing Materials (5b): Packing materials and containers are classified with the goods if they are not suitable for reuse and are of a kind normally used for packing.

Example: Styrofoam used for padding is classified with the goods it protects.

Rule 6: Subheadings
Classification: Classification at the subheading level follows the same rules as for headings, considering any related subheading notes and only comparing subheadings at the same level.'''
|
|
|
# BM25 index passed to bm25_bert_search(); when it is None that function
# builds an index from the corpus it is given.
bm25 = None
# Sentence-embedding model used to encode each incoming query; instantiated
# once at import time.
bert_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
|
|
|
|
def get_bert_embeddings(corpus, model):
    """Encode *corpus* with *model* and hand back the encoder's output.

    Args:
        corpus: The texts to embed; passed to ``model.encode`` unchanged.
        model: Any object exposing an ``encode(corpus)`` method.

    Returns:
        Whatever ``model.encode(corpus)`` returns.
    """
    encoded = model.encode(corpus)
    return encoded
|
|
|
def load_embeddings(filename="corpus_embeddings.npy"):
    """Read a saved NumPy array from disk.

    Args:
        filename: Path of the ``.npy`` file to read. Defaults to
            ``corpus_embeddings.npy`` relative to the working directory.

    Returns:
        The object produced by ``numpy.load`` for that file.
    """
    return np.load(filename)
|
|
|
|
|
def _top_scored_rows(df, scores, top_n):
    """Return the ``top_n`` best-scoring rows of *df*, best first.

    Only the two columns used to build the result strings are kept. The sort
    is stable, so rows with equal scores keep their original relative order.
    """
    ranking = np.argsort(-np.asarray(scores, dtype=float), kind="stable")[:top_n]
    return df.iloc[ranking][['Concat Description', 'CTH Code']]


def bm25_bert_search(query, df, corpus, bm25, bert_model, corpus_embeddings, top_n=10):
    """Retrieve candidate tariff lines by keyword (BM25) and embedding similarity.

    The ``top_n`` best BM25 matches and the ``top_n`` best cosine-similarity
    matches are concatenated (BM25 first) and de-duplicated on ``'CTH Code'``,
    keeping the first occurrence.

    Args:
        query: Free-text product description.
        df: Table with ``'Concat Description'`` and ``'CTH Code'`` columns;
            row *i* must correspond to ``corpus[i]`` and
            ``corpus_embeddings[i]``.
        corpus: Description strings used to build the BM25 index when
            ``bm25`` is ``None``. Non-string entries (e.g. NaN from a CSV)
            are indexed as empty documents instead of raising.
        bm25: A prebuilt index exposing ``get_scores(tokens)``, or ``None``
            to build a ``BM25Okapi`` index from ``corpus`` for this call.
        bert_model: Object exposing ``encode(text)`` for the query.
        corpus_embeddings: One embedding row per row of ``df``.
        top_n: Number of rows taken from each ranking.

    Returns:
        A list of strings of the form
        ``"Description: <description>, CTH Code: <code>"``.

    Raises:
        ValueError: If the number of embeddings or BM25 scores differs from
            the number of rows in ``df``.
    """
    if len(corpus_embeddings) != len(df):
        raise ValueError(
            f"corpus_embeddings has {len(corpus_embeddings)} rows but df has {len(df)}"
        )

    if bm25 is None:
        # The tokenized corpus is only needed to build the index, so it is
        # skipped entirely when the caller supplies a prebuilt one.
        tokenized_corpus = [
            doc.split(" ") if isinstance(doc, str) else [] for doc in corpus
        ]
        bm25 = BM25Okapi(tokenized_corpus)

    bm25_scores = bm25.get_scores(query.split(" "))
    if len(bm25_scores) != len(df):
        raise ValueError(
            f"BM25 index covers {len(bm25_scores)} documents but df has {len(df)} rows"
        )

    query_embedding = bert_model.encode(query)
    bert_scores = cosine_similarity([query_embedding], corpus_embeddings)[0]

    # Keyword hits come first so that, on a duplicated CTH code, the row kept
    # is the one found by BM25.
    candidates = pd.concat([
        _top_scored_rows(df, bm25_scores, top_n),
        _top_scored_rows(df, bert_scores, top_n),
    ]).drop_duplicates(subset=['CTH Code'])

    return [
        f"Description: {description}, CTH Code: {code}"
        for description, code in zip(candidates['Concat Description'], candidates['CTH Code'])
    ]
|
|
|
|
|
# Load the precomputed corpus embeddings once at import time, from the
# default file name used by load_embeddings().
# NOTE(review): bm25_bert_search() pairs these rows positionally with the rows
# of the tariff CSV — confirm the file was generated from that same CSV.
corpus_embeddings = load_embeddings()
|
|
|
def _load_search_resources(csv_path='CTH_Description (2).csv'):
    """Return ``(df, corpus, bm25)``, loading the table and index only once.

    The table is stored in the module-level ``df`` and the index in the
    module-level ``bm25`` so later queries reuse them instead of re-reading
    the CSV and re-indexing the whole corpus.
    """
    global df, bm25

    if globals().get("df") is None:
        df = pd.read_csv(csv_path, on_bad_lines='skip')

    # Missing descriptions become empty strings so tokenizing never hits NaN.
    corpus = df['Concat Description'].fillna("").astype(str).tolist()

    if globals().get("bm25") is None:
        # Same single-space tokenization that bm25_bert_search applies to the query.
        bm25 = BM25Okapi([doc.split(" ") for doc in corpus])

    return df, corpus, bm25


def search_and_explain(query):
    """Suggest a CTH code for *query* and return the model's explanation.

    Candidate tariff lines are retrieved with ``bm25_bert_search`` and placed,
    together with the GIR text, into a prompt that is sent to the hosted
    ``Qwen/Qwen1.5-110B-Chat-demo`` Space.

    Args:
        query: Product description typed by the user.

    Returns:
        The model's reply, or a short message when the query is blank or the
        API response does not contain a reply.
    """
    if not query or not query.strip():
        return "Please enter a product description."

    table, corpus, index = _load_search_resources()

    result_strings = bm25_bert_search(
        query, table, corpus, index, bert_model, corpus_embeddings, top_n=10
    )

    prompt = (
        "Based on the descriptions:\n"
        + "\n".join(result_strings)
        + f"\nPlease choose the most suitable CTH code for the given product: '{query}'.Keep the GIR in mind while choosing'{gir}'.Explain the possibility of related CTH codes on certain conditions "
    )

    from gradio_client import Client

    client = Client("Qwen/Qwen1.5-110B-Chat-demo")
    response = client.predict(
        query=prompt,
        history=[],
        system="You are a helpful assistant.",
        api_name="/model_chat"
    )

    # The reply is read from response[1][0][1] — presumably the first
    # (prompt, reply) pair of the returned chat history; confirm against the
    # Space's API. Any other shape yields the fallback message rather than an
    # IndexError from indexing into the fallback string itself.
    try:
        return response[1][0][1]
    except (IndexError, KeyError, TypeError):
        return "No output received from the API."
|
|
|
|
|
# Build the Gradio UI: an introductory Markdown panel, one textbox for the
# product description and one textbox for the model's answer.
# NOTE(review): the intro text mentions ColBERT, but the search code in this
# file only uses BM25 and a SentenceTransformer model — confirm the wording.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # AI-powered Indian Customs Tariff Search with Explainability
        This app uses a combination of semantic, keyword, and ColBERT models to search the Indian customs tariff and choose the best matching CTH code with the power of interpreting with General Rules of Interpretation (GIR).
        """
    )

    query_input = gr.Textbox(label="Enter Product Description", placeholder="e.g., fuel pump for elevator")
    result_output = gr.Textbox(label="HSN Prediction with Explanation", lines=10)

    # Submitting the query box calls search_and_explain with its text and
    # writes the returned string into the output box.
    query_input.submit(search_and_explain, inputs=query_input, outputs=result_output)

# Launched unconditionally at module level (no __main__ guard), so the app
# starts whenever this file is executed or imported.
demo.launch(debug=True)
|
|