Spaces:

NCTCMumbai
/

AI_based_Indian_customs_tariff_search

Running

App Files Files Community

AI_based_Indian_customs_tariff_search / app.py

NCTCMumbai

Update app.py

d1079ea verified 4 months ago

raw

history blame

6.93 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from rank_bm25 import BM25Okapi
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	# Plain-text summary of the six General Rules of Interpretation (GIR).
	# search_and_explain() interpolates this string verbatim into the prompt it
	# sends to the chat model, so any edit here changes what the model is told.
	gir='''The six General Rules of Interpretation (GIR) guide the classification of goods under the tariff. Rules 1 to 4 must be applied sequentially, while Rules 5 and 6 are standalone.

	Rule 1: Titles and Terms
	Titles: Section, Chapter, and sub-Chapter titles are for reference only.
	Classification: Determine classification based on the headings, Section or Chapter Notes. Refer to GIRs 2 to 6 for further details.
	Rule 2: Incomplete and Mixed Goods
	Incomplete Goods (2a): Classification includes unfinished, incomplete, or disassembled items if they have the essential character of the finished product.

	Mixed Goods (2b): A reference to a material includes mixtures or combinations with other materials. Goods with multiple materials are classified based on the principles of Rule 3.

	Example: Dicalcium citrate, a compound with citric acid characteristics, is classified under 2918.15.90.19 as a salt of citric acid.

	Rule 3: Classification Between Multiple Headings
	Specific vs. General (3a): Prefer the heading with the most specific description if multiple headings apply.

	Example: Mint tea is classified under tea as it provides a specific description, unlike mint alone.

	Essential Character (3b): For mixtures or composite goods, classify based on the material or component giving the essential character.

	Example: A liquor gift set is classified under the liquor heading because the liquor is the essential item.

	Last Resort (3c): When essential character cannot be determined, classify under the last in numerical order among equally suitable headings.

	Example: A gift set with socks and ties is classified under the tie heading as it comes last numerically.

	Rule 4: Most Akin Goods
	Last Resort: If goods cannot be classified using the above rules, classify them under the heading most akin to the goods.
	Rule 5: Containers and Packing
	Containers (5a): Containers specifically designed for an article and sold together with it are classified with the article unless the container defines the product’s essential character.

	Example: Flute cases are classified with the flutes they contain.

	Packing Materials (5b): Packing materials and containers are classified with the goods if they are not suitable for reuse and are of a kind normally used for packing.

	Example: Styrofoam used for padding is classified with the goods it protects.

	Rule 6: Subheadings
	Classification: Classification at the subheading level follows the same rules as for headings, considering any related subheading notes and only comparing subheadings at the same level.'''
	# Initialize the models
	# `bm25` is the module-level slot for the keyword index. It starts as None
	# and is handed to bm25_bert_search() as its `bm25` argument; that function
	# builds an index itself whenever it receives None.
	bm25 = None # BM25 will be initialized within the function
	# Sentence-embedding model used to encode queries. Constructed once, at
	# import time, and shared by every search.
	bert_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

	# Encode a whole corpus with the supplied embedding model
	def get_bert_embeddings(corpus, model):
	    """Return whatever ``model.encode`` produces for ``corpus``.

	    Args:
	        corpus: The texts to embed; passed to ``model.encode`` unchanged.
	        model: Any object exposing an ``encode(corpus)`` method.

	    Returns:
	        The value returned by ``model.encode(corpus)``.
	    """
	    embeddings = model.encode(corpus)
	    return embeddings

	def load_embeddings(filename="corpus_embeddings.npy"):
	    """Read a saved NumPy array from ``filename`` and return it.

	    Args:
	        filename: Path handed to ``np.load``; defaults to
	            ``"corpus_embeddings.npy"``.

	    Returns:
	        The object returned by ``np.load(filename)``.
	    """
	    return np.load(filename)

	# Perform BM25 and BERT search
	def bm25_bert_search(query, df, corpus, bm25, bert_model, corpus_embeddings, top_n=10):
	    """Collect candidate tariff lines for ``query`` from a keyword and an embedding search.

	    The ``top_n`` best BM25 hits and the ``top_n`` best cosine-similarity hits
	    are concatenated (BM25 first) and de-duplicated on ``'CTH Code'``, keeping
	    the first occurrence.

	    Args:
	        query: Free-text product description. Tokenized by splitting on single spaces.
	        df: DataFrame providing the ``'Concat Description'`` and ``'CTH Code'`` columns.
	        corpus: Sequence of description strings used to build the BM25 index
	            when ``bm25`` is None.
	        bm25: A ready BM25 index exposing ``get_scores(tokens)``, or None to
	            build one from ``corpus`` for this call.
	        bert_model: Object exposing ``encode(text)`` for the query embedding.
	        corpus_embeddings: 2-D array of corpus embeddings compared against the
	            query embedding.
	        top_n: Number of hits taken from each of the two rankings.

	    Returns:
	        A list of strings of the form
	        ``"Description: <description>, CTH Code: <code>"``; empty when
	        ``corpus`` is empty.

	    NOTE(review): rows of ``df``, entries of ``corpus`` and rows of
	    ``corpus_embeddings`` are assumed to line up one-to-one -- confirm against
	    how the embeddings file was produced.
	    """
	    # Nothing to rank: BM25Okapi and cosine_similarity both fail on an empty
	    # corpus, so report "no candidates" instead of crashing.
	    if len(corpus) == 0:
	        return []

	    # BM25 search
	    if bm25 is None:
	        # Tokenize the corpus only when an index really has to be built; a
	        # caller that passes a prebuilt index skips this O(corpus) step.
	        bm25 = BM25Okapi([doc.split(" ") for doc in corpus])
	    bm25_scores = bm25.get_scores(query.split(" "))

	    bm25_results = pd.DataFrame({
	        'Index': range(len(bm25_scores)),
	        'BM25_Score': bm25_scores,
	        'Concat Description': df['Concat Description'],
	        'CTH Code': df['CTH Code']
	    }).sort_values(by='BM25_Score', ascending=False).head(top_n)

	    # BERT search
	    query_embedding = bert_model.encode(query)
	    bert_scores = cosine_similarity([query_embedding], corpus_embeddings)[0]

	    bert_results = pd.DataFrame({
	        'Index': range(len(bert_scores)),
	        'BERT_Score': bert_scores,
	        'Concat Description': df['Concat Description'],
	        'CTH Code': df['CTH Code']
	    }).sort_values(by='BERT_Score', ascending=False).head(top_n)

	    # Combine BM25 and BERT results (BM25 rows first, so they win ties below)
	    shared_columns = ['Index', 'Concat Description', 'CTH Code']
	    combined_results = pd.concat([bm25_results[shared_columns],
	                                  bert_results[shared_columns]])

	    # Drop duplicates based on 'CTH Code'
	    combined_results = combined_results.drop_duplicates(subset=['CTH Code'])

	    # One display string per surviving row; only these two columns are needed,
	    # so iterate them directly instead of materialising a Series per row.
	    return [
	        f"Description: {description}, CTH Code: {cth_code}"
	        for description, cth_code in zip(combined_results['Concat Description'],
	                                         combined_results['CTH Code'])
	    ]

	# Load precomputed embeddings
	# Runs at import time. load_embeddings() is called with no argument, so it
	# reads its default file. The result is the module-level array that
	# search_and_explain() passes to bm25_bert_search() as `corpus_embeddings`.
	corpus_embeddings = load_embeddings()

	def search_and_explain(query):
	    """Suggest a CTH code for ``query`` and return the chat model's explanation.

	    Retrieves candidate tariff lines with bm25_bert_search(), builds a prompt
	    from those candidates, the query and the GIR text, and sends it to the
	    hosted ``Qwen/Qwen1.5-110B-Chat-demo`` Space through ``gradio_client``.

	    The tariff CSV and the BM25 index are created on the first non-empty
	    query and kept in the module-level names ``df`` and ``bm25`` so later
	    queries reuse them.

	    Args:
	        query: Product description typed by the user.

	    Returns:
	        The assistant reply as a string; a short notice when ``query`` is
	        empty/blank or when the API response does not contain a reply.
	    """
	    global df, bm25

	    # A blank query has nothing to search for; skip the search and the remote call.
	    if not query or not query.strip():
	        return "Please enter a product description."

	    # `df` is not defined at module level before the first call, hence the
	    # globals() lookup rather than a direct read.
	    if globals().get('df') is None:
	        df = pd.read_csv('CTH_Description (2).csv', on_bad_lines = 'skip' ) # Load your data

	    corpus = df['Concat Description'].tolist()

	    # Build the keyword index once. Tokenization (split on a single space)
	    # must match how bm25_bert_search() tokenizes the query.
	    if bm25 is None:
	        bm25 = BM25Okapi([doc.split(" ") for doc in corpus])

	    # Perform the search and get results as strings
	    result_strings = bm25_bert_search(query, df, corpus, bm25, bert_model, corpus_embeddings, top_n=10)

	    # Prepare the prompt for the API
	    prompt = (
	        "Based on the descriptions:\n"
	        + "\n".join(result_strings)
	        + f"\nPlease choose the most suitable CTH code for the given product: '{query}'.Keep the GIR in mind while choosing'{gir}'.Explain the possibility of related CTH codes on certain conditions "
	    )

	    # Call the API
	    from gradio_client import Client
	    client = Client("Qwen/Qwen1.5-110B-Chat-demo")
	    response = client.predict(
	        query=prompt,
	        history=[],
	        system="You are a helpful assistant.",
	        api_name="/model_chat"
	    )

	    # Extract the API output text.
	    # NOTE(review): assumes response[1] is the chat history as a list of
	    # [user_message, assistant_reply] pairs -- confirm against the Space's API.
	    # Any other shape (None, too short, empty history, dict-style messages)
	    # yields the notice instead of an IndexError/TypeError.
	    no_output = "No output received from the API."
	    try:
	        reply = response[1][0][1]
	    except (TypeError, IndexError, KeyError):
	        return no_output
	    return reply if reply else no_output

	# Create the Gradio interface
	# Components and the event listener are declared inside the Blocks context so
	# they are attached to `demo`.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    # Page header. The description names the retrieval methods this file
	    # actually uses: BM25 keyword scoring and sentence-embedding similarity.
	    gr.Markdown(
	        """
	# AI-powered Indian Customs Tariff Search with Explainability
	This app uses a combination of semantic (sentence-embedding) and keyword (BM25) search to search the Indian customs tariff and choose the best matching CTH code with the power of interpreting with General Rules of Interpretation (GIR).
	"""
	    )

	    query_input = gr.Textbox(label="Enter Product Description", placeholder="e.g., fuel pump for elevator")
	    result_output = gr.Textbox(label="HSN Prediction with Explanation", lines=10)

	    # There is no button: submitting the textbox (pressing Enter) runs the search.
	    query_input.submit(search_and_explain, inputs=query_input, outputs=result_output)

	# Starts the app when this file is executed.
	demo.launch(debug=True)