---
pipeline_tag: tabular-classification
tags:
- sklearn
language:
- ko
library_name: sklearn
---

# label_encoder_map

```python
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

# NOTE: `ds` is assumed to be a `datasets.Dataset` created earlier
# (its loading code is not shown here; see the full code linked below).

# One LabelEncoder per categorical column, keyed by the source column name
# so each fitted encoder can be looked up by that name afterwards.
label_encoder_map = {
    "의약품제형": LabelEncoder(),
    "색상앞": LabelEncoder(),
    "색상뒤": LabelEncoder(),
    "분할선앞": LabelEncoder(),
    "분할선뒤": LabelEncoder(),
    "제형코드명": LabelEncoder(),
}

# Convert to pandas once; the source columns are not modified by add_column,
# so there is no need to re-run the conversion for every encoded column.
source_frame = ds.to_pandas()
for column_name, encoder in label_encoder_map.items():
    # Fit the encoder on the raw column and append the integer codes
    # as a new "<column>_encoded" column.
    ds = ds.add_column(
        f"{column_name}_encoded",
        encoder.fit_transform(source_frame[column_name]),
    )
```

```python
# Columns produced by the label encoders above, used as the feature matrix.
feature_columns = [
    '의약품제형_encoded',
    '색상앞_encoded',
    '색상뒤_encoded',
    '분할선앞_encoded',
    '분할선뒤_encoded',
    '제형코드명_encoded',
]

knn = KNeighborsClassifier(n_neighbors=5, metric='cosine')
knn.fit(
    ds.select_columns(feature_columns).to_pandas(),
    # Pass the target as a 1-D Series: a single-column DataFrame makes
    # scikit-learn emit a DataConversionWarning about a column-vector y.
    ds.select_columns("품목명").to_pandas()["품목명"],
)
```

[Full code](https://gist.github.com/brainer3220/4176af5b013c9cd1dd419626f1a7b0d9)

# Condensed-Co-Graph-And-Size-Graph

```python
from datasets import load_dataset, disable_caching, Value
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the 'train' split of every edge/node configuration of the graph dataset.
co_graph_edges = load_dataset('brainer/pill_identification_graph', 'co-graph-edges')['train']
co_graph_nodes = load_dataset('brainer/pill_identification_graph', 'co-graph-nodes')['train']
size_graph_edges = load_dataset('brainer/pill_identification_graph', 'size-graph-edges')['train']
size_graph_nodes = load_dataset('brainer/pill_identification_graph', 'size-graph-nodes')['train']
pill_ingredients_edges = load_dataset('brainer/pill_identification_graph', 'merge-hira-pill_identification-edges')['train']
pill_ingredients_nodes = load_dataset('brainer/pill_identification_graph', 'merge-hira-pill_identification-nodes')['train']

# Bare expression: displays the loaded datasets when run as the last
# statement of a notebook cell; it has no effect in a plain script.
co_graph_nodes, co_graph_edges, size_graph_nodes, size_graph_edges, pill_ingredients_nodes, pill_ingredients_edges

pill_identification_data = load_dataset('brainer/pill_identification_data', 'default')

drug_name_encoder = LabelEncoder()
gnl_nm_encoder = LabelEncoder()

# Cast '품목일련번호' to string before reading the column from the 'train' split.
item_serial_number = pill_identification_data.cast_column('품목일련번호', Value(dtype='string'))['train']['품목일련번호']

# Fit on the de-duplicated union of size-graph node ids, size-graph edge
# targets, pill-ingredient edge targets and the item serial numbers.
# NOTE(review): np.asarray presumably coerces the concatenated ids to one
# common dtype before de-duplication; confirm before removing it.
drug_name_encoder.fit(
    list(
        set(
            np.asarray(
                size_graph_nodes['id']
                + size_graph_edges['target']
                + pill_ingredients_edges['target']
                + item_serial_number
            )
        )
    )
)

# Fit on the de-duplicated union of co-graph node ids and
# pill-ingredient node ids.
gnl_nm_encoder.fit(
    list(set(np.asarray(co_graph_nodes['id'] + pill_ingredients_nodes['id'])))
)
```