llm-book/ner-wikipedia-dataset
Updated • 134 • 2
How to use llm-book/bert-base-japanese-v3-ner-wikipedia-dataset with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("token-classification", model="llm-book/bert-base-japanese-v3-ner-wikipedia-dataset")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification
tokenizer = AutoTokenizer.from_pretrained("llm-book/bert-base-japanese-v3-ner-wikipedia-dataset")
model = AutoModelForTokenClassification.from_pretrained("llm-book/bert-base-japanese-v3-ner-wikipedia-dataset")

「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。 cl-tohoku/bert-base-japanese-v3をllm-book/ner-wikipedia-datasetでファインチューニングして構築されています。

from pprint import pprint

from transformers import pipeline

# Build the pipeline from the fine-tuned checkpoint. No task name is passed
# here, so resolving it is left to the library (based on the model itself).
ner_pipeline = pipeline(
    model="llm-book/bert-base-japanese-v3-ner-wikipedia-dataset",
    aggregation_strategy="simple",
)

# Sample sentence to run through the model.
text = "大谷翔平は岩手県水沢市出身のプロ野球選手"

# Extract the named entities found in `text` and pretty-print them.
entities = ner_pipeline(text)
pprint(entities)
# Sample output:
# [{'end': None,
#   'entity_group': '人名',
#   'score': 0.99823624,
#   'start': None,
#   'word': '大谷 翔平'},
#  {'end': None,
#   'entity_group': '地名',
#   'score': 0.9986874,
#   'start': None,
#   'word': '岩手 県 水沢 市'}]