#!/usr/bin/env python

# summarize.py - given a file, use an LLM to summarize it
# see: https://medium.com/@ryver.dev/building-a-simple-ai-powered-text-summarizer-with-transformers-in-python-0a31c848e1d2

# Eric Lease Morgan
# (c) University of Notre Dame; distributed under a GNU Public License

# April 28, 2024 - first cut


# configure
MODEL      = 'philschmid/bart-large-cnn-samsum'  # BART checkpoint fine-tuned for summarization
TENSOR     = 'pt'                                # tokenizer returns PyTorch tensors
LENGTH     = 1024                                # maximum number of input tokens fed to the model
TRUNCATION = True                                # truncate input longer than LENGTH instead of erroring
PROMPT     = 'summarize: '                       # instruction prepended to the text before encoding
PENALTY    = 2                                   # length penalty applied during beam search
BEAMS      = 4                                   # number of beams for beam search
EARLY      = True                                # stop beam search once BEAMS candidates are finished
SKIPTOKENS = True                                # drop special tokens (bos/eos/pad) from the decoded summary
MAXIMUM    = 150                                 # maximum length (in tokens) of the generated summary
MINIMUM    = 50                                  # minimum length (in tokens) of the generated summary

# require
from pathlib import Path
from sys import argv, exit

from transformers import BartForConditionalGeneration, BartTokenizer

# get input; the original usage string ended in a bare space where the
# file placeholder belonged, so the hint was invisible to the user
if len( argv ) != 2 : exit( "Usage: " + argv[ 0 ] + " <file>" )
file = argv[ 1 ]

# initialize; from_pretrained downloads/caches the checkpoint on first use
tokenizer = BartTokenizer.from_pretrained( MODEL )
model     = BartForConditionalGeneration.from_pretrained( MODEL )

# read the given file; Path was imported but previously unused, and an
# explicit encoding avoids platform-dependent decoding surprises
text = Path( file ).read_text( encoding='utf-8' )

# do the work: encode the prompt plus text, generate token identifiers
# via beam search, then decode them back into a human-readable summary
inputs      = tokenizer.encode( PROMPT + text, return_tensors=TENSOR, max_length=LENGTH, truncation=TRUNCATION )
identifiers = model.generate( inputs, max_length=MAXIMUM, min_length=MINIMUM, length_penalty=PENALTY, num_beams=BEAMS, early_stopping=EARLY )
summary     = tokenizer.decode( identifiers[ 0 ], skip_special_tokens=SKIPTOKENS )

# output and done
print( summary )
exit()