#!/usr/bin/env python

# carrel2tsv.py - given a few configurations, output bibliographics in the form of TSV

# Eric Lease Morgan <emorgan@nd.edu>
# (c) University of Notre Dame; distributed under a GNU Public License

# April 12, 2024 - first cut; illustrating the functionality of HTTP content negotiation


# configure; where the carrel lives
HOST   = 'http://carrels.distantreader.org'    # root of every URL built by this script
CARREL = 'subject-englandAndWales-freebo'      # carrel name; appended to HOST to form the request URL
CACHE  = 'cache'                               # path segment placed between the carrel and each file name

# configure; what to ask for and how to label the result
MIMETYPE = 'application/json'                  # sent as the value of the HTTP Accept header
COLUMNS  = [ 'author', 'title', 'url' ]        # column headings of the resulting data frame

# require
from requests import get
from json     import loads
from pandas   import DataFrame

# build a request, submit it, and get the response; the Accept header is what
# asks the server for MIMETYPE instead of whatever it would send otherwise
TIMEOUT  = 30    # seconds; without a timeout a stalled server hangs the script forever
url      = HOST + '/' + CARREL
headers  = { 'Accept':MIMETYPE }
response = get( url, headers=headers, timeout=TIMEOUT )

# stop on an HTTP error (4xx/5xx) here, rather than handing an error page to
# the json parser and failing later with a misleading decoding error
response.raise_for_status()

# parse the body as json and iterate over it; create a list of records
items   = loads( response.text )
records = []
for item in items :

	# parse; each item is read as a mapping having these four keys, and a
	# missing key raises KeyError
	identifier = item[ 'id' ]
	author     = item[ 'author' ]
	title      = item[ 'title' ]
	extension  = item[ 'extension' ]

	# build the url of the cached file; identifier and extension are joined
	# with no separator -- presumably extension carries its own leading dot; TODO confirm
	location = '/'.join( [ HOST, CARREL, CACHE, identifier + extension ] )

	# update the list of records; the order of values must match COLUMNS
	records.append( [ author, title, location ] )

# create a data frame, output as tab-delimited text (no index column), and done;
# SystemExit is raised directly because exit() is only a helper injected by the
# site module and is not guaranteed to exist
bibliographics = DataFrame( records, columns=COLUMNS )
print( bibliographics.to_csv( index=False, sep='\t' ) )
raise SystemExit( 0 )