# Bible parsing script
# Designed to export a no-verse Bible
# Code authored and released to the public domain by Paul Spooner
"""Convert a one-verse-per-line Bible text file into a verse-less HTML page.

Each input line is expected to look like ``<Prefix><chapter>:<verse> <text>``
(for example ``Ge1:1 In the beginning ...``).  The script

1. reads RAWTEXTFILE and sanity-checks the line count,
2. checks the book prefixes found in the text and the pasted title list
   against the manually entered tables,
3. groups the verse text by book and chapter while validating that books,
   chapters and verses appear in strictly increasing order, and
4. writes one paragraph per chapter, with no verse numbers, to OUTFILE.
"""

from collections import namedtuple
from itertools import zip_longest

# manually entered data
# the file to read in
RAWTEXTFILE = "kjv.txt"
# the file to read out
OUTFILE = "kjv_no_verse_links.html"
# number of lines expected in RAWTEXTFILE when split on "\n"
MANUALVERSECOUNT = 31106
# the in-text book prefixes (the trailing "" matches the empty last line)
MANUALBOOKPREFIXES = (
    "Meta",
    "Ge", "Exo", "Lev", "Num", "Deu", "Josh", "Jdgs", "Ruth",
    "1Sm", "2Sm", "1Ki", "2Ki", "1Chr", "2Chr", "Ezra", "Neh", "Est",
    "Job", "Psa", "Prv", "Eccl", "SSol", "Isa", "Jer", "Lam", "Eze", "Dan",
    "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nahum", "Hab", "Zep",
    "Hag", "Zec", "Mal",
    "Mat", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal",
    "Eph", "Phi", "Col", "1Th", "2Th", "1Tim", "2Tim", "Titus", "Phmn",
    "Heb", "Jas", "1Pet", "2Pet", "1Jn", "2Jn", "3Jn", "Jude", "Rev",
    "",
)
# the expanded book titles (same order and length as MANUALBOOKPREFIXES)
MANUALBOOKTITLES = (
    "Metadata",
    "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua",
    "Judges", "Ruth", "First Samuel", "Second Samuel", "First Kings",
    "Second Kings", "First Chronicles", "Second Chronicles", "Ezra",
    "Nehemiah", "Esther", "Job", "Psalms", "Proverbs", "Ecclesiastes",
    "Song of Songs", "Isaiah", "Jeremiah", "Lamentation", "Ezekiel",
    "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum",
    "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi",
    "Matthew", "Mark", "Luke", "John", "Acts", "Romans",
    "First Corinthians", "Second Corinthians", "Galatians", "Ephesians",
    "Philippians", "Colossians", "First Thessalonians",
    "Second Thessalonians", "First Timothy", "Second Timothy", "Titus",
    "Philemon", "Hebrews", "James", "First Peter", "Second Peter",
    "First John", "Second John", "Third John", "Jude", "Revelation",
    "",
)

# Book titles copied from http://www.o-bible.com/kjv.html
# One title per line; the final newline yields a trailing "" entry when
# split, which lines up with the trailing "" of MANUALBOOKTITLES.
PASTEDBOOKTITLES = """Metadata
Genesis
Exodus
Leviticus
Numbers
Deuteronomy
Joshua
Judges
Ruth
1 Samuel
2 Samuel
1 Kings
2 Kings
1 Chronicles
2 Chronicles
Ezra
Nehemiah
Esther
Job
Psalms
Proverbs
Ecclesiastes
Song of Songs
Isaiah
Jeremiah
Lamentation
Ezekiel
Daniel
Hosea
Joel
Amos
Obadiah
Jonah
Micah
Nahum
Habakkuk
Zephaniah
Haggai
Zechariah
Malachi
Matthew
Mark
Luke
John
Acts
Romans
1 Corinthians
2 Corinthians
Galatians
Ephesians
Philippians
Colossians
1 Thessalonians
2 Thessalonians
1 Timothy
2 Timothy
Titus
Philemon
Hebrews
James
1 Peter
2 Peter
1 John
2 John
3 John
Jude
Revelation
"""

# Characters stripped from the right of a reference to isolate the prefix.
REFERENCE_DIGITS = "1234567890:"

FATAL_MESSAGE = "!!!Fatal Transcription Error: Structuring halted!!!"

# Output templates.
# NOTE(review): the visible text of these templates comes from the script,
# but the tag names are a reconstruction -- the heading templates were being
# passed one more argument than they had placeholders (the anchor id), so
# the markup had evidently been lost.  Confirm the tags against the intended
# page layout.
HTML_HEADER = (
    "<html><head><title>KJV Bible</title></head>\n"
    "<body>\n"
    "<h1>Holy Bible</h1>\n"
)
BOOK_HEADING = '\n<h2 id="{}">The Book of {}</h2>\n'
CHAPTER_HEADING = '\n<h3 id="{}">{} Chapter {}</h3>\n'
CHAPTER_BODY = "\n<p>{}</p>\n"
HTML_FOOTER = "</body></html>"

# One parsed book: its display title and a list of chapters, each chapter
# being a list of verse-text strings in verse order.
Book = namedtuple("Book", ["title", "chapters"])


def split_reference(token):
    """Split a reference such as ``"1Sm12:3"`` into ``("1Sm", 12, 3)``.

    The prefix is whatever remains after stripping trailing digits and
    colons; the remainder must be ``<chapter>:<verse>``.

    Raises:
        ValueError: if the chapter or verse number is missing or not an
            integer.
    """
    prefix = token.rstrip(REFERENCE_DIGITS)
    numbers = token[len(prefix):].split(":")
    if len(numbers) < 2:
        raise ValueError("no chapter:verse in {!r}".format(token))
    return prefix, int(numbers[0]), int(numbers[1])


def collect_prefixes(lines):
    """Return the distinct book prefixes of *lines* in first-seen order.

    A blank line contributes the prefix ``""``.  The first token is found
    with a whitespace split, the same way ``structure_text`` finds it, so
    both passes always agree on what the reference of a line is.
    """
    seen = set()
    ordered = []
    for line in lines:
        words = line.split()
        prefix = words[0].rstrip(REFERENCE_DIGITS) if words else ""
        if prefix not in seen:
            seen.add(prefix)
            ordered.append(prefix)
    return ordered


def expand_titles(pasted):
    """Turn the pasted title block into a list of spelled-out titles.

    Leading ordinals are spelled out ("1 Samuel" -> "First Samuel") so the
    result can be compared with MANUALBOOKTITLES.
    """
    spelled = (
        pasted.replace("1", "First")
        .replace("2", "Second")
        .replace("3", "Third")
    )
    return spelled.split("\n")


def check_sequences(label, expected, actual):
    """Compare two sequences item by item and report every difference.

    Sequences of different length are compared to the end of the longer
    one (the missing side shows as ``None``) instead of raising IndexError.
    The "check correct" line is printed only when nothing differed.

    Returns:
        True when the sequences are identical, False otherwise.
    """
    mismatches = 0
    for frst, scnd in zip_longest(expected, actual):
        if frst != scnd:
            mismatches += 1
            print("{} Mismatch: ".format(label), frst,
                  " :Is not the same as: ", scnd)
    if mismatches:
        print("{} check failed: {} mismatches".format(label, mismatches))
        return False
    print("{} check correct".format(label))
    return True


def structure_text(lines, book_prefixes, book_titles):
    """Group verse lines into a list of ``Book`` records.

    Args:
        lines: the raw text split into lines, one verse per line.
        book_prefixes: the prefixes in the order the books must appear.
        book_titles: display titles, parallel to *book_prefixes*.

    Returns:
        The books parsed so far.  Parsing stops at the first blank line, or
        after printing a fatal-error report when a book, chapter or verse is
        out of sequence or a reference is malformed; whatever was parsed
        before that point is still returned.
    """
    books = []
    bookpos = -1      # index of the current book in book_prefixes
    chappos = 0       # number of the current chapter within the book
    vrsepos = 0       # number of the last verse stored in the chapter
    verses = None     # verse list of the current chapter, None before one

    for line in lines:
        words = line.split()
        if not words:
            print("Text end reached.")
            break
        reference = words[0]
        try:
            prefix, chapteridx, verseidx = split_reference(reference)
        except ValueError:
            print("Malformed reference at {}. "
                  "Expected <book><chapter>:<verse>.".format(reference))
            print(FATAL_MESSAGE)
            break

        # have we moved to a new book?
        if bookpos < 0 or prefix != book_prefixes[bookpos]:
            bookpos += 1
            expected = (book_prefixes[bookpos]
                        if bookpos < len(book_prefixes) else None)
            # if it still isn't right, we have a problem
            if prefix != expected:
                print("book prefix mismatch at {}. Should be {}.".format(
                    reference, expected))
                print(FATAL_MESSAGE)
                break
            if bookpos >= len(book_titles):
                print("No title listed for book prefix {}.".format(prefix))
                print(FATAL_MESSAGE)
                break
            books.append(Book(book_titles[bookpos], []))
            chappos = 0
            verses = None

        # have we moved to a new chapter?
        if verses is None or chapteridx != chappos:
            chappos += 1
            # if it still isn't right, we have a problem
            if chapteridx != chappos:
                print("chapter mismatch at {}. Should be {}.".format(
                    reference, chappos))
                print(FATAL_MESSAGE)
                break
            verses = []
            books[-1].chapters.append(verses)
            vrsepos = 0

        # does the verse number match up?
        vrsepos += 1
        if verseidx != vrsepos:
            print("Verse mismatch at {}. Check numbering and order.".format(
                reference))
            print(FATAL_MESSAGE)
            break
        verses.append(" ".join(words[1:]))

    return books


def render_html(books, anchor_prefixes):
    """Render *books* as one HTML page with a paragraph per chapter.

    Args:
        books: ``Book`` records as returned by ``structure_text``.
        anchor_prefixes: abbreviations parallel to *books*; a book heading
            gets the id ``<prefix>`` and chapter N the id ``<prefix>_N``.

    Returns:
        The complete page as a single string.
    """
    parts = [HTML_HEADER]
    for anchor, book in zip(anchor_prefixes, books):
        parts.append(BOOK_HEADING.format(anchor, book.title))
        for chapnum, verses in enumerate(book.chapters, start=1):
            linktext = "{}_{}".format(anchor, chapnum)
            parts.append(CHAPTER_HEADING.format(linktext, book.title, chapnum))
            # verse boundaries are dropped: the chapter reads as one paragraph
            parts.append(CHAPTER_BODY.format(" ".join(verses)))
    parts.append(HTML_FOOTER)
    # joined once at the end instead of growing a string with +=
    return "".join(parts)


def main():
    """Read RAWTEXTFILE, validate it, and write the verse-less OUTFILE."""
    with open(RAWTEXTFILE) as infile:
        raw_data = infile.read()
    print("Data imported. {} characters.".format(len(raw_data)))

    # break it into verses
    broken_data = raw_data.split("\n")
    versecount = len(broken_data)
    if versecount == MANUALVERSECOUNT:
        print("Verse count correct")
    else:
        print("Total of {} verses, does not match cached value of {}. "
              "Check text for completeness.".format(
                  versecount, MANUALVERSECOUNT))

    # check to make sure the prefixes and titles are correct
    book_prefixes = collect_prefixes(broken_data)
    check_sequences("Prefix", MANUALBOOKPREFIXES, book_prefixes)
    book_titles = expand_titles(PASTEDBOOKTITLES)
    check_sequences("Title", MANUALBOOKTITLES, book_titles)

    # The books must appear in the order their prefixes were first seen, so
    # a book that resumes later in the file is reported as a mismatch.
    books = structure_text(broken_data, book_prefixes, book_titles)
    # printed even after a fatal halt; the partial result is still exported
    print("Text structure process complete")

    # Export the data for later use
    with open(OUTFILE, mode="w") as outfile:
        outfile.write(render_html(books, MANUALBOOKPREFIXES))


if __name__ == "__main__":
    main()