Example program

You can download all the example programs here.

# -*- coding: utf-8 -*-
"""
Search texts for violations of prescriptive grammar rules.
"""

import nltk


# A helper function used by all the checking functions.

def tokenize_tag(sentence):
    """Split a sentence into word tokens and attach a POS tag to each.

    Tags are requested from NLTK using the 'universal' tagset.

    Arguments:
        sentence: A string

    Returns:
        The result of nltk.pos_tag: a list of (token, tag) tuples.
    """

    return nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')


# The checking functions.

def endswith_preposition(sentence):
    """Report whether the last non-punctuation token is a preposition.

    Examples:
        >>> endswith_preposition('Who did you go with?')
        True
        >>> endswith_preposition('With whom did you go?')
        False

    Arguments:
        sentence: A string

    Returns:
        Boolean. False when the sentence has no non-punctuation token.
    """

    tagged = tokenize_tag(sentence)

    # Scan from the end, skipping tokens tagged '.' (punctuation), and take
    # the tag of the first remaining token; None if there is no such token.
    final_tag = next(
        (tag for _, tag in reversed(tagged) if tag != '.'),
        None,
    )

    return final_tag == 'ADP'


def split_infinitive(sentence):
    """Check whether a sentence contains a split infinitive.

    A split infinitive is detected as the token 'to' (case-insensitive)
    immediately followed by a token tagged 'ADV' and then a token tagged
    'VERB'. Only a single intervening adverb is recognised.

    Examples:
        >>> split_infinitive('To boldly go.')
        True
        >>> split_infinitive('To go boldly.')
        False

    Arguments:
        sentence: A string

    Returns:
        Boolean.
    """

    tagged = tokenize_tag(sentence)

    # Slide a three-token window over the sentence; zip stops at the
    # shortest slice, so sentences with fewer than three tokens yield
    # no windows and the result is False.
    windows = zip(tagged, tagged[1:], tagged[2:])

    return any(
        first[0].lower() == 'to' and second[1] == 'ADV' and third[1] == 'VERB'
        for first, second, third in windows
    )


def startswith_conjunction(sentence):
    """Report whether the first non-punctuation token is a conjunction.

    Examples:
        >>> startswith_conjunction('And it was all a dream.')
        True
        >>> startswith_conjunction('It was all a dream.')
        False

    Arguments:
        sentence: A string

    Returns:
        Boolean. False when the sentence has no non-punctuation token.
    """

    tagged = tokenize_tag(sentence)

    # Skip any leading tokens tagged '.' (punctuation) and take the tag of
    # the first remaining token; None if there is no such token.
    initial_tag = next(
        (tag for _, tag in tagged if tag != '.'),
        None,
    )

    return initial_tag == 'CONJ'


# An overall function that applies all the checking functions.

# Maps each human-readable violation label to the checking function that
# detects it. check_text() iterates over this mapping, so on Python versions
# where dicts preserve insertion order the order of entries here is the order
# in which labels appear in its output.
VIOLATION_LABELS = {
    'final preposition': endswith_preposition,
    'split infinitive': split_infinitive,
    'initial conjunction': startswith_conjunction,
}


def check_text(text):
    """Check a text for sentences that violate one of the three rules.

    The text is split into sentences with nltk.sent_tokenize, and every
    checking function in VIOLATION_LABELS is applied to each sentence.
    Sentences that violate none of the rules are skipped.

    Example:
        >>> text = 'And who would you like to boldly go with?'
        >>> for sentence, violations in check_text(text):
        ...     print(sentence)
        ...     print(violations)
        And who would you like to boldly go with?
        ['final preposition', 'split infinitive', 'initial conjunction']

    Arguments:
        text: A string

    Returns:
        Generator yielding (sentence, [violation labels]) tuples, one per
        sentence with at least one violation.
    """

    for sentence in nltk.sent_tokenize(text):
        violations = [
            label
            for label, check in VIOLATION_LABELS.items()
            if check(sentence)
        ]
        if violations:
            yield (sentence, violations)


# A demo showing violations of the rules in an example dialogue.

if __name__ == '__main__':

    # Example dialogue; each line is one sentence to be checked.
    dialogue = """
    Hello, have you boldly been anywhere lately?
    Why would I want to boldly go anywhere?
    You might have someone special to boldly go with.
    But I don't.
    I have boldly been somewhere, ask me about it.
    Where have you boldly been then?
    To space, the final frontier.
    Did you boldly go with anyone?
    No, but I'd like to.
    And who would you like to boldly go with?
    Captain Kirk, he's a total dreamboat.
    """

    # Print every offending sentence together with the rules it breaks.
    for sentence, labels in check_text(dialogue):
        print(sentence, labels, '\n')