[Tutor] Writing to a file

Devansh Rastogi devanshr at gmail.com
Thu Jan 18 15:51:18 EST 2018


Hello,

I'm new to python and programming as such and as an exercise for I/O am
writing a small program that reads data from a .txt file, and analyzes the
text, ie, number of words/characters, avg. length of words, and frequency
of words and characters.

Once the text has been analyzed, the results are then written to a file.

After reading the documentation, and a bit about json, I'm afraid, I've
managed to completely confuse myself.

When do you actually use json or pickle, I understand that with data
written to .json files can be used by programs written in other languages,
and pickle is for python specific objects. So are there specific objects
for which .json is used or pickle is preferred? And if I'm just using
write() then I'm just writing to a file, and has nothing to do with json or
pickle? Also is it possible to append data to a already existing file? So
far it seems that everytime I'm calling my write function, its re-writing
the whole file with just the last variable called.
Ive added my code below, and am currently using json.dump() as  I would
like to send the file to a friend who is writing a similar program but with
a gui, and it would be nice if his program can read the data without
problems.

I realize these are pretty basic questions and am missing some basic
fundamentals. I'd be grateful if someone could point me in the right
direction, any tips would be highly appreciated.

from collections import Counter
import json

class Files:
    def __init__(self, filename):
        with open(filename, 'r', encoding='utf-16') as file_input:
            self.file_input_string = file_input.read().replace('\n', ' ')

    def num_of_words(self):
        """ Return number of words in the file"""
        return str(len(self.file_input_string.split()))

    def num_of_keystrokes(self):
        """ Total number of keystrokes
        # abcde.. = 1 stroke
        # ABCDE.. = 2 strokes
        # '.,-/;[]=\ = 1 stroke
        # !@#$%^&*()_+|}{":?>< = 2 strokes """

        lowercase_letters = sum(1 for c in self.file_input_string if
c.islower())
        uppercase_letters = sum(2 for c in self.file_input_string if
c.isupper())
        one_keystroke_punc = ".,-=[]\;'/ "  # space included
        puncuation_one = sum(1 for c in self.file_input_string if c in
one_keystroke_punc)
        two_keystroke_punc = '!@#$%^&*()_+|}{":?><'
        puncuation_two = sum(2 for c in self.file_input_string if c in
two_keystroke_punc)

        return str(lowercase_letters + uppercase_letters +
puncuation_one + puncuation_two)

    def num_of_char(self):
        """ Return number of characters in the string without spaces"""
        return str(len(self.file_input_string) -
self.file_input_string.count(" "))

    def frequency_of_char(self):
        """ Frequency of characters in the file """
        count = Counter(self.file_input_string)
        dict_count = dict(count)
        print("{:<12} {:<10}".format('Character', 'Frequency'))
        for k, v in dict_count.items():
            print("{:<12} {:<10}".format(k, v))

    def frequency_of_words(self):
        """ Frequency of words in the file"""
        # word_count = Counter()
        # for word in self.file_input_string.replace(' ', '\n'): ###
macht wider char. sollte fuer line funktioniern
        #     word_count.update(word)
        # print("{:<15} {:15}".format("Word", "Frequency"))
        # for k, v in word_count.items():
        #     print("{:<15} {:<15}".format(k, v))

        word_list = self.file_input_string.split()
        word_frequecy = [word_list.count(w) for w in word_list]  ##
funktioniert mit string.count!!
        word_frequecy_dict = dict(zip(word_list, word_frequecy))
        print("{:<15} {:15}".format("Word", "Frequency"))
        for k, v in word_frequecy_dict.items():
            print("{:<15} {:<15}".format(k, v))

    def average_len_of_words(self):
        """ calculate the averge length of the words"""
        word_list = self.file_input_string.split()
        average = sum(len(word) for word in word_list) / len(word_list)
        return str(average)

    def write_to_file(self, data):
        """ collect all data for Morgen_Kinder.txt in a file"""
        with open('data.json', 'w') as f:
            json.dump(data, f, sort_keys=True, indent=4)

#test
x = Files('Morgen_Kinder.txt')
a = Files.num_of_char(x)
Files.write_to_file(x,a)
print(a)
b = Files.num_of_words(x)
Files.write_to_file(x,b)
print(b)
c = Files.frequency_of_char(x)
Files.write_to_file(x,c)
d = Files.frequency_of_words(x)
Files.write_to_file(x,d)
e = Files.average_len_of_words(x)
Files.write_to_file(x,e)
print(e)
g = Files.num_of_keystrokes(x)
Files.write_to_file(x,g)
print(g)


More information about the Tutor mailing list