A simple and fast library/code to store & manage variable data

Question 1

I've been searching through the different ways to store data and found some helpful and actually great libs like:

JSON
XML

And some neh:

text file basic write and read
some binary type data storing?
some other database types...

Yet still I didn't find an easy and light library with basic storing mechanism/way (In my whole search; tell me others if you know :)

So I wanted to create a type of library to store and manage data in special file type extension which I call '.var'. The lib will provide access to all sorts of the specified file type data manipulation methods.

lib pros:

single light file lib
faster than JSON, XML...
Easily understood by humans and reads by robots/computers
can be very flexible when commenting('--', '//', '#') or initializing a variable('tab', '=', ':')

example '.var' file looks like:

#This is a comment using '#'
--This is a comment using '--'
//This is a comment using '//'
NAME alex
IP = 127.0.0.1
port: 1024

lib py file code:

from dataclasses import replace
class varFile:
 isKey_CaseSensitive = None
 
 varFileLocation = None
 varFileText = None
 varFileLines = None
 
 
 def __init__(self, file_location='', file_text='', isKey_CaseSensitive=False):
 self.varFileLocation = file_location
 self.varFileText = file_text
 
 self.isKey_CaseSensitive = isKey_CaseSensitive
 
 
 def SplitLines(self):
 self.varFileLines = self.varFileText.split('\n')
 #print(self.varFileLines) # used to output the list of 'varFileLines'
 
 
 
 def readVarFile(self, file_location=''):
 file = None
 if(file_location != ''):# if file_location parameter was provided
 file = open(file_location, "rt")
 else:# if file_location parameter was NOT provided
 file = open(self.varFileLocation, "rt")
 
 self.varFileText = file.read()
 file.close()
 
 self.SplitLines()
 
 def writeVarFile(self, file_location=''):
 file = None
 if(file_location != ''):# if file_location parameter was provided
 file = open(file_location, "wt")
 else:# if file_location parameter was NOT provided
 file = open(self.varFileLocation, "wt")
 
 file.read(self.varFileText)
 file.close()
 
 
 
 def getVarFileText(self):# also regenerates 'varFileText' from the list 'varFileLines'
 self.varFileText = ''
 for var_line in self.varFileLines:
 self.varFileText += var_line + '\n'
 
 return self.varFileText
 
 
 def getVarParts(self, var_line_text):
 if('=' not in var_line_text and ':' not in var_line_text and '\t' not in var_line_text): #check variable line if may not be properly formated
 raise Exception("varFile: var_line_text has no initializer(e.g. '=')\n" + "var_line_text: " + var_line_text)
 
 #three sections_parts:
 # 1 = key
 # 2 initializer
 # 3 value
 section_part='1'
 part_key=''
 part_initializer=''
 part_value=''
 
 for i in range(len(var_line_text)):
 if(section_part=='1'):
 if(var_line_text[i] != '=' and var_line_text[i] != ':' and var_line_text[i] != '\t'):
 part_key += var_line_text[i]
 else:
 #section_part='2'
 part_initializer=var_line_text[i] # here section part 2 is completed, then proceed to section part 3
 section_part='3'
 
 elif(section_part=='3'):
 part_value+=var_line_text[i]
 
 return [part_key.strip(), part_initializer, part_value.strip()]
 
 
#
#
# HERE starts the ".var" file data mainpulation functions
#
#
 
 
 def getValueByKey(self, key):
 if(self.isKey_CaseSensitive==False):key=key.lower()#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
 
 value = None
 for variable_line in self.varFileLines:
 variable_line = variable_line.lstrip()#trim line for unwanted starting space
 if(self.isKey_CaseSensitive==False):variable_line=variable_line.lower()#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
 if(variable_line.startswith(key)): 
 #print(variable_line)
 value = self.getVarParts(variable_line)[2]
 break
 
 return value
 
 
 def getValueByLineNumber(self, line_number):
 return self.getVarParts(self.varFileLines[line_number-1])[2]
 
 
 
 
 
 
 def setValueByKey(self, key, value):
 for i in range(len(self.varFileLines)):
 if(self.isKey_CaseSensitive==False):#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
 if(self.varFileLines[i].lower().startswith(key.lower())):
 self.varFileLines[i] = value
 break
 
 else:#else then the search will be case-sensitive
 if(self.varFileLines[i].startswith(key)):
 self.varFileLines[i] = value
 break
 
 self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
 
 
 
 def setValueByLineNumber(self, line_number, value):
 self.varFileLines[line_number-1] = value
 
 self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
 
 
 
 
 
 
 
 
 def getVarByLineNumber(self, line_number):
 return self.getVarParts(self.varFileLines[line_number-1])
 
 
 
 
 
 
 
 def getAbsLineAt(self, line_number):
 return self.varFileLines[line_number-1]
 
 
 
 
 # absolute line (e.g. "name: alex" OR can be a comment "--this' a comment" )
 def appendAbsLineAt(self, abs_line, line_number=0):
 if(line_number==0):#if 'line_number' is 0 then append to the end(this is a special case number)
 self.varFileLines.append(abs_line)
 else:# if 'line_number' is > 0 then make it at the specified position line (e.g. 'line_number' is 1, then it will be the fist line..., and so for 2, second line)
 self.varFileLines.insert(line_number-1, abs_line)
 self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
 
 
 
 
 def replaceAbsLineByLineNumber(self, line_number, replace_with):
 self.varFileLines[line_number-1] = replace_with
 self.getVarFileText(self)
 
 
 def removeAbsLineByKey(self, key):# can be only used to remove variables(e.g. "name=alex")
 for i in range(len(self.varFileLines)):
 if(self.isKey_CaseSensitive==False):#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
 if(self.varFileLines[i].lower().startswith(key.lower())):
 del self.varFileLines[i]
 break
 
 else:#else then the search will be case-sensitive
 if(self.varFileLines[i].startswith(key)):
 del self.varFileLines[i]
 break
 
 self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
 
 
 def removeAbsLineByLineNumber(self, line_number):# can be used to remove either variables(e.g. "name=alex") OR comments
 del self.varFileLines[line_number-1]
 self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
 
 
 
 
 
 # no need for this any more since function 'appendAbsLineAt()' exists and is all comprehensive and shiny lol
 # var line (e.g. ['name'],[':'],['alex'])
 #def appendVarLine(self, var_line, line_number=''):
 # pass
 
 
 
#
#
# HERE ends the ".var" file data mainpulation functions
#
#
 
 
 
 
 
 
 
 
 
 def convertValueToInt(self, value):
 return int(''.join(value.split()))
 
 
 def convertValueToBoolean(self, value):
 boolean_value = None
 if(value.lower() in ('true', 't', 'yes', 'y', '1')): boolean_value=True#if 'boolean_value' was detected as TRUE
 elif(value.lower() in ('false', 'f', 'no', 'n', '0')): boolean_value=False#if 'boolean_value' was detected as FALSE
 return boolean_value
 
 
 #not yet implemented!
 def convertValueToBytes(self, value):
 pass
 
 
 def clear():# note the function DOESNT clear the variable 'varFileLocation'
 varFile.varFileText = ''
 varFile.varFileLines = ''

Reference file in github

Interface

using the lib interface/methods to print data:

from varFile import varFile
test_varFile = varFile('../test.var')
test_varFile.readVarFile()
#variable value is retrieved by its key(name), which is e.g. 'NAME'
print('My name is ' + test_varFile.getValueByKey('NAME'))
#variable value is retrieved by the line number, which is e.g. 7
print('My Ip is ' + test_varFile.getValueByLineNumber(7))
#entire variable line is retrieved by the line number, which is e.g. 8
print('\nfull variable line:\n' + test_varFile.getAbsLineAt(8))

Currently the lib is written in python only, but if you think it's good and worth using, I'll write it in different languages.

The source is on GitHub.

Question 2

Have you tried pickle?

Question 3

nope, tho I just read about, and from what I've gathered is that it transforms an object in python exclusively to a byte stream file. Its good in some cases imo, tho still not human readable(like JSON, or pure text), still good for storing data a human may not change the file by himself

Question 4

I'm trying to find preferably some lib the can store and manage data easily like in OP .var file example, imo that would make it prefrect for storing a special case/type of data to be store... it can be easily modified by the user in the file system by any text editor, and contain good meta data/comments to help clarify data variables, not to mention its lighter and faster than other libs, hence the simple storing and manipulation of data mechanism.

Question 5

Incorrect Python

Some parts of the code is incorrect Python, and will raise exceptions when executed. I guess you missed these because your tests don't execute these lines.

Calls like this, passing self as parameter, which is unexpected:

self.getVarFileText(self)

The self parameter should be removed.

In writeVarFile, this line is most certainly an error:

file.read(self.varFileText)

The file.read function expects an int, and self.varFileText is a str.

Suspicious Python

In code like this:

class VarFile:
 varFileLocation = None
 def __init__(self, file_location='', file_text='', isKey_CaseSensitive=False):
 self.varFileLocation = file_location
foo = VarFile()

Be aware that VarFile.varFileLocation and foo.varFileLocation are not the same thing. The first one is a class variable, the second is an instance variable.

All the class variables in the posted code seem to be unintended.

A related issue is this clear function inside the class:

def clear():
 varFile.varFileText = ''
 varFile.varFileLines = ''

Since the function doesn't take self as argument, it looks like a class function, which some tools report as an error. I believe the intention was more like this:

def clear(self):
 self.varFileText = ''
 self.varFileLines = ''

Python style

I suggest to read and follow PEP 8 – Style Guide for Python Code

The posted code doesn't follow the style guide well, which makes it difficult to read. I call out a few bigger points, but please do read that doc and follow it.

Classes should use PascalCase naming, for example VarFile
Functions should use snake_case, for example read_var_file
There should not be large blocks of blank lines
Avoid foo == False, the idiomatic Python is to write not foo
Always break the line after :, for example in if cond: pass
Avoid redundant parentheses, for example in if (cond):
Use inline comments sparingly. Most of not all inline comments in the posted code would be better on their own line.
if (file_location != ''): should be written simpler as if file_location:

I suggest to use a code editor such as PyCharm, which has built in tools to re-format the code nicely, and also calls out practices and violations of PEP8.

Consider encapsulation and information hiding

A class should hide implementation details are not relevant for its users. It makes the API easier to understand, and it helps ensure the integrity of the class.

These functions are for internal use, therefore they should have a name starting with _, to signal to readers that they are private: SplitLines, getVarFileText, getVarParts, and all the convert* functions.

What are the interesting functions for users of the class? The ones that get or set values. Those are the only functions that should be non-private.

Consider the essential data of a class

The class has varFileText and varFileLines. Judging by their names, both could contain the relevant data parsed from the storage file, but which one is the canonical source?

After reading the code, it turns out that self.varFileText is just a middle man: the get* methods use self.varFileLines as the source, and self.varFileText is repeatedly overwritten.

There's no need for self.varFileText, remove it. The code will be simpler, it will be clear where the relevant data is.

Consider the cost of flexibility

The API allows different kinds of comment symbols, in the name of flexibility. Be aware that flexibility can lead to complexity, and to religious wars. The support for flexibility requires more code, more test code, and with that it opens opportunities for more bugs. Some users will prefer one commenting style over another, some will use inconsistent commenting style with or without reason. Style guides will emerge recommending one writing style over another. Religious wars.

Sometimes it's good to have an opinion, and be unburdened by the responsibility of choice. I suggest to choose one commenting style to support. Whichever one.

Use context manager for file operations

The recommended idiom to work with files looks like this:

with open(path) as fh:
 ...

When the code leaves the with block, the file will get closed correctly, and no need to call fh.close().

When not using with, you must remember to call fh.close(), and also to handle exceptions. It's a lot easier to just use with.

Other issues

file = None is unnecessary, remove it.

This import is not used, remove it: from dataclasses import replace

Don't use empty string to mean "not provided". It's better to use None for that. For example instead of this:

def readVarFile(self, file_location=''):
 if file_location != '': # if file_location parameter was provided
 file = open(file_location, "rt")
 else: # if file_location parameter was NOT provided
 file = open(self.varFileLocation, "rt")
 self.varFileText = file.read()
 file.close()
 self.varFileLines = self.varFileText.split('\n')

A better way to write would be (including some other suggestions above):

def readVarFile(self, path=None):
 if not path:
 path = self.varFileLocation
 with open(path, "rt") as fh:
 self.varFileLines = fh.readlines()

Instead of this:

def getVarFileText(self): # also regenerates 'varFileText' from the list 'varFileLines'
 self.varFileText = ''
 for var_line in self.varFileLines:
 self.varFileText += var_line + '\n'
 return self.varFileText

Use join:

def getVarFileText(self):
 return '\n'.join(self.varFileLines) + '\n'

Question 6

Let's see if I understand your criteria correctly:

"Store and manage data easily" (built-in json and pickle already meet this)
"Human-readable" (json and xml built-ins already meet this)
"Light": guess what's lighter than a custom library? No custom libraries. Using built-ins is lighter than a custom library.
"fast": you have not shown any evidence that your implementation is faster, so I will very much not believe this until I see it.
"Supporting comments and good metadata" and "fast" are in conflict, but the former is already met by xml

if you think it's good and worth using

No, and (pardon me for saying this, but) it never will be, since it's non-standard, and won't ever be as well-tested, well-adopted, well-supported, well-documented and well-optimised as the known alternatives.

As I see it, the only reason for you to pursue this is fun or practice - and both are worthy reasons!

As for the code itself:

Classes should be in TitleCase, as in VarFile.

Add PEP484 type hints.

Variables should be in lower_snake_case, as in is_key_case_sensitive.

Don't surround if predicates in () parens: this isn't C/Java/etc.

Protect your open() calls in context management with statements, and don't explicitly close().

Don't iteratively build a string using concatenation +. This on its own guarantees that your implementation will perform poorly as it's O(n^2) for most commonly used versions of Python.

Don't raise Exception. If you want this to be a general-purpose library, you need to derive your own exception types.

mainpulation is spelled manipulation.

for i in range(len(self.varFileLines)) fails to "loop like a native": instead you should for line in self.varFileLines, and rewrite your implementation to avoid mutating varFileLines during setValueByKey.

Don't self.isKey_CaseSensitive == False; use not self.is_key_case_sensitive instead.

Accepting any of 'true', 't', 'yes', 'y', '1' for your booleans does not lend itself to a well-designed protocol; you should be more restrictive so that the values are more predictable. Think: this is not a human-computer interface, primarily; it's a computer-computer interface for serialisation that should not concern itself with UI matters like permissive boolean parsing.

janos janos 113k15 gold badges154 silver badges396 bronze badges · Accepted Answer · 2022-09-17 14:23:36Z

Incorrect Python

Some parts of the code is incorrect Python, and will raise exceptions when executed. I guess you missed these because your tests don't execute these lines.

Calls like this, passing self as parameter, which is unexpected:

self.getVarFileText(self)

The self parameter should be removed.

In writeVarFile, this line is most certainly an error:

file.read(self.varFileText)

The file.read function expects an int, and self.varFileText is a str.

Suspicious Python

In code like this:

class VarFile:
 varFileLocation = None
 def __init__(self, file_location='', file_text='', isKey_CaseSensitive=False):
 self.varFileLocation = file_location
foo = VarFile()

Be aware that VarFile.varFileLocation and foo.varFileLocation are not the same thing. The first one is a class variable, the second is an instance variable.

All the class variables in the posted code seem to be unintended.

A related issue is this clear function inside the class:

def clear():
 varFile.varFileText = ''
 varFile.varFileLines = ''

Since the function doesn't take self as argument, it looks like a class function, which some tools report as an error. I believe the intention was more like this:

def clear(self):
 self.varFileText = ''
 self.varFileLines = ''

Python style

I suggest to read and follow PEP 8 – Style Guide for Python Code

The posted code doesn't follow the style guide well, which makes it difficult to read. I call out a few bigger points, but please do read that doc and follow it.

Classes should use PascalCase naming, for example VarFile
Functions should use snake_case, for example read_var_file
There should not be large blocks of blank lines
Avoid foo == False, the idiomatic Python is to write not foo
Always break the line after :, for example in if cond: pass
Avoid redundant parentheses, for example in if (cond):
Use inline comments sparingly. Most of not all inline comments in the posted code would be better on their own line.
if (file_location != ''): should be written simpler as if file_location:

I suggest to use a code editor such as PyCharm, which has built in tools to re-format the code nicely, and also calls out practices and violations of PEP8.

Consider encapsulation and information hiding

A class should hide implementation details are not relevant for its users. It makes the API easier to understand, and it helps ensure the integrity of the class.

These functions are for internal use, therefore they should have a name starting with _, to signal to readers that they are private: SplitLines, getVarFileText, getVarParts, and all the convert* functions.

What are the interesting functions for users of the class? The ones that get or set values. Those are the only functions that should be non-private.

Consider the essential data of a class

The class has varFileText and varFileLines. Judging by their names, both could contain the relevant data parsed from the storage file, but which one is the canonical source?

After reading the code, it turns out that self.varFileText is just a middle man: the get* methods use self.varFileLines as the source, and self.varFileText is repeatedly overwritten.

There's no need for self.varFileText, remove it. The code will be simpler, it will be clear where the relevant data is.

Consider the cost of flexibility

The API allows different kinds of comment symbols, in the name of flexibility. Be aware that flexibility can lead to complexity, and to religious wars. The support for flexibility requires more code, more test code, and with that it opens opportunities for more bugs. Some users will prefer one commenting style over another, some will use inconsistent commenting style with or without reason. Style guides will emerge recommending one writing style over another. Religious wars.

Sometimes it's good to have an opinion, and be unburdened by the responsibility of choice. I suggest to choose one commenting style to support. Whichever one.

Use context manager for file operations

The recommended idiom to work with files looks like this:

with open(path) as fh:
 ...

When the code leaves the with block, the file will get closed correctly, and no need to call fh.close().

When not using with, you must remember to call fh.close(), and also to handle exceptions. It's a lot easier to just use with.

Other issues

file = None is unnecessary, remove it.

This import is not used, remove it: from dataclasses import replace

Don't use empty string to mean "not provided". It's better to use None for that. For example instead of this:

def readVarFile(self, file_location=''):
 if file_location != '': # if file_location parameter was provided
 file = open(file_location, "rt")
 else: # if file_location parameter was NOT provided
 file = open(self.varFileLocation, "rt")
 self.varFileText = file.read()
 file.close()
 self.varFileLines = self.varFileText.split('\n')

A better way to write would be (including some other suggestions above):

def readVarFile(self, path=None):
 if not path:
 path = self.varFileLocation
 with open(path, "rt") as fh:
 self.varFileLines = fh.readlines()

Instead of this:

def getVarFileText(self): # also regenerates 'varFileText' from the list 'varFileLines'
 self.varFileText = ''
 for var_line in self.varFileLines:
 self.varFileText += var_line + '\n'
 return self.varFileText

Use join:

def getVarFileText(self):
 return '\n'.join(self.varFileLines) + '\n'

Stack Exchange Network

A simple and fast library/code to store & manage variable data

lib py file code:

Interface

2 Answers 2

Incorrect Python

Suspicious Python

Python style

Consider encapsulation and information hiding

Consider the essential data of a class

Consider the cost of flexibility

Use context manager for file operations

Other issues

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

A simple and fast library/code to store & manage variable data

lib py file code:

Interface

2 Answers 2

Incorrect Python

Suspicious Python

Python style

Consider encapsulation and information hiding

Consider the essential data of a class

Consider the cost of flexibility

Use context manager for file operations

Other issues

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions