I've been searching through the different ways to store data and found some helpful and actually great libs like:
- JSON
- XML
And some neh:
- text file basic write and read
- some binary type data storing?
- some other database types...
Yet still I didn't find an easy and light library with basic storing mechanism/way (In my whole search; tell me others if you know :)
So I wanted to create a type of library to store and manage data in special file type extension which I call '.var'. The lib will provide access to all sorts of the specified file type data manipulation methods.
lib pros:
- single light file lib
- faster than JSON, XML...
- Easily understood by humans and reads by robots/computers
- can be very flexible when commenting('--', '//', '#') or initializing a variable('tab', '=', ':')
example '.var' file looks like:
#This is a comment using '#'
--This is a comment using '--'
//This is a comment using '//'
NAME alex
IP = 127.0.0.1
port: 1024
lib py file code:
from dataclasses import replace
class varFile:
isKey_CaseSensitive = None
varFileLocation = None
varFileText = None
varFileLines = None
def __init__(self, file_location='', file_text='', isKey_CaseSensitive=False):
self.varFileLocation = file_location
self.varFileText = file_text
self.isKey_CaseSensitive = isKey_CaseSensitive
def SplitLines(self):
self.varFileLines = self.varFileText.split('\n')
#print(self.varFileLines) # used to output the list of 'varFileLines'
def readVarFile(self, file_location=''):
file = None
if(file_location != ''):# if file_location parameter was provided
file = open(file_location, "rt")
else:# if file_location parameter was NOT provided
file = open(self.varFileLocation, "rt")
self.varFileText = file.read()
file.close()
self.SplitLines()
def writeVarFile(self, file_location=''):
file = None
if(file_location != ''):# if file_location parameter was provided
file = open(file_location, "wt")
else:# if file_location parameter was NOT provided
file = open(self.varFileLocation, "wt")
file.read(self.varFileText)
file.close()
def getVarFileText(self):# also regenerates 'varFileText' from the list 'varFileLines'
self.varFileText = ''
for var_line in self.varFileLines:
self.varFileText += var_line + '\n'
return self.varFileText
def getVarParts(self, var_line_text):
if('=' not in var_line_text and ':' not in var_line_text and '\t' not in var_line_text): #check variable line if may not be properly formated
raise Exception("varFile: var_line_text has no initializer(e.g. '=')\n" + "var_line_text: " + var_line_text)
#three sections_parts:
# 1 = key
# 2 initializer
# 3 value
section_part='1'
part_key=''
part_initializer=''
part_value=''
for i in range(len(var_line_text)):
if(section_part=='1'):
if(var_line_text[i] != '=' and var_line_text[i] != ':' and var_line_text[i] != '\t'):
part_key += var_line_text[i]
else:
#section_part='2'
part_initializer=var_line_text[i] # here section part 2 is completed, then proceed to section part 3
section_part='3'
elif(section_part=='3'):
part_value+=var_line_text[i]
return [part_key.strip(), part_initializer, part_value.strip()]
#
#
# HERE starts the ".var" file data mainpulation functions
#
#
def getValueByKey(self, key):
if(self.isKey_CaseSensitive==False):key=key.lower()#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
value = None
for variable_line in self.varFileLines:
variable_line = variable_line.lstrip()#trim line for unwanted starting space
if(self.isKey_CaseSensitive==False):variable_line=variable_line.lower()#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
if(variable_line.startswith(key)):
#print(variable_line)
value = self.getVarParts(variable_line)[2]
break
return value
def getValueByLineNumber(self, line_number):
return self.getVarParts(self.varFileLines[line_number-1])[2]
def setValueByKey(self, key, value):
for i in range(len(self.varFileLines)):
if(self.isKey_CaseSensitive==False):#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
if(self.varFileLines[i].lower().startswith(key.lower())):
self.varFileLines[i] = value
break
else:#else then the search will be case-sensitive
if(self.varFileLines[i].startswith(key)):
self.varFileLines[i] = value
break
self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
def setValueByLineNumber(self, line_number, value):
self.varFileLines[line_number-1] = value
self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
def getVarByLineNumber(self, line_number):
return self.getVarParts(self.varFileLines[line_number-1])
def getAbsLineAt(self, line_number):
return self.varFileLines[line_number-1]
# absolute line (e.g. "name: alex" OR can be a comment "--this' a comment" )
def appendAbsLineAt(self, abs_line, line_number=0):
if(line_number==0):#if 'line_number' is 0 then append to the end(this is a special case number)
self.varFileLines.append(abs_line)
else:# if 'line_number' is > 0 then make it at the specified position line (e.g. 'line_number' is 1, then it will be the fist line..., and so for 2, second line)
self.varFileLines.insert(line_number-1, abs_line)
self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
def replaceAbsLineByLineNumber(self, line_number, replace_with):
self.varFileLines[line_number-1] = replace_with
self.getVarFileText(self)
def removeAbsLineByKey(self, key):# can be only used to remove variables(e.g. "name=alex")
for i in range(len(self.varFileLines)):
if(self.isKey_CaseSensitive==False):#if option 'isKey_CaseSensitive' false, then the search will be case-INsensitive
if(self.varFileLines[i].lower().startswith(key.lower())):
del self.varFileLines[i]
break
else:#else then the search will be case-sensitive
if(self.varFileLines[i].startswith(key)):
del self.varFileLines[i]
break
self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
def removeAbsLineByLineNumber(self, line_number):# can be used to remove either variables(e.g. "name=alex") OR comments
del self.varFileLines[line_number-1]
self.getVarFileText(self)#regenerate 'varFileText' from the list 'varFileLines'
# no need for this any more since function 'appendAbsLineAt()' exists and is all comprehensive and shiny lol
# var line (e.g. ['name'],[':'],['alex'])
#def appendVarLine(self, var_line, line_number=''):
# pass
#
#
# HERE ends the ".var" file data mainpulation functions
#
#
def convertValueToInt(self, value):
return int(''.join(value.split()))
def convertValueToBoolean(self, value):
boolean_value = None
if(value.lower() in ('true', 't', 'yes', 'y', '1')): boolean_value=True#if 'boolean_value' was detected as TRUE
elif(value.lower() in ('false', 'f', 'no', 'n', '0')): boolean_value=False#if 'boolean_value' was detected as FALSE
return boolean_value
#not yet implemented!
def convertValueToBytes(self, value):
pass
def clear():# note the function DOESNT clear the variable 'varFileLocation'
varFile.varFileText = ''
varFile.varFileLines = ''
Reference file in github
Interface
using the lib interface/methods to print data:
from varFile import varFile
test_varFile = varFile('../test.var')
test_varFile.readVarFile()
#variable value is retrieved by its key(name), which is e.g. 'NAME'
print('My name is ' + test_varFile.getValueByKey('NAME'))
#variable value is retrieved by the line number, which is e.g. 7
print('My Ip is ' + test_varFile.getValueByLineNumber(7))
#entire variable line is retrieved by the line number, which is e.g. 8
print('\nfull variable line:\n' + test_varFile.getAbsLineAt(8))
Currently the lib is written in python only, but if you think it's good and worth using, I'll write it in different languages.
The source is on GitHub.
-
\$\begingroup\$ Have you tried pickle? \$\endgroup\$Reinderien– Reinderien2022年09月16日 15:47:45 +00:00Commented Sep 16, 2022 at 15:47
-
\$\begingroup\$ nope, tho I just read about, and from what I've gathered is that it transforms an object in python exclusively to a byte stream file. Its good in some cases imo, tho still not human readable(like JSON, or pure text), still good for storing data a human may not change the file by himself \$\endgroup\$user264933– user2649332022年09月17日 03:38:53 +00:00Commented Sep 17, 2022 at 3:38
-
\$\begingroup\$ I'm trying to find preferably some lib the can store and manage data easily like in OP .var file example, imo that would make it prefrect for storing a special case/type of data to be store... it can be easily modified by the user in the file system by any text editor, and contain good meta data/comments to help clarify data variables, not to mention its lighter and faster than other libs, hence the simple storing and manipulation of data mechanism. \$\endgroup\$user264933– user2649332022年09月17日 03:45:12 +00:00Commented Sep 17, 2022 at 3:45
2 Answers 2
Incorrect Python
Some parts of the code is incorrect Python, and will raise exceptions when executed. I guess you missed these because your tests don't execute these lines.
Calls like this, passing self
as parameter, which is unexpected:
self.getVarFileText(self)
The self
parameter should be removed.
In writeVarFile
, this line is most certainly an error:
file.read(self.varFileText)
The file.read
function expects an int
, and self.varFileText
is a str
.
Suspicious Python
In code like this:
class VarFile: varFileLocation = None def __init__(self, file_location='', file_text='', isKey_CaseSensitive=False): self.varFileLocation = file_location foo = VarFile()
Be aware that VarFile.varFileLocation
and foo.varFileLocation
are not the same thing.
The first one is a class variable, the second is an instance variable.
All the class variables in the posted code seem to be unintended.
A related issue is this clear
function inside the class:
def clear(): varFile.varFileText = '' varFile.varFileLines = ''
Since the function doesn't take self
as argument,
it looks like a class function,
which some tools report as an error.
I believe the intention was more like this:
def clear(self):
self.varFileText = ''
self.varFileLines = ''
Python style
I suggest to read and follow PEP 8 – Style Guide for Python Code
The posted code doesn't follow the style guide well, which makes it difficult to read. I call out a few bigger points, but please do read that doc and follow it.
- Classes should use PascalCase naming, for example
VarFile
- Functions should use snake_case, for example
read_var_file
- There should not be large blocks of blank lines
- Avoid
foo == False
, the idiomatic Python is to writenot foo
- Always break the line after
:
, for example inif cond: pass
- Avoid redundant parentheses, for example in
if (cond):
- Use inline comments sparingly. Most of not all inline comments in the posted code would be better on their own line.
if (file_location != ''):
should be written simpler asif file_location:
I suggest to use a code editor such as PyCharm, which has built in tools to re-format the code nicely, and also calls out practices and violations of PEP8.
Consider encapsulation and information hiding
A class should hide implementation details are not relevant for its users. It makes the API easier to understand, and it helps ensure the integrity of the class.
These functions are for internal use,
therefore they should have a name starting with _
,
to signal to readers that they are private:
SplitLines
, getVarFileText
, getVarParts
, and all the convert*
functions.
What are the interesting functions for users of the class? The ones that get or set values. Those are the only functions that should be non-private.
Consider the essential data of a class
The class has varFileText
and varFileLines
.
Judging by their names,
both could contain the relevant data parsed from the storage file,
but which one is the canonical source?
After reading the code,
it turns out that self.varFileText
is just a middle man:
the get*
methods use self.varFileLines
as the source,
and self.varFileText
is repeatedly overwritten.
There's no need for self.varFileText
, remove it.
The code will be simpler,
it will be clear where the relevant data is.
Consider the cost of flexibility
The API allows different kinds of comment symbols, in the name of flexibility. Be aware that flexibility can lead to complexity, and to religious wars. The support for flexibility requires more code, more test code, and with that it opens opportunities for more bugs. Some users will prefer one commenting style over another, some will use inconsistent commenting style with or without reason. Style guides will emerge recommending one writing style over another. Religious wars.
Sometimes it's good to have an opinion, and be unburdened by the responsibility of choice. I suggest to choose one commenting style to support. Whichever one.
Use context manager for file operations
The recommended idiom to work with files looks like this:
with open(path) as fh:
...
When the code leaves the with
block,
the file will get closed correctly,
and no need to call fh.close()
.
When not using with
,
you must remember to call fh.close()
,
and also to handle exceptions.
It's a lot easier to just use with
.
Other issues
file = None
is unnecessary, remove it.
This import is not used, remove it: from dataclasses import replace
Don't use empty string to mean "not provided".
It's better to use None
for that.
For example instead of this:
def readVarFile(self, file_location=''): if file_location != '': # if file_location parameter was provided file = open(file_location, "rt") else: # if file_location parameter was NOT provided file = open(self.varFileLocation, "rt") self.varFileText = file.read() file.close() self.varFileLines = self.varFileText.split('\n')
A better way to write would be (including some other suggestions above):
def readVarFile(self, path=None):
if not path:
path = self.varFileLocation
with open(path, "rt") as fh:
self.varFileLines = fh.readlines()
Instead of this:
def getVarFileText(self): # also regenerates 'varFileText' from the list 'varFileLines' self.varFileText = '' for var_line in self.varFileLines: self.varFileText += var_line + '\n' return self.varFileText
Use join
:
def getVarFileText(self):
return '\n'.join(self.varFileLines) + '\n'
Let's see if I understand your criteria correctly:
- "Store and manage data easily" (built-in
json
andpickle
already meet this) - "Human-readable" (
json
andxml
built-ins already meet this) - "Light": guess what's lighter than a custom library? No custom libraries. Using built-ins is lighter than a custom library.
- "fast": you have not shown any evidence that your implementation is faster, so I will very much not believe this until I see it.
- "Supporting comments and good metadata" and "fast" are in conflict, but the former is already met by
xml
if you think it's good and worth using
No, and (pardon me for saying this, but) it never will be, since it's non-standard, and won't ever be as well-tested, well-adopted, well-supported, well-documented and well-optimised as the known alternatives.
As I see it, the only reason for you to pursue this is fun or practice - and both are worthy reasons!
As for the code itself:
Classes should be in TitleCase, as in VarFile
.
Add PEP484 type hints.
Variables should be in lower_snake_case, as in is_key_case_sensitive
.
Don't surround if
predicates in ()
parens: this isn't C/Java/etc.
Protect your open()
calls in context management with
statements, and don't explicitly close()
.
Don't iteratively build a string using concatenation +
. This on its own guarantees that your implementation will perform poorly as it's O(n^2) for most commonly used versions of Python.
Don't raise Exception
. If you want this to be a general-purpose library, you need to derive your own exception types.
mainpulation
is spelled manipulation
.
for i in range(len(self.varFileLines))
fails to "loop like a native": instead you should for line in self.varFileLines
, and rewrite your implementation to avoid mutating varFileLines
during setValueByKey
.
Don't self.isKey_CaseSensitive == False
; use not self.is_key_case_sensitive
instead.
Accepting any of 'true', 't', 'yes', 'y', '1'
for your booleans does not lend itself to a well-designed protocol; you should be more restrictive so that the values are more predictable. Think: this is not a human-computer interface, primarily; it's a computer-computer interface for serialisation that should not concern itself with UI matters like permissive boolean parsing.