Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 02ab593

Browse files
Merge pull request #296 from pyerie/main
Create: Anti-Malware program using ML and RAG boilerplate using langchain
2 parents da39e0c + 9adb41d commit 02ab593

File tree

5 files changed

+988
-0
lines changed

5 files changed

+988
-0
lines changed

‎A/Anti-Malware_application/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# An ML-based anti-malware program written in Python #
2+
3+
<p> I first created this project for a competition. I had to learn a lot of new things and took inspiration (and a bit of code) from others who had tried similar projects before. This project uses customtkinter for the GUI and scikit-learn for the ML operations. It uses a decision tree classifier to classify files as malware or benign, and it can scan a single file or all the files in a folder. It currently only works with executable (PE) files, but can be modified to work with other file types. The program uses the pefile library to extract information from the executable file(s) and classifies them based on that information. </p>
4+
5+
6+
Note: It is not perfect and is prone to a lot of false positives, but I hardly encountered any false negatives. I think this is due to overfitting of the model.
7+
8+
### Installation and execution
9+
10+
1) Install the necessary libraries
11+
```
12+
pip3 install customtkinter
13+
# tkinter ships with Python and is not installable via pip; on Debian/Ubuntu use: sudo apt install python3-tk
14+
pip3 install pandas
15+
pip3 install scikit-learn
16+
pip3 install pefile
17+
pip3 install numpy
18+
19+
```
20+
21+
2) Execute the program
22+
```
23+
python3 anti-malware.py
24+
```
25+
26+
Happy learning!
Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
#################################################################################
2+
### Author: Pyerie #
3+
### Application: A not-so-accurate ML based anti-malware solution #
4+
#################################################################################
5+
6+
print("[+] Loading.... ")
7+
import customtkinter
8+
from tkinter.filedialog import *
9+
from tkinter import *
10+
import pefile
11+
import numpy as np
12+
import pandas as pd
13+
from sklearn.tree import DecisionTreeClassifier
14+
from sklearn.model_selection import train_test_split
15+
from sklearn import metrics
16+
import os
17+
18+
19+
20+
# Load the labelled feature table. Every column except 'legitimate' is a model
# input; 'legitimate' is the target label.
# NOTE(review): the column order of database3.csv presumably has to match the
# order in which extract_features() appends its values - confirm against the
# CSV header.
dataset = pd.read_csv('database3.csv')
X = dataset.drop(['legitimate'], axis=1).values
y = dataset['legitimate'].values

# Hold out 30% of the rows for the accuracy report; the fixed random_state
# keeps the split itself reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Fit once. Each call to fit() rebuilds the tree from scratch, so repeating it
# in a loop only costs start-up time without improving the model.
clf = DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)

# y_test stays one-dimensional so it has the same shape as the predictions.
res1 = clf.predict(X_test)
accuracy = metrics.accuracy_score(y_test, res1)

# Format the fraction as a percentage. Slicing the digits out of str(accuracy)
# misreports round values: 1.0 became "0%" and 0.9 became "9%".
accuracy = "{:.2%}".format(accuracy)
print("Accuracy: "+accuracy)
38+
39+
40+
# Apply the dark theme to every customtkinter widget created afterwards.
customtkinter.set_appearance_mode("dark")
customtkinter.set_default_color_theme("dark-blue")

# Root window: sized to the full screen resolution, dark background.
window = Tk()
screen_width = window.winfo_screenwidth()
screen_height = window.winfo_screenheight()
window.geometry("{}x{}".format(screen_width, screen_height))
window.title("eSuraksha")
window.configure(bg="#121212")
50+
def extract_features(file):
    """Return the PE header values of *file* that are fed to the classifier.

    The file is parsed with pefile in fast-load mode and fourteen values from
    the file header, optional header and data directories are collected, in
    the fixed order shown in the return statement below.

    Args:
        file: Path of the file to inspect.

    Returns:
        A list of fourteen integers.

    Raises:
        UnboundLocalError: If *file* is not a valid PE image. Both callers in
            this file detect non-PE input by catching this type, so it is
            kept for compatibility; the underlying pefile.PEFormatError is
            attached as the cause.
    """
    try:
        pe_obj = pefile.PE(file, fast_load=True)
    except pefile.PEFormatError as error:
        print("Not PE file!")
        # Previously this fell through and failed by accident when the unbound
        # pe_obj was used; raise the same exception type on purpose instead.
        raise UnboundLocalError("not a PE file: {}".format(file)) from error

    try:
        optional_header = pe_obj.OPTIONAL_HEADER
        directories = optional_header.DATA_DIRECTORY

        # Images with a short data-directory table have no entry 12 (the
        # import address table per the PE specification); use 0 for them.
        try:
            directory_12_address = directories[12].VirtualAddress
        except IndexError:
            directory_12_address = 0

        return [
            directories[6].Size,              # entry 6: debug directory
            directories[6].VirtualAddress,
            optional_header.MajorImageVersion,
            optional_header.MajorOperatingSystemVersion,
            directories[0].VirtualAddress,    # entry 0: export table
            directories[0].Size,
            directory_12_address,
            directories[2].Size,              # entry 2: resource table
            optional_header.MajorLinkerVersion,
            pe_obj.FILE_HEADER.NumberOfSections,
            optional_header.SizeOfStackReserve,
            optional_header.DllCharacteristics,
            optional_header.AddressOfEntryPoint,
            optional_header.ImageBase,
        ]
    finally:
        # Release the file mapping held by pefile; without this every scanned
        # file stays open until garbage collection.
        pe_obj.close()
85+
86+
# State flags for the two result windows (single-file and folder scan).
# Both start as the boolean False; single_file() and scan_many() compare them
# against, and later overwrite them with, the strings "True" / "False".
toplevel_created = toplevel2_created = False
89+
90+
def single_file():
    """Ask for one file, classify it and show the verdict in a new window.

    Any result window left over from an earlier scan is closed first. A new
    350x200 window is opened with a "Loading..." label, the user picks a file,
    and the label is replaced by one of: "Not PE file!", the malware warning,
    or the all-clear message. If the file dialog is cancelled the new window
    is closed again and nothing is scanned.
    """
    global toplevel_created
    global toplevel2_created
    global single_file_top

    # Close whichever result window is still open. The flags carry the strings
    # "True" / "False" because scan_many() reads and writes them the same way.
    if toplevel_created == "True":
        single_file_top.destroy()
        toplevel_created = "False"
    if toplevel2_created == "True":
        many_files.destroy()
        toplevel2_created = "False"

    single_file_top = Toplevel(window)
    single_file_top.geometry("350x200")
    customtkinter.set_appearance_mode("dark")
    customtkinter.set_default_color_theme("dark-blue")
    single_file_top['bg'] = "#121212"
    single_file_top.title("Scan a single file")
    toplevel_created = "True"
    result = customtkinter.CTkLabel(single_file_top, text="Loading...")
    result.pack()

    file_path = askopenfilename()
    if not file_path:
        # Dialog cancelled: there is nothing to scan. Previously the empty
        # path was handed to the extractor, which failed and left the window
        # stuck on "Loading...".
        single_file_top.destroy()
        toplevel_created = "False"
        return

    try:
        features_extracted = extract_features(str(file_path))
    except (UnboundLocalError, pefile.PEFormatError):
        # extract_features() signals a non-PE file with UnboundLocalError;
        # PEFormatError is accepted too in case it is ever raised directly.
        result.after(0, result.destroy)
        benign_l = customtkinter.CTkLabel(single_file_top, text="Not PE file!")
        benign_l.pack()
        return

    # The classifier expects a 2-D array: one row holding this sample.
    data_of_sample = np.array(features_extracted).reshape(1, -1)
    prediction = clf.predict(data_of_sample)[0]

    # NOTE(review): the training target column is named 'legitimate', yet
    # class 1 is reported as malware here - confirm the label encoding used
    # in database3.csv.
    if prediction == 1:
        result.after(0, result.destroy)
        malware_l = customtkinter.CTkLabel(single_file_top, fg_color="red", text="ML model detected malware!")
        malware_l.pack()
    elif prediction == 0:
        result.after(0, result.destroy)
        benign_l = customtkinter.CTkLabel(single_file_top, fg_color="green", text="No malware detected!")
        benign_l.pack()
147+
148+
149+
def scan_many():
    """Ask for a folder, classify every PE-like file in it and list the hits.

    Any result window left over from an earlier scan is closed first. A new
    350x200 window is opened with a "Loading..." label, the user picks a
    folder, and the folder is walked recursively. Files whose name ends in
    one of the known PE extensions (compared case-insensitively) are passed
    through the classifier. The label is then replaced by either a read-only
    text box listing the flagged paths or a "No malware found!" message. If
    the folder dialog is cancelled the new window is closed again and nothing
    is scanned.
    """
    global toplevel2_created
    global toplevel_created
    global many_files

    # Close whichever result window is still open. The flags carry the strings
    # "True" / "False" because single_file() reads and writes them the same way.
    if toplevel2_created == "True":
        many_files.destroy()
        toplevel2_created = "False"
    if toplevel_created == "True":
        single_file_top.destroy()
        toplevel_created = "False"

    many_files = Toplevel(window)
    many_files.geometry("350x200")
    customtkinter.set_appearance_mode("dark")
    customtkinter.set_default_color_theme("dark-blue")
    many_files['bg'] = "#121212"
    many_files.title("Scan a directory")
    toplevel2_created = "True"
    result2 = customtkinter.CTkLabel(many_files, text="Loading...")
    result2.pack()

    directory = askdirectory()
    if not directory:
        # Dialog cancelled: previously this walked an empty path and then
        # claimed "No malware found!" although nothing had been scanned.
        many_files.destroy()
        toplevel2_created = "False"
        return

    # str.endswith accepts a tuple, so one call covers every extension.
    formats_of_pe = (".acm", ".ax", ".cpl", ".dll", ".drv", ".efi", ".exe",
                     ".mui", ".ocx", ".scr", ".sys", ".tsp", ".bin")
    malware_many = []

    for root, _subdirs, files in os.walk(str(directory)):
        for name_of_file in files:
            # Lower-case the name so that e.g. "SETUP.EXE" is scanned too.
            if not name_of_file.lower().endswith(formats_of_pe):
                continue

            path = os.path.join(str(root), str(name_of_file))
            try:
                features_of_many = extract_features(path)
            except (UnboundLocalError, pefile.PEFormatError, OSError):
                # Not a PE image, or unreadable: skip this file. Previously
                # the scan carried on with the feature vector of the previous
                # file (or an unbound variable) and attributed the verdict to
                # this path.
                continue

            # The classifier expects a 2-D array: one row holding this sample.
            data_for_many = np.array(features_of_many).reshape(1, -1)
            # NOTE(review): the training target column is named 'legitimate',
            # yet class 1 is treated as malware here - confirm the label
            # encoding used in database3.csv.
            if clf.predict(data_for_many)[0] == 1:
                malware_many.append(path)

    result2.after(0, result2.destroy)
    if malware_many:
        malware_label2 = customtkinter.CTkLabel(many_files, text="Malware found: ")
        malware_label2.pack()
        malware_text_box = customtkinter.CTkTextbox(many_files)
        # The separator ends with a newline so the next path starts on its
        # own line instead of being glued onto the dashes.
        separator = '\n------------------------------------------\n'
        for_text_box = "".join(name_of_malware + separator for name_of_malware in malware_many)
        malware_text_box.insert('0.0', for_text_box)
        malware_text_box.configure(state="disabled")
        malware_text_box.pack()
    else:
        benign_label = customtkinter.CTkLabel(many_files, text="No malware found!")
        benign_label.pack()
235+
236+
# Main-window controls: one button per scan mode, stacked top to bottom.
button1 = customtkinter.CTkButton(
    master=window,
    text="Scan a single file",
    command=single_file,
)
button1.pack()

button2 = customtkinter.CTkButton(
    master=window,
    text="Scan a folder",
    command=scan_many,
)
button2.pack()

# Hand control to the Tk event loop.
window.mainloop()

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /