Function to calculate the GC content variation in a sequence

Question 1

I came across this BMC Genomics paper: Analysis of intra-genomic GC content homogeneity within prokaryotes

And I implemented some Python functions to make this available as part of a personal project. The functions are these:

import numpy as np
from collections import defaultdict
def get_sequence_chunks(sequence, window, step, as_overlap=False):
 """GC Content in a DNA/RNA sub-sequence length k. In
 overlap windows of length k, other wise non overlapping.
 
 Inputs:
 sequence - a string representing a DNA sequence. 
 as_overlap - boolean that represents if overlap is needed.
 k - a integer representing the lengths of overlapping bases.
 Default is 20.
 
 Outputs:
 subseq - a string representing a slice of the sequence with or without
 overlapping characters.
 """
 # make sequence upper case and getting the length of it
 sequence, seq_len = sequence.upper(), len(sequence)
 # non overlap sequence length
 non_overlap = range(0, seq_len - window + 1, step)
 # overlap sequence length
 overlap = range(0, seq_len - window + 1)
 # overlap is needed
 if as_overlap:
 # iterates to the overlap region
 for i in overlap:
 # creates the substring 
 subseq = sequence[i:i + window]
 yield subseq
 # if non overlap is choosed
 else:
 # iterates to the non overlap region
 for j in non_overlap:
 # creates the sub-string to count the gc_content
 subseq = sequence[j:j + window]
 yield subseq
def gc_content(seq, percent=True):
 """
 Calculate G+C content, return percentage (as float between 0 and 100).
 Copes mixed case sequences, and with the ambiguous nucleotide S (G or C)
 when counting the G and C content. The percentage is calculated against
 the full length.
 
 Inputs:
 sequence - a string representing a sequence (DNA/RNA/Protein)
 
 Outputs:
 gc - a float number or a percent value representing the gc content of the sequence. 
 
 """
 seq_len = len(seq) 
 gc = sum(seq.count(x) for x in ["G", "C", "g", "c", "S", "s"])
 if percent:
 try:
 return gc * 100.0 / seq_len
 except ZeroDivisionError:
 return 0.0 
 else:
 return gc / seq_len 
def get_gc_content_slide_window(sequence, window, step):
 """
 Calculate teh GC (G+C) content along a sequence. Returns a dictionary -like
 object mapping the sequence window to the gc value (floats).
 Returns 0 for windows without any G/C by handling zero division errors, and 
 does NOT look at any ambiguous nucleotides.
 
 Inputs:
 sequence - a string representing a sequence (DNA/RNA/Protein)
 window_size - a integer representing the length of sequence to be analyzed
 at each step in the full sequence.
 step - a integer representing the length of oerlapping sequence allowed.
 
 Outputs:
 gc- a dictionary-like object mapping the window size sequence to it gc values 
 as floating numbers
 """
 gc = defaultdict(float)
 for s in get_sequence_chunks(sequence, window, step, as_overlap=False):
 gc[s] = gc.get(s, 0.0) + gc_content(s)
 return gc 
def difference_gc(mean_gc, gc_dict):
 """
 Calculates the difference between the mean GC content of window i, and the
 mean chromosomal GC content, as Di = GC i − GC
 """
 # get the keys and default values in the dictionary
 d_i = defaultdict(float, [(k, 0.0) for k in gc_dict.keys()])
 # iterate through all keys, gc calculated values for the
 # genome chunk
 for chunk, gc_cnt in gc_dict.items():
 # get the difference between the chromosomal mean and the chunks
 dif = gc_cnt - mean_gc
 # add the difference to the appropriate chunk
 d_i[chunk] = dif
 return d_i
def get_chromosomal_gc_variantion(difference_dic):
 """
 Calculates the chromosomal GC variation defined as the log-transformed average 
 of the absolute value of the difference between the mean GC content of each 
 non-overlapping sliding window i and mean chromosomal GC content. 
 chromosomal_gc_variantion = log(1/N*sum(|Di|), where N is the maximum number of 
 non-overlapping window size in bp in a sliding windows.
 """
 # get the number of chunks
 n = len(difference_dic.keys())
 arr = np.array(list(difference_dic.values()))
 var = np.log((1/n) * sum(abs(arr)))
 return var

I worked with e. coli genome: Escherichia coli strain RM13322 chromosome, complete genome

gc = gc_content(ec, percent=True) = 50.76819424506625

It is similar to found here: Genome Assembly and Annotation report (26286) ~ 50.73.

The slide window gave it non overlapping results like, where ec = escherichia coli genome, 100 size window, and step = 100:

get_gc_content_slide_window(ec, 100, 100)
defaultdict(float,
 {'GTTGGCATGCCATGGTGATGTTTGTTAGGAAAGCAAAGATGGCAAAACTGCTGGGGGTTTTGTGGTTGAGTATGCCAATATAATTAATAGATTAAAGAGT': 39.0,
 'TAGTTGTGAAGAAAATATGGATAAACAGGACGACGAATGCTTTCACCGATAAGGACAACTTTCCATAACTCAGTAAATATAGTGCAGAGTTCACCCTGTC': 39.0,
 'AAACGGTTTCTTTTGCAGGAAAGGAATATGAGTTAAAGGTCATTGATGAAAAAACGCCTATTCTTTTTCAGTGGTTTGAACCTAATCCTGAACGATATAA': 34.0,
 'GAAAGATGAGGTTCCAATAGTTAATACTAAGCAGCATCCCTATTTAGATAATGTCACAAATGCGGCAAGGATAGAGAGTGATCGTATGATAGGTATTTTT': 36.0,
 'GTTGATGGCGATTTTTCAGTCAACCAAAAGACTGCTTTTTCAAAATTGGAACGAGATTTTGAAAATGTAATGATAATCTATCGGGAAGATGTTGACTTCA': 34.0,
 'GTATGTATGACAGAAAACTATCAGATATTTATCATGATATTATATGTGAACAAAGGTTACGAACTGAAGACAAAAGAGATGAATACTTGTTGAATCTGTT': 29.0,...}

The difference function (slide window gc content - gc content all sequence), where ec_gc = gc content of e.coli genome and ec_non = gc content in each subsequence of size 100 non overlapped:

difference_gc(ec_gc, ec_non)
defaultdict(float,
 {'GTTGGCATGCCATGGTGATGTTTGTTAGGAAAGCAAAGATGGCAAAACTGCTGGGGGTTTTGTGGTTGAGTATGCCAATATAATTAATAGATTAAAGAGT': -11.768194245066248,
 'TAGTTGTGAAGAAAATATGGATAAACAGGACGACGAATGCTTTCACCGATAAGGACAACTTTCCATAACTCAGTAAATATAGTGCAGAGTTCACCCTGTC': -11.768194245066248,
 'AAACGGTTTCTTTTGCAGGAAAGGAATATGAGTTAAAGGTCATTGATGAAAAAACGCCTATTCTTTTTCAGTGGTTTGAACCTAATCCTGAACGATATAA': -16.768194245066248,
 'GAAAGATGAGGTTCCAATAGTTAATACTAAGCAGCATCCCTATTTAGATAATGTCACAAATGCGGCAAGGATAGAGAGTGATCGTATGATAGGTATTTTT': -14.768194245066248,
 'GTTGATGGCGATTTTTCAGTCAACCAAAAGACTGCTTTTTCAAAATTGGAACGAGATTTTGAAAATGTAATGATAATCTATCGGGAAGATGTTGACTTCA': -16.768194245066248,...}

And the (ec_dif = difference on gc content from total sequence and from subsequences):

get_chromosomal_gc_variantion(ec_dif)
1.7809591767493207

My principal concern It is to know if this function get_chromosomal_gc_variantion(ec_dif) was implemented as in the paper above (GCVAR):

journal excerpt

As suggested, a toy example:

s="TAGTTGTGAAGAAAATATGGATAAACAGGACGACGAATGCTTTCACCGATAAGGACAACTTTCCATAACTCAGTAAATATAGTGCAGAGTTCACCCTGTC"
seq_gc_non_overllap = get_gc_content_slide_window(s, 20, 20)
seq_gc_non_overllap
defaultdict(float,
 {'TAGTTGTGAAGAAAATATGG': 30.0,
 'ATAAACAGGACGACGAATGC': 45.0,
 'TTTCACCGATAAGGACAACT': 40.0,
 'TTCCATAACTCAGTAAATAT': 25.0,
 'AGTGCAGAGTTCACCCTGTC': 55.0})
seq_gc_total = gc_content(s, percent=True)
seq_gc_total
39.0
seq_dif_gctot_gc_slidewindow = difference_gc(seq_gc_total, seq_gc_non_overllap)
defaultdict(float,
 {'TAGTTGTGAAGAAAATATGG': -9.0,
 'ATAAACAGGACGACGAATGC': 6.0,
 'TTTCACCGATAAGGACAACT': 1.0,
 'TTCCATAACTCAGTAAATAT': -14.0,
 'AGTGCAGAGTTCACCCTGTC': 16.0})
chromosome_gc_variation = get_chromosomal_gc_variantion(seq_dif_gctot, gc_slidewindow)
chromosome_gc_variation
2.2192034840549946

Any tip to improve the code would be very welcome, once I only have been working with python for a couple of years and I am not very skilled, and much less with numpy.

Question 2

What are ec, ec_gc, ec_non, ec_dif?

Question 3

It's good for you to have described them, but it would be more helpful to show the code that loads them, and perhaps a reduced-size sample for your reviewers to be able to run your code.

Question 4

@PauloSergioSchlogl Could you also link your project once you've completed/published it?

Question 5

Quite interesting!

You will benefit from introducing PEP484 type hints.

Your docstring for get_sequence_chunks is way out of whack; you'll need to revisit your parameter names.

There isn't a whole lot of benefit from tuple-combination assignment for your sequence and seq_len so I suggest just doing them separately.

Your overlap logic can be greatly simplified: notice that the only difference between the two code paths is the step size.

Minor spelling issues such as other wise that should be one word, overllap -> overlap, and variantion -> variation.

Comments such as

# make sequence upper case and getting the length of it

are less helpful than not having a comment at all. Note the difference between that and

# get the difference between the chromosomal mean and the chunks

which has content that can't necessarily be inferred from the code alone (though even this will be redundant with a sufficiently descriptive variable name).

This comprehension:

gc = sum(seq.count(x) for x in ["G", "C", "g", "c", "S", "s"])

will iterate through the entire seq string for every potential character. It's likely that refactoring this to use set membership checks and one single iteration through the string will be more efficient.

This code to prevent division-by-zero:

seq_len = len(seq) 
gc = sum(seq.count(x) for x in ["G", "C", "g", "c", "S", "s"])
if percent:
 try:
 return gc * 100.0 / seq_len
 except ZeroDivisionError:
 return 0.0 
else:
 return gc / seq_len

first of all can trivially avoid logic-by-exception by checking if the sequence length is zero; and also should extend that check to the case where a non-percent figure is returned.

In difference_gc the initial defaultdict initialization is not needed, since you overwrite every single key. You can replace the whole function with one dictionary comprehension.

Try to avoid abbreviating words like count to cnt.

The len of a dict's keys is equal to the len of a dict itself, so you can drop the .keys() in len(difference_dic.keys()).

You can avoid the construction of a list here:

arr = np.array(list(difference_dic.values()))

by using fromiter instead.

Your example is kind of broken. get_chromosomal_gc_variation only accepts one parameter.

Consider reworking your example into a basic unit test.

Suggested

import math
from typing import Iterator, DefaultDict, Dict
import numpy as np
from collections import defaultdict
def get_sequence_chunks(
 sequence: str,
 window: int,
 step: int,
 as_overlap: bool = False,
) -> Iterator[str]:
 """GC Content in a DNA/RNA sub-sequence length k. In
 overlap windows of length k, other wise non overlapping.
 Inputs:
 sequence - a string representing a DNA sequence.
 as_overlap - boolean that represents if overlap is needed.
 k - a integer representing the lengths of overlapping bases.
 Default is 20.
 Outputs:
 subseq - a string representing a slice of the sequence with or without
 overlapping characters.
 """
 sequence = sequence.upper()
 if as_overlap:
 # overlap sequence length
 step = 1
 # iterates to the overlap region
 for i in range(0, len(sequence) - window + 1, step):
 # creates the substring
 yield sequence[i:i + window]
def gc_content(seq: str, percent: bool = True) -> float:
 """
 Calculate G+C content, return percentage (as float between 0 and 100).
 Copes mixed case sequences, and with the ambiguous nucleotide S (G or C)
 when counting the G and C content. The percentage is calculated against
 the full length.
 Inputs:
 sequence - a string representing a sequence (DNA/RNA/Protein)
 Outputs:
 gc - a float number or a percent value representing the gc content of the sequence.
 """
 seq_len = len(seq)
 gc = sum(1 for x in seq.upper() if x in {"G", "C", "S"})
 if seq_len == 0:
 return 0
 gc /= seq_len
 if percent:
 return gc * 100
 return gc
def get_gc_content_slide_window(sequence: str, window: int, step: int) -> DefaultDict[str, float]:
 """
 Calculate the GC (G+C) content along a sequence. Returns a dictionary-like
 object mapping the sequence window to the gc value (floats).
 Returns 0 for windows without any G/C by handling zero division errors, and
 does NOT look at any ambiguous nucleotides.
 Inputs:
 sequence - a string representing a sequence (DNA/RNA/Protein)
 window_size - a integer representing the length of sequence to be analyzed
 at each step in the full sequence.
 step - a integer representing the length of oerlapping sequence allowed.
 Outputs:
 gc- a dictionary-like object mapping the window size sequence to it gc values
 as floating numbers
 """
 gc = defaultdict(float)
 for s in get_sequence_chunks(sequence, window, step, as_overlap=False):
 gc[s] += gc_content(s)
 return gc
def difference_gc(mean_gc: float, gc_dict: Dict[str, float]) -> Dict[str, float]:
 """
 Calculates the difference between the mean GC content of window i, and the
 mean chromosomal GC content, as Di = GC i − GC
 """
 # iterate through all keys, gc calculated values for the
 # genome chunk
 # get the difference between the chromosomal mean and the chunks
 # add the difference to the appropriate chunk
 d_i = {
 chunk: gc_cnt - mean_gc
 for chunk, gc_cnt in gc_dict.items()
 }
 return d_i
def get_chromosomal_gc_variation(difference_dic: Dict[str, float]) -> float:
 """
 Calculates the chromosomal GC variation defined as the log-transformed average
 of the absolute value of the difference between the mean GC content of each
 non-overlapping sliding window i and mean chromosomal GC content.
 chromosomal_gc_variantion = log(1/N*sum(|Di|), where N is the maximum number of
 non-overlapping window size in bp in a sliding windows.
 """
 # get the number of chunks
 n = len(difference_dic)
 arr = np.fromiter(
 difference_dic.values(),
 dtype=np.float32,
 count=len(difference_dic),
 )
 var = np.log(np.sum(np.abs(arr)) / n)
 return var
def test() -> None:
 s = (
 "TAGTTGTGAAGAAAATATGGATAAACAGGACGACGAATGCTTTCACCGAT"
 "AAGGACAACTTTCCATAACTCAGTAAATATAGTGCAGAGTTCACCCTGTC"
 )
 seq_gc_non_overlap = get_gc_content_slide_window(s, 20, 20)
 expected = {
 'TAGTTGTGAAGAAAATATGG': 30,
 'ATAAACAGGACGACGAATGC': 45,
 'TTTCACCGATAAGGACAACT': 40,
 'TTCCATAACTCAGTAAATAT': 25,
 'AGTGCAGAGTTCACCCTGTC': 55,
 }
 assert expected.keys() == seq_gc_non_overlap.keys()
 for k, v in expected.items():
 assert math.isclose(seq_gc_non_overlap[k], v)
 seq_gc_total = gc_content(s, percent=True)
 assert math.isclose(seq_gc_total, 39)
 seq_dif_gctot_gc_slidewindow = difference_gc(seq_gc_total, seq_gc_non_overlap)
 expected = {
 'TAGTTGTGAAGAAAATATGG': -9,
 'ATAAACAGGACGACGAATGC': 6,
 'TTTCACCGATAAGGACAACT': 1,
 'TTCCATAACTCAGTAAATAT': -14,
 'AGTGCAGAGTTCACCCTGTC': 16,
 }
 assert expected.keys() == seq_dif_gctot_gc_slidewindow.keys()
 for k, v in expected.items():
 assert math.isclose(seq_dif_gctot_gc_slidewindow[k], v)
 chromosome_gc_variation = get_chromosomal_gc_variation(seq_dif_gctot_gc_slidewindow)
 assert math.isclose(chromosome_gc_variation, 2.2192034840549946)
if __name__ == '__main__':
 test()

Question 6

thank you for your suggestions! I will check it out and sorry because English it is not my native language...some times I do a lot of mistakes. But for me it is much important to check if the statistics function chromosome_gc_variation is doing what it is need to do. But, Is always excellent to learn to write a good/efficient and beautiful code. Thank you for your time and tips. They are awesome. Paulo

Reinderien Reinderien 70.9k5 gold badges76 silver badges256 bronze badges · Accepted Answer · 2021-11-07 03:39:48Z

Quite interesting!

You will benefit from introducing PEP484 type hints.

Your docstring for get_sequence_chunks is way out of whack; you'll need to revisit your parameter names.

There isn't a whole lot of benefit from tuple-combination assignment for your sequence and seq_len so I suggest just doing them separately.

Your overlap logic can be greatly simplified: notice that the only difference between the two code paths is the step size.

Minor spelling issues such as other wise that should be one word, overllap -> overlap, and variantion -> variation.

Comments such as

# make sequence upper case and getting the length of it

are less helpful than not having a comment at all. Note the difference between that and

# get the difference between the chromosomal mean and the chunks

which has content that can't necessarily be inferred from the code alone (though even this will be redundant with a sufficiently descriptive variable name).

This comprehension:

gc = sum(seq.count(x) for x in ["G", "C", "g", "c", "S", "s"])

will iterate through the entire seq string for every potential character. It's likely that refactoring this to use set membership checks and one single iteration through the string will be more efficient.

This code to prevent division-by-zero:

seq_len = len(seq) 
gc = sum(seq.count(x) for x in ["G", "C", "g", "c", "S", "s"])
if percent:
 try:
 return gc * 100.0 / seq_len
 except ZeroDivisionError:
 return 0.0 
else:
 return gc / seq_len

first of all can trivially avoid logic-by-exception by checking if the sequence length is zero; and also should extend that check to the case where a non-percent figure is returned.

In difference_gc the initial defaultdict initialization is not needed, since you overwrite every single key. You can replace the whole function with one dictionary comprehension.

Try to avoid abbreviating words like count to cnt.

The len of a dict's keys is equal to the len of a dict itself, so you can drop the .keys() in len(difference_dic.keys()).

You can avoid the construction of a list here:

arr = np.array(list(difference_dic.values()))

by using fromiter instead.

Your example is kind of broken. get_chromosomal_gc_variation only accepts one parameter.

Consider reworking your example into a basic unit test.

Suggested

import math
from typing import Iterator, DefaultDict, Dict
import numpy as np
from collections import defaultdict
def get_sequence_chunks(
 sequence: str,
 window: int,
 step: int,
 as_overlap: bool = False,
) -> Iterator[str]:
 """GC Content in a DNA/RNA sub-sequence length k. In
 overlap windows of length k, other wise non overlapping.
 Inputs:
 sequence - a string representing a DNA sequence.
 as_overlap - boolean that represents if overlap is needed.
 k - a integer representing the lengths of overlapping bases.
 Default is 20.
 Outputs:
 subseq - a string representing a slice of the sequence with or without
 overlapping characters.
 """
 sequence = sequence.upper()
 if as_overlap:
 # overlap sequence length
 step = 1
 # iterates to the overlap region
 for i in range(0, len(sequence) - window + 1, step):
 # creates the substring
 yield sequence[i:i + window]
def gc_content(seq: str, percent: bool = True) -> float:
 """
 Calculate G+C content, return percentage (as float between 0 and 100).
 Copes mixed case sequences, and with the ambiguous nucleotide S (G or C)
 when counting the G and C content. The percentage is calculated against
 the full length.
 Inputs:
 sequence - a string representing a sequence (DNA/RNA/Protein)
 Outputs:
 gc - a float number or a percent value representing the gc content of the sequence.
 """
 seq_len = len(seq)
 gc = sum(1 for x in seq.upper() if x in {"G", "C", "S"})
 if seq_len == 0:
 return 0
 gc /= seq_len
 if percent:
 return gc * 100
 return gc
def get_gc_content_slide_window(sequence: str, window: int, step: int) -> DefaultDict[str, float]:
 """
 Calculate the GC (G+C) content along a sequence. Returns a dictionary-like
 object mapping the sequence window to the gc value (floats).
 Returns 0 for windows without any G/C by handling zero division errors, and
 does NOT look at any ambiguous nucleotides.
 Inputs:
 sequence - a string representing a sequence (DNA/RNA/Protein)
 window_size - a integer representing the length of sequence to be analyzed
 at each step in the full sequence.
 step - a integer representing the length of oerlapping sequence allowed.
 Outputs:
 gc- a dictionary-like object mapping the window size sequence to it gc values
 as floating numbers
 """
 gc = defaultdict(float)
 for s in get_sequence_chunks(sequence, window, step, as_overlap=False):
 gc[s] += gc_content(s)
 return gc
def difference_gc(mean_gc: float, gc_dict: Dict[str, float]) -> Dict[str, float]:
 """
 Calculates the difference between the mean GC content of window i, and the
 mean chromosomal GC content, as Di = GC i − GC
 """
 # iterate through all keys, gc calculated values for the
 # genome chunk
 # get the difference between the chromosomal mean and the chunks
 # add the difference to the appropriate chunk
 d_i = {
 chunk: gc_cnt - mean_gc
 for chunk, gc_cnt in gc_dict.items()
 }
 return d_i
def get_chromosomal_gc_variation(difference_dic: Dict[str, float]) -> float:
 """
 Calculates the chromosomal GC variation defined as the log-transformed average
 of the absolute value of the difference between the mean GC content of each
 non-overlapping sliding window i and mean chromosomal GC content.
 chromosomal_gc_variantion = log(1/N*sum(|Di|), where N is the maximum number of
 non-overlapping window size in bp in a sliding windows.
 """
 # get the number of chunks
 n = len(difference_dic)
 arr = np.fromiter(
 difference_dic.values(),
 dtype=np.float32,
 count=len(difference_dic),
 )
 var = np.log(np.sum(np.abs(arr)) / n)
 return var
def test() -> None:
 s = (
 "TAGTTGTGAAGAAAATATGGATAAACAGGACGACGAATGCTTTCACCGAT"
 "AAGGACAACTTTCCATAACTCAGTAAATATAGTGCAGAGTTCACCCTGTC"
 )
 seq_gc_non_overlap = get_gc_content_slide_window(s, 20, 20)
 expected = {
 'TAGTTGTGAAGAAAATATGG': 30,
 'ATAAACAGGACGACGAATGC': 45,
 'TTTCACCGATAAGGACAACT': 40,
 'TTCCATAACTCAGTAAATAT': 25,
 'AGTGCAGAGTTCACCCTGTC': 55,
 }
 assert expected.keys() == seq_gc_non_overlap.keys()
 for k, v in expected.items():
 assert math.isclose(seq_gc_non_overlap[k], v)
 seq_gc_total = gc_content(s, percent=True)
 assert math.isclose(seq_gc_total, 39)
 seq_dif_gctot_gc_slidewindow = difference_gc(seq_gc_total, seq_gc_non_overlap)
 expected = {
 'TAGTTGTGAAGAAAATATGG': -9,
 'ATAAACAGGACGACGAATGC': 6,
 'TTTCACCGATAAGGACAACT': 1,
 'TTCCATAACTCAGTAAATAT': -14,
 'AGTGCAGAGTTCACCCTGTC': 16,
 }
 assert expected.keys() == seq_dif_gctot_gc_slidewindow.keys()
 for k, v in expected.items():
 assert math.isclose(seq_dif_gctot_gc_slidewindow[k], v)
 chromosome_gc_variation = get_chromosomal_gc_variation(seq_dif_gctot_gc_slidewindow)
 assert math.isclose(chromosome_gc_variation, 2.2192034840549946)
if __name__ == '__main__':
 test()

thank you for your suggestions! I will check it out and sorry because English it is not my native language...some times I do a lot of mistakes. But for me it is much important to check if the statistics function chromosome_gc_variation is doing what it is need to do. But, Is always excellent to learn to write a good/efficient and beautiful code. Thank you for your time and tips. They are awesome. Paulo

Stack Exchange Network

Function to calculate the GC content variation in a sequence

1 Answer 1

Suggested

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Function to calculate the GC content variation in a sequence

1 Answer 1

Suggested

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions