I was doing a problem, SPOJ: FLIB. My code was running slow (getting Time Limit Exceeded error) when I was using Code1.
However, my code accepted within the required time limit when I hardcoded the multiplication instead, using Code2. Can someone tell how this was a significant change? Note that this program relies heavily on multiplication, and it is the performed repeatedly.
The question is on matrix exponentiation, and multiplication of two matrices is a very crucial aspect in it.
A[3][3]
and B[3][3]
are two matrices which are multiplied, and whose result is stored in C[3][3]
. M
is an int
with which I am supposed to take the modulus. M = 10^9 + 7
. s
is of type long long
, and I have used to it to reduce the number of times I have to apply the modulus operator.
Each element in these matrices can be of the order of \10ドル^9\,ドル after taking the modulus.
Code1:
for(i=0;i<3;i++)
for(j=0;j<3;j++)
C[i][j] = 0;
for(i=0;i<3;i++)
for(j=0;j<3;j++)
{
s=0;
for(k=0;k<3;k++)
s += ((long long)(A[i][k]))*B[k][j];
s = s%M;
C[i][j] = s;
}
Code2:
C[0][0]= (((long long)A[0][0])*B[0][0] + ((long long)A[0][1])*B[1][0] + ((long long)A[0][2])*B[2][0])%M;
C[0][1]= (((long long)A[0][0])*B[0][1] + ((long long)A[0][1])*B[1][1] + ((long long)A[0][2])*B[2][1])%M;
C[0][2]= (((long long)A[0][0])*B[0][2] + ((long long)A[0][1])*B[1][2] + ((long long)A[0][2])*B[2][2])%M;
C[1][0]= (((long long)A[1][0])*B[0][0] + ((long long)A[1][1])*B[1][0] + ((long long)A[1][2])*B[2][0])%M;
C[1][1]= (((long long)A[1][0])*B[0][1] + ((long long)A[1][1])*B[1][1] + ((long long)A[1][2])*B[2][1])%M;
C[1][2]= (((long long)A[1][0])*B[0][2] + ((long long)A[1][1])*B[1][2] + ((long long)A[1][2])*B[2][2])%M;
C[2][0]= (((long long)A[2][0])*B[0][0] + ((long long)A[2][1])*B[1][0] + ((long long)A[2][2])*B[2][0])%M;
C[2][1]= (((long long)A[2][0])*B[0][1] + ((long long)A[2][1])*B[1][1] + ((long long)A[2][2])*B[2][1])%M;
C[2][2]= (((long long)A[2][0])*B[0][2] + ((long long)A[2][1])*B[1][2] + ((long long)A[2][2])*B[2][2])%M;
This is the complete code:
#include <bits/stdc++.h>
using namespace std;
#define M 1000000007
void matmult(int A[3][3], int B[3][3], int C[3][3])
{
C[0][0]= (((long long)A[0][0])*B[0][0] + ((long long)A[0][1])*B[1][0] + ((long long)A[0][2])*B[2][0])%M;
C[0][1]= (((long long)A[0][0])*B[0][1] + ((long long)A[0][1])*B[1][1] + ((long long)A[0][2])*B[2][1])%M;
C[0][2]= (((long long)A[0][0])*B[0][2] + ((long long)A[0][1])*B[1][2] + ((long long)A[0][2])*B[2][2])%M;
C[1][0]= (((long long)A[1][0])*B[0][0] + ((long long)A[1][1])*B[1][0] + ((long long)A[1][2])*B[2][0])%M;
C[1][1]= (((long long)A[1][0])*B[0][1] + ((long long)A[1][1])*B[1][1] + ((long long)A[1][2])*B[2][1])%M;
C[1][2]= (((long long)A[1][0])*B[0][2] + ((long long)A[1][1])*B[1][2] + ((long long)A[1][2])*B[2][2])%M;
C[2][0]= (((long long)A[2][0])*B[0][0] + ((long long)A[2][1])*B[1][0] + ((long long)A[2][2])*B[2][0])%M;
C[2][1]= (((long long)A[2][0])*B[0][1] + ((long long)A[2][1])*B[1][1] + ((long long)A[2][2])*B[2][1])%M;
C[2][2]= (((long long)A[2][0])*B[0][2] + ((long long)A[2][1])*B[1][2] + ((long long)A[2][2])*B[2][2])%M;
}
void matpow(int Z[3][3], long long n, int A[3][3])
{
int temp[3][3];
int i,j;
A[0][0] = 1;
A[0][1] = 0;
A[0][2] = 0;
A[1][0] = 0;
A[1][1] = 1;
A[1][2] = 0;
A[2][0] = 0;
A[2][1] = 0;
A[2][2] = 1;
while(n>0)
{
if(n&1)
{
matmult(A,Z,temp);
for(i=0;i<3;i++)
for(j=0;j<3;j++)
A[i][j] = temp[i][j];
}
matmult(Z, Z, temp);
for(i=0;i<3;i++)
for(j=0;j<3;j++)
Z[i][j] = temp[i][j];
n/=2;
}
}
int main(int argc, const char * argv[])
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
int t;
long long n,ans = 0;
int A[3][3];
cin>>t;
while(t--)
{
cin>>n;
int Z[3][3] = {{1,2,1},{0,5,3},{0,3,2}};
if(n>1)
{
matpow(Z,n-1,A);
ans = ((long long)(A[0][0])*2 + (long long)(A[0][1])*5 + (long long)A[0][2]*3)%M;
}
else
{
if(n==0)
ans = 0;
else if(n==1)
ans = 2;
else if(n==2)
ans = 15;
}
cout<<ans<<"\n";
}
return 0;
}
2 Answers 2
You also have this little loop in your original code that is not needed.
for(i=0;i<3;i++)
for(j=0;j<3;j++)
C[i][j] = 0;
Note: It is not needed because you have an explicit assignment to each element.
C[i][j] = s;
So the two pieces of code are not equivalent.
Also you have introduced a bug:
C[2][2]= (((long long)A[2][0])*B[0][2] + ((long long)A[2][1])*B[1][2] + ((long long)A[2][2])*B[2][2])%M;
The operator %
has a higher precedence than +
so your modulus is being applied to the last element only before the addition.
// ie. You have
C[2][2] = T1 + T2 + (T3 % M);
// You want
C[2][2] = (T1 + T2 + T3) % M;
Other things:
// Don't do this.
using namespace std;
// Macros have not type information.
#define M 1000000007
// Prefer to use static const
static long long const M = 1000000007;
You may want to initialize t
int t;
cin>>t; // If the user types in Fred. Then the read will fail
// the value of t is then undefined
while(t--) // This could go for a very long time.
One variable per line pelase.
long long n,ans = 0;
Declare variables as close to the point of use as possible.
cin>>n; // n is defined outside the current scope.
// yet not used anywhere but inside the loop
Some white space between identifiers would be nice.
First did you compile with optimizations enabled? If not then do so when profiling. A good optimizer will produce equivalent code after unrolling the loops.
The first double for loop is superfluous. It is doing busy work that is overwritten in the second loop.
A
,B
,C
,M
, ands
are. Also, what are typical values for each? \$\endgroup\$