Friday, February 9, 2018

using perf for profiling

The sample code, test_a.c, is shown below:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/*
 * Innermost workload of the demo: performs 0xFF fixed-size copies between
 * two 100-byte stack buffers. Takes no arguments and returns nothing; the
 * copied data is never inspected, the function only burns CPU time.
 */
void func3(void)
{
   char src[100] = {0};   /* initialised so the copy never reads indeterminate bytes */
   char dst[100];
   int count;

   for (count = 0; count < 0xFF; count++) {
      /* memcpy takes the destination first, then the source */
      memcpy(dst, src, sizeof(dst));
   }
}

/*
 * Middle layer of the demo call chain: runs 0xFF iterations, each one
 * updating a running product and then calling func3().
 * Takes no arguments and returns nothing.
 */
void func2(void)
{
   int count;
   /*
    * Unsigned on purpose: the running product no longer fits in 64 bits
    * once the factor reaches 21. Unsigned arithmetic wraps around, whereas
    * overflowing a signed int64_t is undefined behaviour.
    */
   uint64_t s = 1;

   for (count = 0; count < 0xFF; count++) {
      s *= (uint64_t)(count + 1);
      func3();
   }

   (void)s;   /* the product itself is not used; the loop only generates load */
}

/*
 * Second caller of func3(), invoked directly from main(): runs 0xFF
 * iterations, each one updating a running product and then calling func3().
 * Takes no arguments and returns nothing.
 */
void func4(void)
{
   int count;
   /*
    * Unsigned on purpose: the running product no longer fits in 64 bits
    * once the factor reaches 21. Unsigned arithmetic wraps around, whereas
    * overflowing a signed int64_t is undefined behaviour.
    */
   uint64_t s = 1;

   for (count = 0; count < 0xFF; count++) {
      s *= (uint64_t)(count + 1);
      func3();
   }

   (void)s;   /* the product itself is not used; the loop only generates load */
}
/*
 * Outer driver of the demo call chain: invokes func2() 0xFFFF times.
 * Takes no arguments and returns nothing.
 */
void func1(void)
{
   int remaining = 0XFFFF;

   /* Count down instead of up; the number of func2() calls is the same. */
   while (remaining-- > 0) {
      func2();
   }
}

/*
 * Program entry point: prints a banner, runs the long func1() workload,
 * prints a second banner, then runs the shorter func4() workload.
 * Always returns 0.
 */
int main(void)
{
    /* Phase 1: func1 -> func2 -> func3 */
    fputs("\n Hello World! \n", stdout);
    func1();

    /* Phase 2: func4 -> func3 */
    fputs("\n step 2! \n", stdout);
    func4();

    return 0;
}

#compiling with:
gcc -Wall  test_a.c -g -o test_a

#install perf on Ubuntu 14.04:
 sudo apt-get install linux-tools-common linux-tools-generic linux-tools-`uname -r`

#run test_a:
./test_a

#find the PID of test_a (21033 in this example) with:
ps aux|grep test_a

sudo perf record -p 21033
#ctrl+c to break

sudo perf report
It will show the profiling result, as below:


Now you know the bottleneck: func3().
You can also watch real-time CPU usage with:
sudo perf top

Another useful option is -g, which records call graphs so the report can show who called what:
perf record -g -p pid
perf report -g 'graph,0.5,caller'

perf report --max-stack=6 --stdio -s parent

ref: http://rhaas.blogspot.co.uk/2012/06/perf-good-bad-ugly.html

No comments:

Post a Comment