문제
2단계 cache simulator를 구현하여라. LRU와 FIFO 정책을 지원해야 한다. 명령 인수로 초기 조건(set index bits, block offset bits, associativity, verbosity, trace file)을 받아야 한다.
필요한 사전 지식
- Linux Shell 사용법
- Cache 작동 방식
- C 언어의 코딩 협약(convention)
대면 소감
사형 선고와도 같았다. 나는 Linux Shell을 쓸 줄도 몰랐고, Cache 작동 방식도 문제만 적당히 풀 수 있을 정도로 어렴풋이 알고 있었으며, C 언어로는 코테 문제만 좀 끄적여본 게 다였다. 그나마 Skeleton Code를 주셔서 다행이었다.
테스트 방식
- 주어진 trace에 적힌 명령어(예시 : L, 0x10, 1)를 읽어서, 주어진 csim-ref와 같은 결과를 출력하면 된다.
- 출력해야 할 값으로는 L1_Hit, L1_Miss, L1_Eviction, L2 ~~~, Mem_Write 등이 있다.
삽질 과정
Windows를 쓰고 있었기에, 어찌저찌 WSL를 깔았다. 그 후, Linux 전용 컴파일 문구와 ls, cd 같은 간단한 명령어를 익혔다.
테스트를 편하게 하기 위해서 복붙해놓은 명령어들;
정답 simulator (csim-ref)
./csim-ref -v -s 2 -E 2 -b 2 -p 0 -t traces/yi.trace
compile & build
gcc -g -o csim.exe csim.c cachelab.c -lm
검사
./csim.exe -v -s 2 -E 2 -b 2 -p 0 -t traces/yi.trace
./csim.exe -v -s 2 -E 2 -b 2 -p 1 -t traces/yi.trace
cachelab.h
cachelab.c를 위한 헤더 파일이다. arrCount로 결과를 출력해야 한다. 헤더 파일이 두 번 이상 포함되지 않도록 #ifndef로 헤더 가드를 만든 점이 인상깊다. 이외에도 verbosity, policy와 같은 핵심 인수들이 선언되어 있다.
/*
* cachelab.h - Prototypes for Cache Lab helper functions
*/
#ifndef CACHELAB_TOOLS_H
#define CACHELAB_TOOLS_H
#define MAX_TRANS_FUNCS 100
typedef struct trans_func
{
void (*func_ptr)(int M,int N,int[N][M],int[M][N]);
char* description;
char correct;
unsigned int num_hits;
unsigned int num_misses;
unsigned int num_evictions;
} trans_func_t;
typedef enum Type
{
L1_Hit = 0,
L1_Miss,
L1_Eviction,
L2_Hit,
L2_Miss,
L2_Eviction,
L2_Write,
Mem_Write,
TYPECOUNT
} TYPE;
extern int arrCount[TYPECOUNT];
extern int verbosity; /* print trace if set */
extern int cachePolicy; // 0 : lru, 1 : fifo
/*
* printSummary - This function provides a standard way for your cache
* simulator * to display its final hit and miss statistics
*/
void printSummary(); /* number of evictions */
void initCount();
void Verbose(TYPE t);
#endif /* CACHELAB_TOOLS_H */
cachelab.c
printSummary가 있지만, Verbose를 통해 결과를 한눈에 볼 수 있었기에, 딱히 쓰진 않았다(?)
/*
* cachelab.c - Cache Lab helper functions
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "cachelab.h"
#include <time.h>
int arrCount[TYPECOUNT];
int verbosity; /* print trace if set */
int cachePolicy; // 0 : lru, 1 : fifo
void initCount()
{
for(int i = 0; i < TYPECOUNT; i++)
{
arrCount[i] = 0;
}
}
void Verbose(TYPE t)
{
arrCount[t]++;
if(verbosity)
{
switch(t)
{
case L1_Miss:
printf("L1 Miss ");
break;
case L1_Hit:
printf("L1 Hit ");
break;
case L1_Eviction:
printf("L1 Eviction ");
break;
case L2_Miss:
printf("L2 Miss ");
break;
case L2_Hit:
printf("L2 Hit ");
break;
case L2_Eviction:
printf("L2 Eviction ");
break;
case L2_Write:
printf("L2 Write ");
break;
case Mem_Write:
printf("Mem Write ");
break;
default:
break;
}
}
}
/*
* printSummary - Summarize the cache simulation statistics. Student cache simulators
* must call this function in order to be properly autograded.
*/
void printSummary()
{
FILE* output_fp = fopen("./csim_results", "w");
assert(output_fp);
for(int i = 0; i < TYPECOUNT; i++)
{
fprintf(output_fp, "%d ", arrCount[i]);
}
fprintf(output_fp, "\n");
fclose(output_fp);
}
csim.c
이런 Skeleton Code를 볼 때는, main 함수부터 읽어야 한다고 들었다. 대략적인 흐름은; 명령인수 읽기, 캐시들의 기본 스펙 계산, 카운터 초기화, trace 파일 읽기 -> accessData(), freeCache()이다.
매우 화가 나는 점 : getopt.h와 unistd.h를 VSCode/VS에서 둘 다 지원을 안 해준다. 그래서 Linux Shell로 디버깅을 했는데, 고급 IDE(?)들과는 다르게 그냥 Segment Fault를 내고 죽어버리는 꼴을 여러 번 봐야해서 정말 힘들었다.
/*
* csim.c - A cache simulator that can replay traces from Valgrind
* and output statistics such as number of hits, misses, and
* evictions. The replacement policy is LRU.
*
* Implementation and assumptions:
* 1. Each load/store can cause at most one cache miss. (I examined the trace,
* the largest request I saw was for 8 bytes).
* 2. Instruction loads (I) are ignored, since we are interested in evaluating
* trans.c in terms of its data cache performance.
* 3. data modify (M) is treated as a load followed by a store to the same
* address. Hence, an M operation can result in two cache hits, or a miss and a
* hit plus an possible eviction.
*
* The function printSummary() is given to print output.
* Please use this function to print the number of hits, misses and evictions.
* This is crucial for the driver to evaluate your work.
*/
#include <getopt.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <assert.h>
#include <math.h>
#include <limits.h>
#include <string.h>
#include <errno.h>
#include "cachelab.h"
//#define DEBUG_ON
#define ADDRESS_LENGTH 64
extern int arrCount[TYPECOUNT];
extern int verbosity; /* print trace if set */
extern int cachePolicy; // 0 : lru, 1 : fifo
/* Type: Memory address */
typedef unsigned long long int mem_addr_t;
/* Type: Cache line
LRU is a counter used to implement LRU replacement policy */
typedef struct cache_line {
char valid;
mem_addr_t tag;
unsigned long long int lru;
char dirty;
} cache_line_t;
typedef cache_line_t* cache_set_t;
typedef cache_set_t* cache_t;
/* Globals set by command line args */
//int verbosity = 0; /* print trace if set */
int s = 0; /* set index bits */
int s2 = 0; /* set index bits for L2 */
int b = 0; /* block offset bits */
int E = 0; /* associativity */
char* trace_file = NULL;
/* Derived from command line args */
int S; /* number of sets */
int B; /* block size (bytes) */
int S2; /* number of sets in L2 */
int E2; /* associativity of L2 */
/* Counters used to record cache statistics */
unsigned long long int lru_counter = 1;
unsigned long long int l2_lru_counter = 1;
int* l1_fifo = 0;
int* l2_fifo = 0;
/* The cache we are simulating */
cache_t cache, cache_l2;
mem_addr_t set_index_mask, set_index_mask_l2;
/*
* initCache - Allocate memory, write 0's for valid and tag and LRU
* also computes the set_index_mask
*/
void initCache()
{
int i,j;
cache = (cache_set_t*) malloc(sizeof(cache_set_t) * S);
cache_l2 = (cache_set_t*)malloc(sizeof(cache_set_t) * S2);
for (i=0; i<S; i++){
cache[i]=(cache_line_t*) malloc(sizeof(cache_line_t) * E);
for (j=0; j<E; j++){
cache[i][j].valid = 0;
cache[i][j].tag = 0;
cache[i][j].lru = 0;
cache[i][j].dirty = 0;
}
}
for(i = 0; i < S2; i++){
cache_l2[i] = (cache_line_t*) malloc(sizeof(cache_line_t) * E2);
for(j = 0; j < E2; j++){
cache_l2[i][j].valid = 0;
cache_l2[i][j].tag = 0;
cache_l2[i][j].lru = 0;
cache_l2[i][j].dirty = 0;
}
}
/* Computes set index mask */
set_index_mask = (mem_addr_t) (pow(2, s) - 1);
set_index_mask_l2 = (mem_addr_t) (pow(2, s2) - 1);
l1_fifo = (int*)malloc(sizeof(int) * S);
l2_fifo = (int*)malloc(sizeof(int) * S2);
for (i=0; i<S; i++){
l1_fifo[i] = 0;
}
for (i=0; i<S2; i++){
l2_fifo[i] = 0;
}
}
/*
* freeCache - free allocated memory
*/
void freeCache()
{
int i;
for (i=0; i<S; i++){
free(cache[i]);
}
free(cache);
for(i = 0; i < S2; i++){
free(cache_l2[i]);
}
free(cache_l2);
free(l1_fifo);
free(l2_fifo);
}
/*
* accessData - Access data at memory address addr.
* If it is already in cache, increast hit_count
* If it is not in cache, bring it in cache, increase miss count.
* Also increase eviction_count if a line is evicted.
* Process writes if write == 1, process reads if write == 0
*/
/*
Load : write = 0
Save, Modify : write = 1
*/
void accessData(mem_addr_t addr, int write)
{
mem_addr_t set_index = (addr >> b) & set_index_mask;
mem_addr_t tag = addr >> (s + b);
mem_addr_t set_index_l2 = (addr >> b) & set_index_mask_l2;
mem_addr_t tag_l2 = addr >> (s2 + b);
// check L1 hit
for (int i = 0; i < E; i++)
{
if (cache[set_index][i].valid && cache[set_index][i].tag == tag)
{
arrCount[L1_Hit]++;
cache[set_index][i].lru = 0; // Reset LRU
return; // L1 hit -> return;
}
}
// L1 miss
arrCount[L1_Miss]++;
// check L2 hit
for (int i = 0; i < E2; i++)
{
if (cache[set_index_l2][i].valid && cache[set_index_l2][i].tag == tag_l2)
{
arrCount[L2_Hit]++;
cache[set_index_l2][i].lru = 0; // Reset LRU
// L1 write-back (L1 miss, L2 hit)
int empty_index = -1; // for L1
for (int j = 0; j < E; j++)
if (!cache[set_index][j].valid)
empty_index = j;
if (empty_index != -1) // if L1 has vacant space
{
cache[set_index][empty_index].valid = 1;
cache[set_index][empty_index].tag = tag_l2;
cache[set_index][empty_index].lru = 0;
}
else // do L1 eviction
{
arrCount[L1_Eviction]++;
int evicted_index = -1;
if (cachePolicy == 0) // LRU
{
int max_lru = -1;
for (int j = 0; j < E; j++)
{
if (cache[set_index][i].lru > max_lru)
{
max_lru = cache[set_index][i].lru;
evicted_index = i;
}
}
}
else // FIFO
{
// ...
}
printf("%d", empty_index); // =========================================================
cache[set_index][empty_index].valid = 1;
cache[set_index][empty_index].tag = tag_l2;
cache[set_index][empty_index].lru = 0;
}
return; // L1 miss, L2 hit -> L1 write-back, return;
}
}
// L2 miss
arrCount[L2_Miss]++;
// Load from RAM =============================================================================================================================================
// L2 write-back
int empty_index_l2 = -1; // for L2
for (int i = 0; i < E2; i++)
if (!cache_l2[set_index_l2][i].valid)
empty_index_l2 = i;
if (empty_index_l2 != -1) // if L2 has vacant space
{
cache_l2[set_index_l2][empty_index_l2].valid = 1;
cache_l2[set_index_l2][empty_index_l2].tag = tag_l2;
cache_l2[set_index_l2][empty_index_l2].lru = 0;
}
else // do L2 eviction
{
arrCount[L2_Eviction]++;
int evicted_index_l2 = -1;
if (cachePolicy == 0) // LRU
{
int max_lru = -1;
for (int i = 0; i < E2; i++)
{
if (cache_l2[set_index_l2][i].lru > max_lru)
{
max_lru = cache_l2[set_index_l2][i].lru;
evicted_index_l2 = i;
}
}
}
else // FIFO
{
// ...
}
printf("%d", empty_index_l2); // =========================================================
cache_l2[set_index_l2][empty_index_l2].valid = 1;
cache_l2[set_index_l2][empty_index_l2].tag = tag_l2;
cache_l2[set_index_l2][empty_index_l2].lru = 0;
}
// L1 write-back
int empty_index = -1; // for L1
for (int i = 0; i < E; i++)
if (!cache[set_index][i].valid)
empty_index = i;
if (empty_index != -1) // if L1 has vacant space
{
cache[set_index][empty_index].valid = 1;
cache[set_index][empty_index].tag = tag;
cache[set_index][empty_index].lru = 0;
}
else // do L1 eviction
{
arrCount[L1_Eviction]++;
int evicted_index = -1;
if (cachePolicy == 0) // LRU
{
int max_lru = -1;
for (int i = 0; i < E; i++)
{
if (cache[set_index][i].lru > max_lru)
{
max_lru = cache[set_index][i].lru;
evicted_index = i;
}
}
}
else // FIFO
{
// ...
}
printf("%d", empty_index); // =========================================================
cache[set_index][empty_index].valid = 1;
cache[set_index][empty_index].tag = tag;
cache[set_index][empty_index].lru = 0;
}
return; // L1 miss, L2 miss, RAM hit -> L1, L2 write-back, return;
// ===========================================================================================================================================================
}
/*
* replayTrace - replays the given trace file against the cache
*/
void replayTrace(char* trace_fn)
{
char buf[1000];
mem_addr_t addr=0;
unsigned int len=0;
FILE* trace_fp = fopen(trace_fn, "r");
if(!trace_fp){
fprintf(stderr, "%s: %s\n", trace_fn, strerror(errno));
exit(1);
}
while( fgets(buf, 1000, trace_fp) != NULL) {
if(buf[1]=='S' || buf[1]=='L' || buf[1]=='M') {
sscanf(buf+3, "%llx,%u", &addr, &len);
if(verbosity)
printf("%c 0x%llx,%u ", buf[1], addr, len);
/* If the instruction contains R */
if(buf[1] == 'L' || buf[1] == 'M'){
accessData(addr, 0);
}
/* If the instruction contains W */
if(buf[1]=='M' || buf[1] == 'S')
accessData(addr, 1);
if (verbosity)
printf("\n");
}
}
fclose(trace_fp);
}
/*
* printUsage - Print usage info
*/
void printUsage(char* argv[])
{
printf("Usage: %s [-hv] -s <num> -E <num> -b <num> -p <num> -t <file>\n", argv[0]);
printf("Options:\n");
printf(" -h Print this help message.\n");
printf(" -v Optional verbose flag.\n");
printf(" -s <num> Number of set index bits.\n");
printf(" -E <num> Number of lines per set.\n");
printf(" -b <num> Number of block offset bits.\n");
printf(" -p <num> Select Cache Policy, 0 : lru, 1 : fifo.\n");
printf(" -t <file> Trace file.\n");
printf("\nExamples:\n");
printf(" linux> %s -s 4 -E 1 -b 4 -t traces/yi.trace\n", argv[0]);
printf(" linux> %s -v -s 8 -E 2 -b 4 -t traces/yi.trace\n", argv[0]);
exit(0);
}
/*
* main - Main routine
*/
int main(int argc, char* argv[])
{
char c;
verbosity = 0;
cachePolicy = 0;
while( (c=getopt(argc,argv,"s:E:b:t:p:vh")) != -1){
switch(c){
case 's':
s = atoi(optarg);
break;
case 'E':
E = atoi(optarg);
break;
case 'b':
b = atoi(optarg);
break;
case 't':
trace_file = optarg;
break;
case 'v':
verbosity = 1;
break;
case 'p':
cachePolicy = atoi(optarg);
break;
case 'h':
printUsage(argv);
exit(0);
default:
printUsage(argv);
exit(1);
}
}
/* Make sure that all required command line args were specified */
if (s == 0 || E == 0 || b == 0 || trace_file == NULL || !(cachePolicy == 0 || cachePolicy == 1)) {
printf("%s: Missing required command line argument\n", argv[0]);
printUsage(argv);
exit(1);
}
s2 = s + 1;
/* Compute S, E and B from command line args */
S = (unsigned int) pow(2, s);
B = (unsigned int) pow(2, b);
/* Compute S2, E2, and B2 from command line args */
S2 = (unsigned int) pow(2, s2);
E2 = (unsigned int) E * 4;
/* Initialize cache */
initCache();
initCount();
#ifdef DEBUG_ON
printf("DEBUG: S:%u E:%u B:%u trace:%s\n", S, E, B, trace_file);
printf("DEBUG: set_index_mask: %llu\n", set_index_mask);
#endif
replayTrace(trace_file);
/* Free allocated memory */
freeCache();
printf("level\t\thits\t\tmisses\t\tevictions\t\t\n");
printf("L1\t\t%d\t\t%d\t\t%d\t\t\n", arrCount[L1_Hit], arrCount[L1_Miss], arrCount[L1_Eviction]);
printf("L2\t\t%d\t\t%d\t\t%d\t\t\n", arrCount[L2_Hit], arrCount[L2_Miss], arrCount[L2_Eviction]);
printf("writes to L2: %d, writes to memory: %d\n", arrCount[L2_Write], arrCount[Mem_Write]);
//printSummary();
return 0;
}
제가 이 과제를 끝마칠 때까지 이 포스트를 붙잡고 있어야 할 것 같습니다...
살려주세요...