/* SPDX-License-Identifier: GPL-2.0-or-later */ #include #include #include #include #include #include #include "x4x.h" #include "iomap.h" static void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose) { u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG; printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse, dll_setting->clk_delay, dll_setting->tap, dll_setting->pi, dll_setting->db_en, dll_setting->db_sel); } struct db_limit { u8 tap0; u8 tap1; u8 pi0; u8 pi1; }; static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting) { struct db_limit limit; switch (s->selected_timings.mem_clk) { default: case MEM_CLOCK_800MHz: limit.tap0 = 3; limit.tap1 = 10; limit.pi0 = 2; limit.pi1 = 3; break; case MEM_CLOCK_1066MHz: limit.tap0 = 2; limit.tap1 = 8; limit.pi0 = 6; limit.pi1 = 7; break; case MEM_CLOCK_1333MHz: limit.tap0 = 3; limit.tap1 = 11; /* TO CHECK: Might be reverse since this makes little sense */ limit.pi0 = 6; limit.pi1 = 4; break; } if (dq_dqs_setting->tap < limit.tap0) { dq_dqs_setting->db_en = 1; dq_dqs_setting->db_sel = 1; } else if ((dq_dqs_setting->tap == limit.tap0) && (dq_dqs_setting->pi < limit.pi0)) { dq_dqs_setting->db_en = 1; dq_dqs_setting->db_sel = 1; } else if (dq_dqs_setting->tap < limit.tap1) { dq_dqs_setting->db_en = 0; dq_dqs_setting->db_sel = 0; } else if ((dq_dqs_setting->tap == limit.tap1) && (dq_dqs_setting->pi < limit.pi1)) { dq_dqs_setting->db_en = 0; dq_dqs_setting->db_sel = 0; } else { dq_dqs_setting->db_en = 1; dq_dqs_setting->db_sel = 0; } } static const u8 max_tap[3] = {12, 10, 13}; static int increment_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting) { u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz]; if (dq_dqs_setting->pi < 6) { dq_dqs_setting->pi += 1; } else if (dq_dqs_setting->tap < max_tap_val) { dq_dqs_setting->pi = 0; dq_dqs_setting->tap += 1; } else if (dq_dqs_setting->clk_delay < 2) { dq_dqs_setting->pi = 0; dq_dqs_setting->tap = 0; dq_dqs_setting->clk_delay += 1; } else if (dq_dqs_setting->coarse < 1) { dq_dqs_setting->pi = 0; dq_dqs_setting->tap = 0; dq_dqs_setting->clk_delay -= 1; dq_dqs_setting->coarse += 1; } else { return CB_ERR; } set_db(s, dq_dqs_setting); return CB_SUCCESS; } static int decrement_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting) { u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz]; if (dq_dqs_setting->pi > 0) { dq_dqs_setting->pi -= 1; } else if (dq_dqs_setting->tap > 0) { dq_dqs_setting->pi = 6; dq_dqs_setting->tap -= 1; } else if (dq_dqs_setting->clk_delay > 0) { dq_dqs_setting->pi = 6; dq_dqs_setting->tap = max_tap_val; dq_dqs_setting->clk_delay -= 1; } else if (dq_dqs_setting->coarse > 0) { dq_dqs_setting->pi = 6; dq_dqs_setting->tap = max_tap_val; dq_dqs_setting->clk_delay += 1; dq_dqs_setting->coarse -= 1; } else { return CB_ERR; } set_db(s, dq_dqs_setting); return CB_SUCCESS; } #define WT_PATTERN_SIZE 80 static const u32 write_training_schedule[WT_PATTERN_SIZE] = { 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, 0x03030303, 0x04040404, 0x09090909, 0x10101010, 0x21212121, 0x40404040, 0x81818181, 0x00000000, 0x03030303, 0x04040404, 0x09090909, 0x10101010, 0x21212121, 0x40404040, 0x81818181, 0x00000000, 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe, 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe, }; enum training_modes { SUCCEEDING = 0, FAILING = 1 }; static u8 test_dq_aligned(const struct sysinfo *s, const u8 channel) { u32 address; int rank, lane; u8 count, count1; u8 data[8]; u8 lane_error = 0; FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) { address = test_address(channel, rank); for (count = 0; count < WT_PATTERN_SIZE; count++) { for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) { if ((count1 % 16) == 0) MCHBAR32(0xf90) = 1; const u32 pattern = write_training_schedule[count1]; write32((u32 *)address + 8 * count1, pattern); write32((u32 *)address + 8 * count1 + 4, pattern); } const u32 good = write_training_schedule[count]; write32(&data[0], read32((u32 *)address + 8 * count)); write32(&data[4], read32((u32 *)address + 8 * count + 4)); FOR_EACH_BYTELANE(lane) { u8 expected = (good >> ((lane % 4) * 8)) & 0xff; if (data[lane] != expected) lane_error |= 1 << lane; } } } return lane_error; } #define CONSISTENCY 10 /* * This function finds either failing or succeeding writes by increasing DQ. * When it has found a failing or succeeding setting it will increase DQ * another 10 times to make sure the result is consistent. * This is probably done because lanes cannot be trained independent from * each other. */ static int find_dq_limit(const struct sysinfo *s, const u8 channel, struct dll_setting dq_setting[TOTAL_BYTELANES], u8 dq_lim[TOTAL_BYTELANES], const enum training_modes expected_result) { int status = CB_SUCCESS; int lane; u8 test_result; u8 pass_count[TOTAL_BYTELANES]; u8 succes_mask = 0xff; printk(RAM_DEBUG, "Looking for %s writes on channel %d\n", expected_result == FAILING ? "failing" : "succeeding", channel); memset(pass_count, 0, sizeof(pass_count)); while(succes_mask) { test_result = test_dq_aligned(s, channel); FOR_EACH_BYTELANE(lane) { if (((test_result >> lane) & 1) != expected_result) { status = increment_dq_dqs(s, &dq_setting[lane]); dqset(channel, lane, &dq_setting[lane]); dq_lim[lane]++; } else if (pass_count[lane] < CONSISTENCY) { status = increment_dq_dqs(s, &dq_setting[lane]); dqset(channel, lane, &dq_setting[lane]); dq_lim[lane]++; pass_count[lane]++; } else if (pass_count[lane] == CONSISTENCY) { succes_mask &= ~(1 << lane); } if (status == CB_ERR) { printk(BIOS_CRIT, "Could not find a case of %s " "writes on CH%d, lane %d\n", expected_result == FAILING ? "failing" : "succeeding", channel, lane); return CB_ERR; } } } return CB_SUCCESS; } /* * This attempts to find the ideal delay for DQ to account for the skew between * the DQ and the DQS signal. * The training works this way: * - start from the DQS delay values (DQ is always later than DQS) * - increment the DQ delay until a succeeding write is found on all bytelayes, * on all ranks on a channel and save these values * - again increment the DQ delay until write start to fail on all bytelanes and * save that value * - use the mean between the saved succeeding and failing value * - note: bytelanes cannot be trained independently, so the delays need to be * adjusted and tested for all of them at the same time */ int do_write_training(struct sysinfo *s) { int i; u8 channel, lane; u8 dq_lower[TOTAL_BYTELANES]; u8 dq_upper[TOTAL_BYTELANES]; struct dll_setting dq_setting[TOTAL_BYTELANES]; printk(BIOS_DEBUG, "Starting DQ write training\n"); FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel); /* Start all lanes at DQS values */ FOR_EACH_BYTELANE(lane) { dqset(channel, lane, &s->dqs_settings[channel][lane]); s->dq_settings[channel][lane] = s->dqs_settings[channel][lane]; } memset(dq_lower, 0, sizeof(dq_lower)); /* Start from DQS settings */ memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting)); if (find_dq_limit(s, channel, dq_setting, dq_lower, SUCCEEDING)) { printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n"); return CB_ERR; } memcpy(dq_upper, dq_lower, sizeof(dq_lower)); if (find_dq_limit(s, channel, dq_setting, dq_upper, FAILING)) { printk(BIOS_WARNING, "Could not find failing upper limit DQ setting\n"); return CB_ERR; } FOR_EACH_BYTELANE(lane) { dq_lower[lane] -= CONSISTENCY - 1; dq_upper[lane] -= CONSISTENCY - 1; u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2; printk(RAM_DEBUG, "Centered value for DQ DLL:" " ch%d, lane %d, #steps = %d\n", channel, lane, dq_center); for (i = 0; i < dq_center; i++) { /* Should never happen */ if (increment_dq_dqs(s, &s->dq_settings[channel][lane]) == CB_ERR) printk(BIOS_ERR, "Huh? write training overflowed!!\n"); } } /* Reset DQ DLL settings and increment with centered value*/ printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel); FOR_EACH_BYTELANE(lane) { printk(BIOS_DEBUG, "\tlane%d: ", lane); print_dll_setting(&s->dq_settings[channel][lane], 1); dqset(channel, lane, &s->dq_settings[channel][lane]); } } printk(BIOS_DEBUG, "Done DQ write training\n"); return CB_SUCCESS; } #define RT_PATTERN_SIZE 40 static const u32 read_training_schedule[RT_PATTERN_SIZE] = { 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0x10101010, 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, 0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010, 0x03030303, 0x04040404, 0x09090909, 0x10101010, 0x21212121, 0x40404040, 0x81818181, 0x00000000, 0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee, 0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe }; static int rt_increment_dqs(struct rt_dqs_setting *setting) { if (setting->pi < 7) { setting->pi++; } else if (setting->tap < 14) { setting->pi = 0; setting->tap++; } else { return CB_ERR; } return CB_SUCCESS; } static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel) { int i, rank, lane; volatile u8 data[8]; u32 address; u8 bytelane_error = 0; FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) { address = test_address(channel, rank); for (i = 0; i < RT_PATTERN_SIZE; i++) { const u32 good = read_training_schedule[i]; write32(&data[0], read32((u32 *)address + i * 8)); write32(&data[4], read32((u32 *)address + i * 8 + 4)); FOR_EACH_BYTELANE(lane) { if (data[lane] != (good & 0xff)) bytelane_error |= 1 << lane; } } } return bytelane_error; } static int rt_find_dqs_limit(struct sysinfo *s, u8 channel, struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES], u8 dqs_lim[TOTAL_BYTELANES], const enum training_modes expected_result) { int lane; u8 test_result; int status = CB_SUCCESS; FOR_EACH_BYTELANE(lane) rt_set_dqs(channel, lane, 0, &dqs_setting[lane]); while(status == CB_SUCCESS) { test_result = test_dqs_aligned(s, channel); if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff)) return CB_SUCCESS; FOR_EACH_BYTELANE(lane) { if (((test_result >> lane) & 1) != expected_result) { status = rt_increment_dqs(&dqs_setting[lane]); dqs_lim[lane]++; rt_set_dqs(channel, lane, 0, &dqs_setting[lane]); } } } if (expected_result == SUCCEEDING) { printk(BIOS_CRIT, "Could not find RT DQS setting\n"); return CB_ERR; } else { printk(RAM_DEBUG, "Read succeeded over all DQS" " settings, continuing\n"); return CB_SUCCESS; } } #define RT_LOOPS 3 /* * This attempts to find the ideal delay for DQS on reads (rx). * The training works this way: * - start from the lowest possible delay (0) on all bytelanes * - increment the DQS rx delays until a succeeding write is found on all * bytelayes, on all ranks on a channel and save these values * - again increment the DQS rx delay until write start to fail on all bytelanes * and save that value * - use the mean between the saved succeeding and failing value * - note0: bytelanes cannot be trained independently, so the delays need to be * adjusted and tested for all of them at the same time * - note1: At this stage all ranks effectively use the rank0's rt_dqs settings, * but later on their respective settings are used (TODO where is the * 'switch' register??). So programming the results for all ranks at the end * of the training. Programming on all ranks instead of all populated ranks, * seems to be required, most likely because the signals can't really be generated * separately. */ int do_read_training(struct sysinfo *s) { int loop, channel, i, lane, rank; u32 address, content; u8 dqs_lower[TOTAL_BYTELANES]; u8 dqs_upper[TOTAL_BYTELANES]; struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES]; u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES]; memset(saved_dqs_center, 0, sizeof(saved_dqs_center)); printk(BIOS_DEBUG, "Starting DQS read training\n"); for (loop = 0; loop < RT_LOOPS; loop++) { FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { printk(RAM_DEBUG, "Doing DQS read training on CH%d\n", channel); /* Write pattern to strobe address */ FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) { address = test_address(channel, rank); for (i = 0; i < RT_PATTERN_SIZE; i++) { content = read_training_schedule[i]; write32((u32 *)address + 8 * i, content); write32((u32 *)address + 8 * i + 4, content); } } memset(dqs_lower, 0, sizeof(dqs_lower)); memset(&dqs_setting, 0, sizeof(dqs_setting)); if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower, SUCCEEDING)) { printk(BIOS_CRIT, "Could not find working lower limit DQS setting\n"); return CB_ERR; } FOR_EACH_BYTELANE(lane) dqs_upper[lane] = dqs_lower[lane]; if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper, FAILING)) { printk(BIOS_CRIT, "Could not find failing upper limit DQ setting\n"); return CB_ERR; } printk(RAM_DEBUG, "Centered values, loop %d:\n", loop); FOR_EACH_BYTELANE(lane) { u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2; printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center); saved_dqs_center[channel][lane] += center; } } /* END FOR_EACH_POPULATED_CHANNEL */ } /* end RT_LOOPS */ memset(s->rt_dqs, 0, sizeof(s->rt_dqs)); FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) { printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel); FOR_EACH_BYTELANE(lane) { saved_dqs_center[channel][lane] /= RT_LOOPS; while (saved_dqs_center[channel][lane]--) { if(rt_increment_dqs(&s->rt_dqs[channel][lane]) == CB_ERR) /* Should never happen */ printk(BIOS_ERR, "Huh? read training overflowed!!\n"); } /* Later on separate settings for each rank are used so program all of them */ FOR_EACH_RANK_IN_CHANNEL(rank) rt_set_dqs(channel, lane, rank, &s->rt_dqs[channel][lane]); printk(BIOS_DEBUG, "\tlane%d: %d.%d\n", lane, s->rt_dqs[channel][lane].tap, s->rt_dqs[channel][lane].pi); } } printk(BIOS_DEBUG, "Done DQS read training\n"); return CB_SUCCESS; } /* Enable write leveling on selected rank and disable output on other ranks */ static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config, u8 config_rank, u8 target_rank, int wl_enable) { u32 emrs1; /* Is shifted by bits 2 later so u8 can be used to reduce size */ static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */ { /* Config 0: 2R2R */ {0x11, 0x00, 0x91, 0x00}, {0x00, 0x11, 0x91, 0x00}, {0x91, 0x00, 0x11, 0x00}, {0x91, 0x00, 0x00, 0x11} }, { // Config 1: 2R1R {0x11, 0x00, 0x91, 0x00}, {0x00, 0x11, 0x91, 0x00}, {0x91, 0x00, 0x11, 0x00}, {0x00, 0x00, 0x00, 0x00} }, { // Config 2: 1R2R {0x11, 0x00, 0x91, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x91, 0x00, 0x11, 0x00}, {0x91, 0x00, 0x00, 0x11} }, { // Config 3: 1R1R {0x11, 0x00, 0x91, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x91, 0x00, 0x11, 0x00}, {0x00, 0x00, 0x00, 0x00} }, { // Config 4: 2R0R {0x11, 0x00, 0x00, 0x00}, {0x00, 0x11, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00} }, { // Config 5: 0R2R {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x11, 0x00}, {0x00, 0x00, 0x00, 0x11} }, { // Config 6: 1R0R {0x11, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00} }, { // Config 7: 0R1R {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x11, 0x00}, {0x00, 0x00, 0x00, 0x00} } }; if (wl_enable) { printk(RAM_DEBUG, "Entering WL mode\n"); printk(RAM_DEBUG, "Using WL ODT values\n"); emrs1 = emrs1_lut[config][target_rank][config_rank]; } else { printk(RAM_DEBUG, "Exiting WL mode\n"); emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank]; } printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank); switch (emrs1) { case 0: printk(RAM_DEBUG, "High-Z\n"); break; case 0x11: printk(RAM_DEBUG, "40 Ohm\n"); break; case 0x81: printk(RAM_DEBUG, "30 Ohm\n"); break; case 0x80: printk(RAM_DEBUG, "20 Ohm\n"); break; case 0x10: printk(RAM_DEBUG, "120 Ohm\n"); break; case 0x01: printk(RAM_DEBUG, "60 Ohm\n"); break; default: printk(BIOS_WARNING, "ODT value Undefined!\n"); break; } emrs1 <<= 2; /* Set output drive strength to 34 Ohm during write levelling */ emrs1 |= (1 << 1); if (wl_enable && (target_rank != config_rank)) { printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank); emrs1 |= (1 << 12); } if (wl_enable && (target_rank == config_rank)) { printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank); emrs1 |= (1 << 7); } send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1); } #define N_SAMPLES 5 static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank, u8 high_found[8]) { u32 address = test_address(channel, rank); int samples, lane; memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0])); for (samples = 0; samples < N_SAMPLES; samples++) { write32((u32 *)address, 0x12341234); write32((u32 *)address + 4, 0x12341234); udelay(5); FOR_EACH_BYTELANE(lane) { u8 dq_high = (MCHBAR8(0x561 + 0x400 * channel + (lane * 4)) >> 7) & 1; high_found[lane] += dq_high; } } } static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank) { int lane; u8 saved_24d; struct dll_setting dqs_setting[TOTAL_BYTELANES]; u8 bytelane_ok = 0; u8 dq_sample[TOTAL_BYTELANES]; memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting)); FOR_EACH_BYTELANE(lane) dqsset(channel, lane, &dqs_setting[lane]); saved_24d = MCHBAR8(0x24d + 0x400 * channel); /* Loop 0: Find DQ sample low, by decreasing */ while (bytelane_ok != 0xff) { sample_dq(s, channel, rank, dq_sample); FOR_EACH_BYTELANE(lane) { if (bytelane_ok & (1 << lane)) continue; printk(RAM_SPEW, "%d, %d, %02d, %d," " lane%d sample: %d\n", dqs_setting[lane].coarse, dqs_setting[lane].clk_delay, dqs_setting[lane].tap, dqs_setting[lane].pi, lane, dq_sample[lane]); if (dq_sample[lane] > 0) { if (decrement_dq_dqs(s, &dqs_setting[lane])) { printk(BIOS_EMERG, "DQS setting channel%d, " "lane %d reached a minimum!\n", channel, lane); return CB_ERR; } } else { bytelane_ok |= (1 << lane); } dqsset(channel, lane, &dqs_setting[lane]); } } printk(RAM_DEBUG, "DQS settings on PASS #0:\n"); FOR_EACH_BYTELANE(lane) { printk(RAM_DEBUG, "lane %d: ", lane); print_dll_setting(&dqs_setting[lane], 0); } /* Loop 1: Find DQ sample high, by increasing */ bytelane_ok = 0; while (bytelane_ok != 0xff) { sample_dq(s, channel, rank, dq_sample); FOR_EACH_BYTELANE(lane) { if (bytelane_ok & (1 << lane)) continue; printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n", dqs_setting[lane].coarse, dqs_setting[lane].clk_delay, dqs_setting[lane].tap, dqs_setting[lane].pi, lane, dq_sample[lane]); if (dq_sample[lane] == N_SAMPLES) { bytelane_ok |= (1 << lane); } else { if (increment_dq_dqs(s, &dqs_setting[lane])) { printk(BIOS_EMERG, "DQS setting channel%d, " "lane %d reached a maximum!\n", channel, lane); return CB_ERR; } } dqsset(channel, lane, &dqs_setting[lane]); } } printk(RAM_DEBUG, "DQS settings on PASS #1:\n"); FOR_EACH_BYTELANE(lane) { printk(RAM_DEBUG, "lane %d: ", lane); print_dll_setting(&dqs_setting[lane], 0); } printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel); FOR_EACH_BYTELANE(lane) { printk(BIOS_DEBUG, "\tlane%d: ", lane); print_dll_setting(&dqs_setting[lane], 1); s->dqs_settings[channel][lane] = dqs_setting[lane]; } MCHBAR8(0x24d + 0x400 * channel) = saved_24d; return CB_SUCCESS; } /* * DDR3 uses flyby topology where the clock signal takes a different path * than the data signal, to allow for better signal intergrity. * Therefore the delay on the data signals needs to account for this. * This is done by by sampleling the the DQS write (tx) signal back over * the DQ signal and looking for delay values where the sample transitions * from high to low. * Here the following is done: * - enable write levelling on the first populated rank * - disable output on other populated ranks * - start from safe DQS (tx) delays (other transitions can be * found at different starting values but are generally bad) * - loop0: decrease DQS (tx) delays until low is sampled, * loop1: increase DQS (tx) delays until high is sampled, * That way we are sure to hit a low-high transition * - put all ranks in normal mode of operation again * - note: All ranks need to be leveled together */ void search_write_leveling(struct sysinfo *s) { int i, ch, count; u8 config, rank0, rank1, lane; struct dll_setting dq_setting; u8 chanconfig_lut[16]={0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3}; u8 odt_force[8][4] = { /* [Config][leveling rank] */ {0x5, 0x6, 0x5, 0x9}, {0x5, 0x6, 0x5, 0x0}, {0x5, 0x0, 0x5, 0x9}, {0x5, 0x0, 0x5, 0x0}, {0x1, 0x2, 0x0, 0x0}, {0x0, 0x0, 0x4, 0x8}, {0x1, 0x0, 0x0, 0x0}, {0x0, 0x0, 0x4, 0x0} }; printk(BIOS_DEBUG, "Starting write levelling.\n"); FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) { printk(BIOS_DEBUG, "\tCH%d\n", ch); config = chanconfig_lut[s->dimm_config[ch]]; MCHBAR8(0x5d8 + 0x400 * ch) = MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e; MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) & ~0x3fff) | 0x3fff; MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f; /* find the first populated rank */ FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0) break; /* Enable WL for the first populated rank and disable output for others */ FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1) set_rank_write_level(s, ch, config, rank1, rank0, 1); MCHBAR8(0x298 + 2 + 0x400 * ch) = (MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f) | odt_force[config][rank0]; MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e) | 0x4e; MCHBAR8(0x5d9 + 0x400 * ch) = (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04) | 0x04; MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff) | 0x00014000; if (increment_to_dqs_edge(s, ch, rank0)) die("Write Leveling failed!"); MCHBAR8(0x298 + 2 + 0x400 * ch) = MCHBAR8(0x298 + 2 + 0x400 * ch) & ~0x0f; MCHBAR8(0x271 + 0x400 * ch) = (MCHBAR8(0x271 + 0x400 * ch) & ~0x7e) | 0x0e; MCHBAR8(0x5d9 + 0x400 * ch) = (MCHBAR8(0x5d9 + 0x400 * ch) & ~0x04); MCHBAR32(0x1a0) = (MCHBAR32(0x1a0) & ~0x07ffffff) | 0x00555801; /* Disable WL on the trained rank */ set_rank_write_level(s, ch, config, rank0, rank0, 0); send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12); MCHBAR8(0x5d8 + 0x400 * ch) = (MCHBAR8(0x5d8 + 0x400 * ch) & ~0x0e) | 0x0e; MCHBAR16(0x5c4 + 0x400 * ch) = (MCHBAR16(0x5c4 + 0x400 * ch) & ~0x3fff) | 0x1807; MCHBAR8(0x265 + 0x400 * ch) = MCHBAR8(0x265 + 0x400 * ch) & ~0x1f; /* Disable write level mode for all ranks */ FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0) set_rank_write_level(s, ch, config, rank0, rank0, 0); } MCHBAR8(0x5dc) = (MCHBAR8(0x5dc) & ~0x80) | 0x80; /* Increment DQ (rx) dll setting by a standard amount past DQS, This is further trained in write training. */ switch (s->selected_timings.mem_clk) { default: case MEM_CLOCK_800MHz: count = 39; break; case MEM_CLOCK_1066MHz: count = 32; break; case MEM_CLOCK_1333MHz: count = 42; break; } FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) { dq_setting = s->dqs_settings[ch][lane]; for (i = 0; i < count; i++) if (increment_dq_dqs(s, &dq_setting)) die("Can't further increase DQ past DQS delay"); dqset(ch, lane, &dq_setting); } printk(BIOS_DEBUG, "Done write levelling.\n"); }