// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#include <linux/bitfield.h>
#include <linux/jiffies.h>
#include <linux/limits.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/timer.h>
#include "fbnic.h"
#include "fbnic_csr.h"
#include "fbnic_netdev.h"
/* FBNIC timing & PTP implementation
 * The datapath uses truncated 40b timestamps for scheduling and event
 * reporting. We need to promote those to full 64b, hence we periodically
 * cache the top 32 bits of the HW time counter. Since this makes our time
 * reporting non-atomic we leave the HW clock free running and adjust time
 * offsets in SW as needed.
 * The time offset is 64b, so we need a seq counter for 32b machines.
 * The time offset and the cache of top bits are independent, so we don't
 * need a coherent snapshot of both - READ_ONCE()/WRITE_ONCE() plus a writer
 * side lock are enough.
 */
/* Period of refresh of the top bits of the timestamp - give ourselves an 8x
 * margin. With 2^40 ns being roughly 18 minutes, dividing by 16 works out to
 * about once a minute.
 * The use of nsecs_to_jiffies() should be safe for a <=40b nsec value.
 */
#define FBNIC_TS_HIGH_REFRESH_JIF nsecs_to_jiffies((1ULL << 40) / 16)
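
/* Illustration only - the actual promotion helper, fbnic_ts40_to_ns(), lives
 * elsewhere in the driver. A minimal sketch of the idea, assuming time_high
 * caches the upper 32 bits and ts40 is a truncated 40b HW timestamp:
 *
 *	u64 base = (u64)READ_ONCE(fbn->time_high) << 32;
 *	u64 ns = (base & ~((1ULL << 40) - 1)) | ts40;
 *
 *	if (ns < base)
 *		ns += 1ULL << 40;
 *	ns += READ_ONCE(fbn->time_offset);
 *
 * The wrap check works because the cached value is kept deliberately low, so
 * the real timestamp is always ahead of it by less than 2^40 ns. On 32b
 * machines the offset read would be wrapped in the time_seq counter.
 */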
static struct fbnic_dev *fbnic_from_ptp_info(struct ptp_clock_info *ptp)
{
return container_of(ptp, struct fbnic_dev, ptp_info);
}
/* This function is "slow" in that it always reads @hi twice. A faster
 * version could guess which high part is correct based on @lo instead of
 * re-reading, and could skip the second read of @hi altogether when @lo is
 * far enough from 0.
 */
static u64 __fbnic_time_get_slow(struct fbnic_dev *fbd)
{
u32 hi, lo;
lockdep_assert_held(&fbd->time_lock);
do {
hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
} while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
return (u64)hi << 32 | lo;
}
static void __fbnic_time_set_addend(struct fbnic_dev *fbd, u64 addend)
{
lockdep_assert_held(&fbd->time_lock);
fbnic_wr32(fbd, FBNIC_PTP_ADD_VAL_NS,
FIELD_PREP(FBNIC_PTP_ADD_VAL_NS_MASK, addend >> 32));
fbnic_wr32(fbd, FBNIC_PTP_ADD_VAL_SUBNS, (u32)addend);
}
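
/* Warn if the periodic refresh of the cached high bits appears to have
 * stalled, i.e. the last refresh is more than 1.5x the refresh period ago.
 */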
static void fbnic_ptp_fresh_check(struct fbnic_dev *fbd)
{
if (time_is_after_jiffies(fbd->last_read +
FBNIC_TS_HIGH_REFRESH_JIF * 3 / 2))
return;
dev_warn(fbd->dev, "NIC timestamp refresh stall, delayed by %lu sec\n",
(jiffies - fbd->last_read - FBNIC_TS_HIGH_REFRESH_JIF) / HZ);
}
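
/* Re-read the upper 32 bits of the free-running HW counter and cache them
 * (slightly lowered) so truncated 40b timestamps can be promoted to 64b.
 */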
static void fbnic_ptp_refresh_time(struct fbnic_dev *fbd, struct fbnic_net *fbn)
{
unsigned long flags;
u32 hi;
spin_lock_irqsave(&fbd->time_lock, flags);
hi = fbnic_rd32(fbn->fbd, FBNIC_PTP_CTR_VAL_HI);
if (!fbnic_present(fbd))
goto out; /* Don't bother handling, reset is pending */
	/* Keep the cached high value slightly below the HW value to avoid
	 * racing with incoming timestamps. The logic in fbnic_ts40_to_ns()
	 * takes care of the overflow in this case. The cached time ends up
	 * ~1 minute lower, so incoming timestamps are always later than the
	 * cached time.
	 */
WRITE_ONCE(fbn->time_high, hi - 16);
fbd->last_read = jiffies;
out:
spin_unlock_irqrestore(&fbd->time_lock, flags);
}
static long fbnic_ptp_do_aux_work(struct ptp_clock_info *ptp)
{
struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
struct fbnic_net *fbn;
fbn = netdev_priv(fbd->netdev);
fbnic_ptp_fresh_check(fbd);
fbnic_ptp_refresh_time(fbd, fbn);
return FBNIC_TS_HIGH_REFRESH_JIF;
}
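
/* Frequency adjustment: scale the nominal d_clock period by the requested
 * scaled ppm and program the result as the new addend.
 */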
static int fbnic_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
{
struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
u64 addend, dclk_period;
unsigned long flags;
	/* d_clock is 600 MHz, which in Q16.32 fixed point ns is: */
dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
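	/* Illustrative (assuming FBNIC_CLOCK_FREQ is exactly 600 * 1000 * 1000):
	 * this works out to ~1.6667 ns per cycle, i.e. 0x1AAAAAAAA once scaled
	 * by 2^32.
	 */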
addend = adjust_by_scaled_ppm(dclk_period, scaled_ppm);
spin_lock_irqsave(&fbd->time_lock, flags);
__fbnic_time_set_addend(fbd, addend);
fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_ADDEND_SET);
/* Flush, make sure FBNIC_PTP_ADD_VAL_* is stable for at least 4 clks */
fbnic_rd32(fbd, FBNIC_PTP_SPARE);
spin_unlock_irqrestore(&fbd->time_lock, flags);
return fbnic_present(fbd) ? 0 : -EIO;
}
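
/* Clock steps are handled purely in SW by moving the 64b offset; the
 * time_seq counter lets 32b readers observe the 8-byte update atomically.
 */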
static int fbnic_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
struct fbnic_net *fbn;
unsigned long flags;
fbn = netdev_priv(fbd->netdev);
spin_lock_irqsave(&fbd->time_lock, flags);
u64_stats_update_begin(&fbn->time_seq);
WRITE_ONCE(fbn->time_offset, READ_ONCE(fbn->time_offset) + delta);
u64_stats_update_end(&fbn->time_seq);
spin_unlock_irqrestore(&fbd->time_lock, flags);
return 0;
}
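
/* Read the full 64b HW time with a hi/lo/hi retry loop, bracketing the low
 * word read with system timestamps for PHC-to-system clock correlation.
 */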
static int
fbnic_ptp_gettimex64(struct ptp_clock_info *ptp, struct timespec64 *ts,
struct ptp_system_timestamp *sts)
{
struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
struct fbnic_net *fbn;
unsigned long flags;
u64 time_ns;
u32 hi, lo;
fbn = netdev_priv(fbd->netdev);
spin_lock_irqsave(&fbd->time_lock, flags);
do {
hi = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI);
ptp_read_system_prets(sts);
lo = fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_LO);
ptp_read_system_postts(sts);
		/* As with the comment above __fbnic_time_get_slow() - this
		 * can be optimized if needed.
		 */
} while (hi != fbnic_rd32(fbd, FBNIC_PTP_CTR_VAL_HI));
time_ns = ((u64)hi << 32 | lo) + fbn->time_offset;
spin_unlock_irqrestore(&fbd->time_lock, flags);
if (!fbnic_present(fbd))
return -EIO;
*ts = ns_to_timespec64(time_ns);
return 0;
}
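
/* Setting the time is also SW-only: the HW clock keeps free running and the
 * difference between the requested time and the device time becomes the new
 * offset.
 */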
static int
fbnic_ptp_settime64(struct ptp_clock_info *ptp, const struct timespec64 *ts)
{
struct fbnic_dev *fbd = fbnic_from_ptp_info(ptp);
struct fbnic_net *fbn;
unsigned long flags;
u64 dev_ns, host_ns;
int ret;
fbn = netdev_priv(fbd->netdev);
host_ns = timespec64_to_ns(ts);
spin_lock_irqsave(&fbd->time_lock, flags);
dev_ns = __fbnic_time_get_slow(fbd);
if (fbnic_present(fbd)) {
u64_stats_update_begin(&fbn->time_seq);
WRITE_ONCE(fbn->time_offset, host_ns - dev_ns);
u64_stats_update_end(&fbn->time_seq);
ret = 0;
} else {
ret = -EIO;
}
spin_unlock_irqrestore(&fbd->time_lock, flags);
return ret;
}
static const struct ptp_clock_info fbnic_ptp_info = {
.owner = THIS_MODULE,
	/* Max adjustment is (1,000,000,000 - 1) ppb, which ensures the
	 * increment stays positive after the maximum negative adjustment.
	 */
.max_adj = 999999999,
.do_aux_work = fbnic_ptp_do_aux_work,
.adjfine = fbnic_ptp_adjfine,
.adjtime = fbnic_ptp_adjtime,
.gettimex64 = fbnic_ptp_gettimex64,
.settime64 = fbnic_ptp_settime64,
};
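
/* Bring the HW clock to a known state: program the tick interval and the
 * nominal addend, zero and re-arm the counter, and clear the SW offset and
 * cached high bits.
 */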
static void fbnic_ptp_reset(struct fbnic_dev *fbd)
{
struct fbnic_net *fbn = netdev_priv(fbd->netdev);
u64 dclk_period;
fbnic_wr32(fbd, FBNIC_PTP_CTRL,
FBNIC_PTP_CTRL_EN |
FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
	/* d_clock is 600 MHz, which in Q16.32 fixed point ns is: */
dclk_period = (((u64)1000000000) << 32) / FBNIC_CLOCK_FREQ;
__fbnic_time_set_addend(fbd, dclk_period);
fbnic_wr32(fbd, FBNIC_PTP_INIT_HI, 0);
fbnic_wr32(fbd, FBNIC_PTP_INIT_LO, 0);
fbnic_wr32(fbd, FBNIC_PTP_ADJUST, FBNIC_PTP_ADJUST_INIT);
fbnic_wr32(fbd, FBNIC_PTP_CTRL,
FBNIC_PTP_CTRL_EN |
FBNIC_PTP_CTRL_TQS_OUT_EN |
FIELD_PREP(FBNIC_PTP_CTRL_MAC_OUT_IVAL, 3) |
FIELD_PREP(FBNIC_PTP_CTRL_TICK_IVAL, 1));
fbnic_rd32(fbd, FBNIC_PTP_SPARE);
fbn->time_offset = 0;
fbn->time_high = 0;
}
void fbnic_time_init(struct fbnic_net *fbn)
{
	/* This is not really a statistic, but the locking primitive fits
	 * our use case perfectly - we need atomic 8-byte READ_ONCE() /
	 * WRITE_ONCE() behavior.
	 */
u64_stats_init(&fbn->time_seq);
}
int fbnic_time_start(struct fbnic_net *fbn)
{
fbnic_ptp_refresh_time(fbn->fbd, fbn);
/* Assume that fbnic_ptp_do_aux_work() will never be called if not
* scheduled here
*/
return ptp_schedule_worker(fbn->fbd->ptp, FBNIC_TS_HIGH_REFRESH_JIF);
}
void fbnic_time_stop(struct fbnic_net *fbn)
{
ptp_cancel_worker_sync(fbn->fbd->ptp);
fbnic_ptp_fresh_check(fbn->fbd);
}
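
/* Reset the HW clock and register the PTP clock device with the PTP core. */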
int fbnic_ptp_setup(struct fbnic_dev *fbd)
{
struct device *dev = fbd->dev;
unsigned long flags;
spin_lock_init(&fbd->time_lock);
spin_lock_irqsave(&fbd->time_lock, flags); /* Appease lockdep */
fbnic_ptp_reset(fbd);
spin_unlock_irqrestore(&fbd->time_lock, flags);
memcpy(&fbd->ptp_info, &fbnic_ptp_info, sizeof(fbnic_ptp_info));
fbd->ptp = ptp_clock_register(&fbd->ptp_info, dev);
if (IS_ERR(fbd->ptp))
dev_err(dev, "Failed to register PTP: %pe\n", fbd->ptp);
return PTR_ERR_OR_ZERO(fbd->ptp);
}
void fbnic_ptp_destroy(struct fbnic_dev *fbd)
{
if (!fbd->ptp)
return;
ptp_clock_unregister(fbd->ptp);
}